Diffstat (limited to 'chrome/browser/history')
-rw-r--r--  chrome/browser/history/archived_database.cc  123
-rw-r--r--  chrome/browser/history/archived_database.h  63
-rw-r--r--  chrome/browser/history/download_database.cc  218
-rw-r--r--  chrome/browser/history/download_database.h  74
-rw-r--r--  chrome/browser/history/download_types.h  104
-rw-r--r--  chrome/browser/history/expire_history_backend.cc  698
-rw-r--r--  chrome/browser/history/expire_history_backend.h  290
-rw-r--r--  chrome/browser/history/expire_history_backend_unittest.cc  812
-rw-r--r--  chrome/browser/history/history.cc  762
-rw-r--r--  chrome/browser/history/history.h  852
-rw-r--r--  chrome/browser/history/history_backend.cc  2164
-rw-r--r--  chrome/browser/history/history_backend.h  560
-rw-r--r--  chrome/browser/history/history_backend_unittest.cc  606
-rw-r--r--  chrome/browser/history/history_database.cc  337
-rw-r--r--  chrome/browser/history/history_database.h  188
-rw-r--r--  chrome/browser/history/history_indexer.idl  57
-rw-r--r--  chrome/browser/history/history_marshaling.h  140
-rw-r--r--  chrome/browser/history/history_notifications.h  74
-rw-r--r--  chrome/browser/history/history_publisher.cc  46
-rw-r--r--  chrome/browser/history/history_publisher.h  84
-rw-r--r--  chrome/browser/history/history_publisher_none.cc  34
-rw-r--r--  chrome/browser/history/history_publisher_win.cc  139
-rw-r--r--  chrome/browser/history/history_querying_unittest.cc  350
-rw-r--r--  chrome/browser/history/history_types.cc  240
-rw-r--r--  chrome/browser/history/history_types.h  532
-rw-r--r--  chrome/browser/history/history_types_unittest.cc  171
-rw-r--r--  chrome/browser/history/history_unittest.cc  959
-rw-r--r--  chrome/browser/history/in_memory_database.cc  107
-rw-r--r--  chrome/browser/history/in_memory_database.h  51
-rw-r--r--  chrome/browser/history/in_memory_history_backend.cc  135
-rw-r--r--  chrome/browser/history/in_memory_history_backend.h  89
-rw-r--r--  chrome/browser/history/in_memory_url_index.cc  13
-rw-r--r--  chrome/browser/history/in_memory_url_index.h  23
-rw-r--r--  chrome/browser/history/in_memory_url_index_unittest.cc  22
-rw-r--r--  chrome/browser/history/multipart_uitest.cc  61
-rw-r--r--  chrome/browser/history/page_usage_data.cc  35
-rw-r--r--  chrome/browser/history/page_usage_data.h  134
-rw-r--r--  chrome/browser/history/query_parser.cc  386
-rw-r--r--  chrome/browser/history/query_parser.h  107
-rw-r--r--  chrome/browser/history/query_parser_unittest.cc  163
-rw-r--r--  chrome/browser/history/redirect_uitest.cc  303
-rw-r--r--  chrome/browser/history/snippet.cc  285
-rw-r--r--  chrome/browser/history/snippet.h  69
-rw-r--r--  chrome/browser/history/snippet_unittest.cc  254
-rw-r--r--  chrome/browser/history/starred_url_database.cc  628
-rw-r--r--  chrome/browser/history/starred_url_database.h  185
-rw-r--r--  chrome/browser/history/starred_url_database_unittest.cc  284
-rw-r--r--  chrome/browser/history/text_database.cc  378
-rw-r--r--  chrome/browser/history/text_database.h  162
-rw-r--r--  chrome/browser/history/text_database_manager.cc  550
-rw-r--r--  chrome/browser/history/text_database_manager.h  311
-rw-r--r--  chrome/browser/history/text_database_manager_unittest.cc  537
-rw-r--r--  chrome/browser/history/text_database_unittest.cc  332
-rw-r--r--  chrome/browser/history/thumbnail_database.cc  551
-rw-r--r--  chrome/browser/history/thumbnail_database.h  193
-rw-r--r--  chrome/browser/history/thumbnail_database_unittest.cc  371
-rw-r--r--  chrome/browser/history/top_sites.cc  572
-rw-r--r--  chrome/browser/history/top_sites.h  281
-rw-r--r--  chrome/browser/history/top_sites_database.cc  329
-rw-r--r--  chrome/browser/history/top_sites_database.h  136
-rw-r--r--  chrome/browser/history/top_sites_unittest.cc  950
-rw-r--r--  chrome/browser/history/url_database.cc  498
-rw-r--r--  chrome/browser/history/url_database.h  258
-rw-r--r--  chrome/browser/history/url_database_unittest.cc  179
-rw-r--r--  chrome/browser/history/visit_database.cc  440
-rw-r--r--  chrome/browser/history/visit_database.h  168
-rw-r--r--  chrome/browser/history/visit_database_unittest.cc  230
-rw-r--r--  chrome/browser/history/visit_tracker.cc  106
-rw-r--r--  chrome/browser/history/visit_tracker.h  66
-rw-r--r--  chrome/browser/history/visit_tracker_unittest.cc  138
-rw-r--r--  chrome/browser/history/visitsegment_database.cc  386
-rw-r--r--  chrome/browser/history/visitsegment_database.h  88
72 files changed, 22221 insertions, 0 deletions
diff --git a/chrome/browser/history/archived_database.cc b/chrome/browser/history/archived_database.cc
new file mode 100644
index 0000000..1b9e010
--- /dev/null
+++ b/chrome/browser/history/archived_database.cc
@@ -0,0 +1,123 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <algorithm>
+#include <string>
+
+#include "app/sql/statement.h"
+#include "app/sql/transaction.h"
+#include "base/string_util.h"
+#include "chrome/browser/history/archived_database.h"
+
+namespace history {
+
+namespace {
+
+static const int kCurrentVersionNumber = 2;
+static const int kCompatibleVersionNumber = 2;
+
+} // namespace
+
+ArchivedDatabase::ArchivedDatabase() {
+}
+
+ArchivedDatabase::~ArchivedDatabase() {
+}
+
+bool ArchivedDatabase::Init(const FilePath& file_name) {
+  // Set the database page size to something a little larger to give us better
+  // performance (we're typically seek-limited rather than bandwidth-limited).
+  // This only has an effect before any tables have been created; otherwise it
+  // is a no-op. Must be a power of 2 and a max of 8192.
+ db_.set_page_size(4096);
+
+ // Don't use very much memory caching this database. We seldom use it for
+ // anything important.
+ db_.set_cache_size(64);
+
+ // Run the database in exclusive mode. Nobody else should be accessing the
+ // database while we're running, and this will give somewhat improved perf.
+ db_.set_exclusive_locking();
+
+ if (!db_.Open(file_name))
+ return false;
+
+ sql::Transaction transaction(&db_);
+ if (!transaction.Begin()) {
+ db_.Close();
+ return false;
+ }
+
+ // Version check.
+ if (!meta_table_.Init(&db_, kCurrentVersionNumber,
+ kCompatibleVersionNumber)) {
+ db_.Close();
+ return false;
+ }
+
+ // Create the tables.
+ if (!CreateURLTable(false) || !InitVisitTable() ||
+ !InitKeywordSearchTermsTable()) {
+ db_.Close();
+ return false;
+ }
+ CreateMainURLIndex();
+
+ if (EnsureCurrentVersion() != sql::INIT_OK) {
+ db_.Close();
+ return false;
+ }
+
+ return transaction.Commit();
+}
+
+void ArchivedDatabase::BeginTransaction() {
+ db_.BeginTransaction();
+}
+
+void ArchivedDatabase::CommitTransaction() {
+ db_.CommitTransaction();
+}
+
+sql::Connection& ArchivedDatabase::GetDB() {
+ return db_;
+}
+
+// Migration -------------------------------------------------------------------
+
+sql::InitStatus ArchivedDatabase::EnsureCurrentVersion() {
+ // We can't read databases newer than we were designed for.
+ if (meta_table_.GetCompatibleVersionNumber() > kCurrentVersionNumber) {
+ LOG(WARNING) << "Archived database is too new.";
+ return sql::INIT_TOO_NEW;
+ }
+
+  // NOTICE: If you are changing structures for things shared with the main
+  // history file like URLs, visits, or downloads, that will need migration as
+  // well. Instead of putting such migration code in this class, it should be
+  // in the corresponding file (url_database.cc, etc.) and called from here
+  // and from history_database.cc.
+
+ int cur_version = meta_table_.GetVersionNumber();
+ if (cur_version == 1) {
+ if (!DropStarredIDFromURLs()) {
+ LOG(WARNING) << "Unable to update archived database to version 2.";
+ return sql::INIT_FAILURE;
+ }
+ ++cur_version;
+ meta_table_.SetVersionNumber(cur_version);
+ meta_table_.SetCompatibleVersionNumber(
+ std::min(cur_version, kCompatibleVersionNumber));
+ }
+
+ // Put future migration cases here.
+
+  // When the version is too old, we just try to continue anyway; there should
+  // not be a released product that makes a database too old for us to handle.
+ LOG_IF(WARNING, cur_version < kCurrentVersionNumber) <<
+ "Archived database version " << cur_version << " is too old to handle.";
+
+ return sql::INIT_OK;
+}
+} // namespace history
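EnsureCurrentVersion() above follows the meta-table migration pattern used throughout the history databases: refuse anything newer than the code understands, then upgrade one version at a time. As an illustration of how the "future migration cases" slot would be filled, a hypothetical version-3 step might look like the following sketch (MigrateFooToVersion3() is an invented helper, and kCurrentVersionNumber would be bumped to 3):

    // Hypothetical continuation inside EnsureCurrentVersion().
    if (cur_version == 2) {
      if (!MigrateFooToVersion3()) {  // Illustrative helper, not real.
        LOG(WARNING) << "Unable to update archived database to version 3.";
        return sql::INIT_FAILURE;
      }
      ++cur_version;
      meta_table_.SetVersionNumber(cur_version);
      meta_table_.SetCompatibleVersionNumber(
          std::min(cur_version, kCompatibleVersionNumber));
    }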
diff --git a/chrome/browser/history/archived_database.h b/chrome/browser/history/archived_database.h
new file mode 100644
index 0000000..c9d8757
--- /dev/null
+++ b/chrome/browser/history/archived_database.h
@@ -0,0 +1,63 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_HISTORY_ARCHIVED_DATABASE_H_
+#define CHROME_BROWSER_HISTORY_ARCHIVED_DATABASE_H_
+
+#include "app/sql/connection.h"
+#include "app/sql/init_status.h"
+#include "app/sql/meta_table.h"
+#include "base/basictypes.h"
+#include "chrome/browser/history/url_database.h"
+#include "chrome/browser/history/visit_database.h"
+
+class FilePath;
+
+namespace history {
+
+// Encapsulates the database operations for archived history.
+//
+// IMPORTANT NOTE: The IDs in this system for URLs and visits will be
+// different from those in the main database. This is to eliminate the
+// dependency between them so we can deal with each one on its own.
+class ArchivedDatabase : public URLDatabase,
+ public VisitDatabase {
+ public:
+ // Must call Init() before using other members.
+ ArchivedDatabase();
+ virtual ~ArchivedDatabase();
+
+  // Initializes the database connection. Init() must succeed (return true)
+  // before any other functions on this class are called.
+ bool Init(const FilePath& file_name);
+
+ // Transactions on the database. We support nested transactions and only
+ // commit when the outermost one is committed (sqlite doesn't support true
+ // nested transactions).
+ void BeginTransaction();
+ void CommitTransaction();
+
+ private:
+ // Implemented for the specialized databases.
+ virtual sql::Connection& GetDB();
+
+  // Makes sure the version is up-to-date, updating if necessary. If the
+  // database is too old to migrate, the user will be notified. In that case,
+  // or for other errors, an error status is returned. sql::INIT_OK means the
+  // database is up-to-date and ready for use.
+ //
+ // This assumes it is called from the init function inside a transaction. It
+ // may commit the transaction and start a new one if migration requires it.
+ sql::InitStatus EnsureCurrentVersion();
+
+ // The database.
+ sql::Connection db_;
+ sql::MetaTable meta_table_;
+
+ DISALLOW_COPY_AND_ASSIGN(ArchivedDatabase);
+};
+
+} // namespace history
+
+#endif // CHROME_BROWSER_HISTORY_ARCHIVED_DATABASE_H_
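The transaction comment above relies on sql::Connection counting open transactions: SQLite has no true nested transactions, so only the outermost Begin/Commit pair actually touches the database. An illustrative calling pattern (not code from this change):

    void UpdateArchive(history::ArchivedDatabase* db) {
      db->BeginTransaction();   // Opens the real SQLite transaction.
      db->BeginTransaction();   // Nested: bookkeeping only.
      // ... modify rows through the URLDatabase/VisitDatabase mixins ...
      db->CommitTransaction();  // Nested: bookkeeping only.
      db->CommitTransaction();  // Outermost: actually commits to disk.
    }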
diff --git a/chrome/browser/history/download_database.cc b/chrome/browser/history/download_database.cc
new file mode 100644
index 0000000..aa3dbde
--- /dev/null
+++ b/chrome/browser/history/download_database.cc
@@ -0,0 +1,218 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/history/download_database.h"
+
+#include <limits>
+#include <vector>
+
+#include "app/sql/connection.h"
+#include "app/sql/statement.h"
+#include "base/file_path.h"
+#include "base/utf_string_conversions.h"
+#include "build/build_config.h"
+#include "chrome/browser/download/download_item.h"
+#include "chrome/browser/history/download_types.h"
+
+// Download schema:
+//
+//   id               SQLite-generated primary key.
+//   full_path        Location of the download on disk.
+//   url              URL of the downloaded file.
+//   start_time       When the download was started.
+//   received_bytes   Total size downloaded.
+//   total_bytes      Total size of the download.
+//   state            Identifies whether this download is completed. Not used
+//                    directly by the history system. See DownloadItem's
+//                    DownloadState for where this is used.
+
+namespace history {
+
+namespace {
+
+#if defined(OS_POSIX)
+
+// Binds/reads the given file path to the given column of the given statement.
+void BindFilePath(sql::Statement& statement, const FilePath& path, int col) {
+ statement.BindString(col, path.value());
+}
+FilePath ColumnFilePath(sql::Statement& statement, int col) {
+ return FilePath(statement.ColumnString(col));
+}
+
+#else
+
+// See above.
+void BindFilePath(sql::Statement& statement, const FilePath& path, int col) {
+ statement.BindString(col, UTF16ToUTF8(path.value()));
+}
+FilePath ColumnFilePath(sql::Statement& statement, int col) {
+ return FilePath(UTF8ToUTF16(statement.ColumnString(col)));
+}
+
+#endif
+
+} // namespace
+
+DownloadDatabase::DownloadDatabase() {
+}
+
+DownloadDatabase::~DownloadDatabase() {
+}
+
+bool DownloadDatabase::InitDownloadTable() {
+ if (!GetDB().DoesTableExist("downloads")) {
+ if (!GetDB().Execute(
+ "CREATE TABLE downloads ("
+ "id INTEGER PRIMARY KEY,"
+ "full_path LONGVARCHAR NOT NULL,"
+ "url LONGVARCHAR NOT NULL,"
+ "start_time INTEGER NOT NULL,"
+ "received_bytes INTEGER NOT NULL,"
+ "total_bytes INTEGER NOT NULL,"
+ "state INTEGER NOT NULL)"))
+ return false;
+ }
+ return true;
+}
+
+bool DownloadDatabase::DropDownloadTable() {
+ return GetDB().Execute("DROP TABLE downloads");
+}
+
+void DownloadDatabase::QueryDownloads(
+ std::vector<DownloadCreateInfo>* results) {
+ results->clear();
+
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "SELECT id, full_path, url, start_time, received_bytes, "
+ "total_bytes, state "
+ "FROM downloads "
+ "ORDER BY start_time"));
+ if (!statement)
+ return;
+
+ while (statement.Step()) {
+ DownloadCreateInfo info;
+ info.db_handle = statement.ColumnInt64(0);
+
+ info.path = ColumnFilePath(statement, 1);
+ info.url = GURL(statement.ColumnString(2));
+ info.start_time = base::Time::FromTimeT(statement.ColumnInt64(3));
+ info.received_bytes = statement.ColumnInt64(4);
+ info.total_bytes = statement.ColumnInt64(5);
+ info.state = statement.ColumnInt(6);
+ results->push_back(info);
+ }
+}
+
+bool DownloadDatabase::UpdateDownload(int64 received_bytes,
+ int32 state,
+ DownloadID db_handle) {
+ DCHECK(db_handle > 0);
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "UPDATE downloads "
+ "SET received_bytes=?, state=? WHERE id=?"));
+ if (!statement)
+ return false;
+
+ statement.BindInt64(0, received_bytes);
+ statement.BindInt(1, state);
+ statement.BindInt64(2, db_handle);
+ return statement.Run();
+}
+
+bool DownloadDatabase::UpdateDownloadPath(const FilePath& path,
+ DownloadID db_handle) {
+ DCHECK(db_handle > 0);
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "UPDATE downloads SET full_path=? WHERE id=?"));
+ if (!statement)
+ return false;
+
+ BindFilePath(statement, path, 0);
+ statement.BindInt64(1, db_handle);
+ return statement.Run();
+}
+
+bool DownloadDatabase::CleanUpInProgressEntries() {
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "UPDATE downloads SET state=? WHERE state=?"));
+ if (!statement)
+ return false;
+ statement.BindInt(0, DownloadItem::CANCELLED);
+ statement.BindInt(1, DownloadItem::IN_PROGRESS);
+ return statement.Run();
+}
+
+int64 DownloadDatabase::CreateDownload(const DownloadCreateInfo& info) {
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "INSERT INTO downloads "
+ "(full_path, url, start_time, received_bytes, total_bytes, state) "
+ "VALUES (?, ?, ?, ?, ?, ?)"));
+ if (!statement)
+ return 0;
+
+ BindFilePath(statement, info.path, 0);
+ statement.BindString(1, info.url.spec());
+ statement.BindInt64(2, info.start_time.ToTimeT());
+ statement.BindInt64(3, info.received_bytes);
+ statement.BindInt64(4, info.total_bytes);
+ statement.BindInt(5, info.state);
+
+ if (statement.Run())
+ return GetDB().GetLastInsertRowId();
+ return 0;
+}
+
+void DownloadDatabase::RemoveDownload(DownloadID db_handle) {
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "DELETE FROM downloads WHERE id=?"));
+ if (!statement)
+ return;
+
+ statement.BindInt64(0, db_handle);
+ statement.Run();
+}
+
+void DownloadDatabase::RemoveDownloadsBetween(base::Time delete_begin,
+ base::Time delete_end) {
+ // This does not use an index. We currently aren't likely to have enough
+ // downloads where an index by time will give us a lot of benefit.
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "DELETE FROM downloads WHERE start_time >= ? AND start_time < ? "
+ "AND (State = ? OR State = ?)"));
+ if (!statement)
+ return;
+
+ time_t start_time = delete_begin.ToTimeT();
+ time_t end_time = delete_end.ToTimeT();
+ statement.BindInt64(0, start_time);
+ statement.BindInt64(
+ 1,
+ end_time ? end_time : std::numeric_limits<int64>::max());
+ statement.BindInt(2, DownloadItem::COMPLETE);
+ statement.BindInt(3, DownloadItem::CANCELLED);
+ statement.Run();
+}
+
+void DownloadDatabase::SearchDownloads(std::vector<int64>* results,
+ const string16& search_text) {
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "SELECT id FROM downloads WHERE url LIKE ? "
+ "OR full_path LIKE ? ORDER BY id"));
+ if (!statement)
+ return;
+
+ std::string text("%");
+ text.append(UTF16ToUTF8(search_text));
+ text.push_back('%');
+ statement.BindString(0, text);
+ statement.BindString(1, text);
+
+ while (statement.Step())
+ results->push_back(statement.ColumnInt64(0));
+}
+
+} // namespace history
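Taken together, these methods form the persistence cycle the history backend runs for downloads. A usage sketch, assuming a concrete subclass that implements GetDB() and has already called InitDownloadTable() (DownloadDatabase itself is abstract):

    void PersistOneDownload(history::DownloadDatabase* db,
                            const DownloadCreateInfo& info) {
      // CreateDownload() returns the new row's id (the "db_handle"),
      // or 0 on failure.
      int64 handle = db->CreateDownload(info);
      if (!handle)
        return;

      // As bytes arrive, progress and state are updated in place.
      db->UpdateDownload(info.received_bytes, info.state, handle);

      // Null Times make the delete unbounded in both directions; only
      // COMPLETE and CANCELLED rows are removed.
      db->RemoveDownloadsBetween(base::Time(), base::Time());
    }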
diff --git a/chrome/browser/history/download_database.h b/chrome/browser/history/download_database.h
new file mode 100644
index 0000000..11adf31
--- /dev/null
+++ b/chrome/browser/history/download_database.h
@@ -0,0 +1,74 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_HISTORY_DOWNLOAD_DATABASE_H_
+#define CHROME_BROWSER_HISTORY_DOWNLOAD_DATABASE_H_
+
+#include "chrome/browser/history/history_types.h"
+
+struct DownloadCreateInfo;
+class FilePath;
+
+namespace sql {
+class Connection;
+}
+
+namespace history {
+
+// Maintains a table of downloads.
+class DownloadDatabase {
+ public:
+ // Must call InitDownloadTable before using any other functions.
+ DownloadDatabase();
+ virtual ~DownloadDatabase();
+
+ // Get all the downloads from the database.
+ void QueryDownloads(std::vector<DownloadCreateInfo>* results);
+
+ // Update the state of one download. Returns true if successful.
+ bool UpdateDownload(int64 received_bytes, int32 state, DownloadID db_handle);
+
+ // Update the path of one download. Returns true if successful.
+ bool UpdateDownloadPath(const FilePath& path, DownloadID db_handle);
+
+ // Fixes state of the download entries. Sometimes entries with IN_PROGRESS
+ // state are not updated during browser shutdown (particularly when crashing).
+ // On the next start such entries are considered canceled. This functions
+ // fixes such entries.
+ bool CleanUpInProgressEntries();
+
+ // Create a new database entry for one download and return its primary db id.
+ int64 CreateDownload(const DownloadCreateInfo& info);
+
+ // Remove a download from the database.
+ void RemoveDownload(DownloadID db_handle);
+
+ // Remove all completed downloads that started after |remove_begin|
+ // (inclusive) and before |remove_end|. You may use null Time values
+ // to do an unbounded delete in either direction. This function ignores
+ // all downloads that are in progress or are waiting to be cancelled.
+ void RemoveDownloadsBetween(base::Time remove_begin, base::Time remove_end);
+
+ // Search for downloads matching the search text.
+ void SearchDownloads(std::vector<int64>* results,
+ const string16& search_text);
+
+ protected:
+ // Returns the database for the functions in this interface.
+ virtual sql::Connection& GetDB() = 0;
+
+ // Creates the downloads table if needed.
+ bool InitDownloadTable();
+
+  // Used to quickly clear the downloads table: first drop it, then
+  // re-initialize it.
+ bool DropDownloadTable();
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DownloadDatabase);
+};
+
+} // namespace history
+
+#endif // CHROME_BROWSER_HISTORY_DOWNLOAD_DATABASE_H_
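CleanUpInProgressEntries() is meant to run once at startup, before any new downloads are created, so that rows orphaned by a crash read back as canceled. A sketch of a plausible call site (the wrapper name is invented, not part of this change):

    // Hypothetical startup hook.
    bool InitDownloadState(history::DownloadDatabase* db) {
      // Rows still marked IN_PROGRESS were orphaned by an unclean shutdown;
      // flip them to CANCELLED before handing the table to the manager.
      return db->CleanUpInProgressEntries();
    }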
diff --git a/chrome/browser/history/download_types.h b/chrome/browser/history/download_types.h
new file mode 100644
index 0000000..642ac5b
--- /dev/null
+++ b/chrome/browser/history/download_types.h
@@ -0,0 +1,104 @@
+// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+//
+// Download creation struct used for querying the history service.
+
+#ifndef CHROME_BROWSER_HISTORY_DOWNLOAD_TYPES_H_
+#define CHROME_BROWSER_HISTORY_DOWNLOAD_TYPES_H_
+
+#include <string>
+
+#include "base/basictypes.h"
+#include "base/file_path.h"
+#include "base/time.h"
+#include "chrome/browser/download/download_file.h"
+#include "googleurl/src/gurl.h"
+
+// Used for informing the download database of a new download, where we don't
+// want to pass DownloadItems between threads. The history service also uses a
+// vector of these structs for passing us the state of all downloads at
+// initialization time.
+struct DownloadCreateInfo {
+ DownloadCreateInfo(const FilePath& path,
+ const GURL& url,
+ base::Time start_time,
+ int64 received_bytes,
+ int64 total_bytes,
+ int32 state,
+ int32 download_id)
+ : path(path),
+ url(url),
+ path_uniquifier(0),
+ start_time(start_time),
+ received_bytes(received_bytes),
+ total_bytes(total_bytes),
+ state(state),
+ download_id(download_id),
+ child_id(-1),
+ render_view_id(-1),
+ request_id(-1),
+ db_handle(0),
+ prompt_user_for_save_location(false),
+ is_dangerous(false),
+ is_extension_install(false) {
+ }
+
+ DownloadCreateInfo()
+ : path_uniquifier(0),
+ received_bytes(0),
+ total_bytes(0),
+ state(-1),
+ download_id(-1),
+ child_id(-1),
+ render_view_id(-1),
+ request_id(-1),
+ db_handle(0),
+ prompt_user_for_save_location(false),
+ is_dangerous(false),
+ is_extension_install(false) {
+ }
+
+ // DownloadItem fields
+ FilePath path;
+ GURL url;
+ GURL referrer_url;
+ FilePath suggested_path;
+ // A number that should be added to the suggested path to make it unique.
+ // 0 means no number should be appended. Not actually stored in the db.
+ int path_uniquifier;
+ base::Time start_time;
+ int64 received_bytes;
+ int64 total_bytes;
+ int32 state;
+ int32 download_id;
+ int child_id;
+ int render_view_id;
+ int request_id;
+ int64 db_handle;
+ std::string content_disposition;
+ std::string mime_type;
+ // The value of the content type header sent with the downloaded item. It
+ // may be different from |mime_type|, which may be set based on heuristics
+ // which may look at the file extension and first few bytes of the file.
+ std::string original_mime_type;
+
+ // True if we should display the 'save as...' UI and prompt the user
+ // for the download location.
+  // False if the UI should be suppressed and the download performed to the
+ // default location.
+ bool prompt_user_for_save_location;
+ // Whether this download is potentially dangerous (ex: exe, dll, ...).
+ bool is_dangerous;
+ // The original name for a dangerous download.
+ FilePath original_name;
+ // Whether this download is for extension install or not.
+ bool is_extension_install;
+ // The charset of the referring page where the download request comes from.
+ // It's used to construct a suggested filename.
+ std::string referrer_charset;
+ // The download file save info.
+ DownloadSaveInfo save_info;
+};
+
+#endif // CHROME_BROWSER_HISTORY_DOWNLOAD_TYPES_H_
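Since DownloadCreateInfo carries only plain data, it can be built on any thread. An illustrative example of constructing one for a brand-new download (all values are made up; db_handle stays 0 until DownloadDatabase::CreateDownload() assigns one):

    DownloadCreateInfo info(FilePath(FILE_PATH_LITERAL("report.pdf")),
                            GURL("http://example.com/report.pdf"),
                            base::Time::Now(),
                            0,                          // received_bytes
                            102400,                     // total_bytes
                            DownloadItem::IN_PROGRESS,  // state
                            1);                         // download_id
    info.mime_type = "application/pdf";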
diff --git a/chrome/browser/history/expire_history_backend.cc b/chrome/browser/history/expire_history_backend.cc
new file mode 100644
index 0000000..bd471ad
--- /dev/null
+++ b/chrome/browser/history/expire_history_backend.cc
@@ -0,0 +1,698 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/history/expire_history_backend.h"
+
+#include <algorithm>
+#include <limits>
+
+#include "base/compiler_specific.h"
+#include "base/file_util.h"
+#include "base/message_loop.h"
+#include "chrome/browser/bookmarks/bookmark_service.h"
+#include "chrome/browser/history/archived_database.h"
+#include "chrome/browser/history/history_database.h"
+#include "chrome/browser/history/history_notifications.h"
+#include "chrome/browser/history/text_database.h"
+#include "chrome/browser/history/text_database_manager.h"
+#include "chrome/browser/history/thumbnail_database.h"
+#include "chrome/common/notification_type.h"
+
+using base::Time;
+using base::TimeDelta;
+
+namespace history {
+
+namespace {
+
+// The number of days by which the expiration threshold is advanced for items
+// that we want to expire early, such as those of AUTO_SUBFRAME transition type.
+const int kEarlyExpirationAdvanceDays = 30;
+
+// Reads all types of visits starting from beginning of time to the given end
+// time. This is the most general reader.
+class AllVisitsReader : public ExpiringVisitsReader {
+ public:
+ virtual bool Read(Time end_time, HistoryDatabase* db,
+ VisitVector* visits, int max_visits) const {
+ DCHECK(db) << "must have a database to operate upon";
+ DCHECK(visits) << "visit vector has to exist in order to populate it";
+
+ db->GetAllVisitsInRange(Time(), end_time, max_visits, visits);
+    // If we got the maximum number of visits we asked for, there could be
+    // additional things to expire now.
+ return static_cast<int>(visits->size()) == max_visits;
+ }
+};
+
+// Reads only AUTO_SUBFRAME visits, within a computed range. The range is
+// computed as follows:
+// * |begin_time| is read from the meta table. This value is updated whenever
+// there are no more additional visits to expire by this reader.
+// * |end_time| is advanced forward by a constant (kEarlyExpirationAdvanceDays),
+// but not past the current time.
+class AutoSubframeVisitsReader : public ExpiringVisitsReader {
+ public:
+ virtual bool Read(Time end_time, HistoryDatabase* db,
+ VisitVector* visits, int max_visits) const {
+ DCHECK(db) << "must have a database to operate upon";
+ DCHECK(visits) << "visit vector has to exist in order to populate it";
+
+ Time begin_time = db->GetEarlyExpirationThreshold();
+ // Advance |end_time| to expire early.
+ Time early_end_time = end_time +
+ TimeDelta::FromDays(kEarlyExpirationAdvanceDays);
+
+ // We don't want to set the early expiration threshold to a time in the
+ // future.
+ Time now = Time::Now();
+ if (early_end_time > now)
+ early_end_time = now;
+
+ db->GetVisitsInRangeForTransition(begin_time, early_end_time,
+ max_visits,
+ PageTransition::AUTO_SUBFRAME,
+ visits);
+ bool more = static_cast<int>(visits->size()) == max_visits;
+ if (!more)
+ db->UpdateEarlyExpirationThreshold(early_end_time);
+
+ return more;
+ }
+};
+
+// Returns true if this visit is worth archiving. Otherwise, this visit is not
+// worth saving (for example, subframe navigations and redirects) and we can
+// just delete it when it gets old.
+bool ShouldArchiveVisit(const VisitRow& visit) {
+ int no_qualifier = PageTransition::StripQualifier(visit.transition);
+
+ // These types of transitions are always "important" and the user will want
+ // to see them.
+ if (no_qualifier == PageTransition::TYPED ||
+ no_qualifier == PageTransition::AUTO_BOOKMARK ||
+ no_qualifier == PageTransition::START_PAGE)
+ return true;
+
+ // Only archive these "less important" transitions when they were the final
+ // navigation and not part of a redirect chain.
+ if ((no_qualifier == PageTransition::LINK ||
+ no_qualifier == PageTransition::FORM_SUBMIT ||
+ no_qualifier == PageTransition::KEYWORD ||
+ no_qualifier == PageTransition::GENERATED) &&
+ visit.transition & PageTransition::CHAIN_END)
+ return true;
+
+ // The transition types we ignore are AUTO_SUBFRAME and MANUAL_SUBFRAME.
+ return false;
+}
+
+// The number of visits we will expire every time we check for old items. This
+// prevents us from doing too much work at any given time.
+const int kNumExpirePerIteration = 10;
+
+// The number of seconds between checking for items that should be expired when
+// we think there might be more items to expire. This timeout is used when the
+// last expiration found at least kNumExpirePerIteration and we want to check
+// again "soon."
+const int kExpirationDelaySec = 30;
+
+// The number of minutes between checking, as with kExpirationDelaySec, but
+// when we didn't find enough things to expire last time. If there was no
+// history to expire last iteration, it's likely there is nothing next
+// iteration, so we want to wait longer before checking to avoid wasting CPU.
+const int kExpirationEmptyDelayMin = 5;
+
+// The number of minutes that we wait for before scheduling a task to
+// delete old history index files.
+const int kIndexExpirationDelayMin = 2;
+
+// The number of the most recent months for which we do not want to delete
+// the history index files.
+const int kStoreHistoryIndexesForMonths = 12;
+
+} // namespace
+
+struct ExpireHistoryBackend::DeleteDependencies {
+ // The time range affected. These can be is_null() to be unbounded in one
+ // or both directions.
+ base::Time begin_time, end_time;
+
+ // ----- Filled by DeleteVisitRelatedInfo or manually if a function doesn't
+ // call that function. -----
+
+ // The unique URL rows affected by this delete.
+ std::map<URLID, URLRow> affected_urls;
+
+ // ----- Filled by DeleteOneURL -----
+
+ // The URLs deleted during this operation.
+ std::vector<URLRow> deleted_urls;
+
+ // The list of all favicon IDs that the affected URLs had. Favicons will be
+ // shared between all URLs with the same favicon, so this is the set of IDs
+ // that we will need to check when the delete operations are complete.
+ std::set<FavIconID> affected_favicons;
+
+  // Tracks the set of databases that have changed so we can optimize when
+  // we're done.
+ TextDatabaseManager::ChangeSet text_db_changes;
+};
+
+ExpireHistoryBackend::ExpireHistoryBackend(
+ BroadcastNotificationDelegate* delegate,
+ BookmarkService* bookmark_service)
+ : delegate_(delegate),
+ main_db_(NULL),
+ archived_db_(NULL),
+ thumb_db_(NULL),
+ text_db_(NULL),
+ ALLOW_THIS_IN_INITIALIZER_LIST(factory_(this)),
+ bookmark_service_(bookmark_service) {
+}
+
+ExpireHistoryBackend::~ExpireHistoryBackend() {
+}
+
+void ExpireHistoryBackend::SetDatabases(HistoryDatabase* main_db,
+ ArchivedDatabase* archived_db,
+ ThumbnailDatabase* thumb_db,
+ TextDatabaseManager* text_db) {
+ main_db_ = main_db;
+ archived_db_ = archived_db;
+ thumb_db_ = thumb_db;
+ text_db_ = text_db;
+}
+
+void ExpireHistoryBackend::DeleteURL(const GURL& url) {
+ if (!main_db_)
+ return;
+
+ URLRow url_row;
+ if (!main_db_->GetRowForURL(url, &url_row))
+ return; // Nothing to delete.
+
+ // Collect all the visits and delete them. Note that we don't give up if
+ // there are no visits, since the URL could still have an entry that we should
+ // delete.
+ // TODO(brettw): bug 1171148: We should also delete from the archived DB.
+ VisitVector visits;
+ main_db_->GetVisitsForURL(url_row.id(), &visits);
+
+ DeleteDependencies dependencies;
+ DeleteVisitRelatedInfo(visits, &dependencies);
+
+ // We skip ExpireURLsForVisits (since we are deleting from the URL, and not
+ // starting with visits in a given time range). We therefore need to call the
+ // deletion and favicon update functions manually.
+
+ BookmarkService* bookmark_service = GetBookmarkService();
+ bool is_bookmarked =
+ (bookmark_service && bookmark_service->IsBookmarked(url));
+
+ DeleteOneURL(url_row, is_bookmarked, &dependencies);
+ if (!is_bookmarked)
+ DeleteFaviconsIfPossible(dependencies.affected_favicons);
+
+ if (text_db_)
+ text_db_->OptimizeChangedDatabases(dependencies.text_db_changes);
+
+ BroadcastDeleteNotifications(&dependencies);
+}
+
+void ExpireHistoryBackend::ExpireHistoryBetween(
+ const std::set<GURL>& restrict_urls, Time begin_time, Time end_time) {
+ if (!main_db_)
+ return;
+
+ // There may be stuff in the text database manager's temporary cache.
+ if (text_db_)
+ text_db_->DeleteFromUncommitted(restrict_urls, begin_time, end_time);
+
+ // Find the affected visits and delete them.
+ // TODO(brettw): bug 1171164: We should query the archived database here, too.
+ VisitVector visits;
+ main_db_->GetAllVisitsInRange(begin_time, end_time, 0, &visits);
+ if (!restrict_urls.empty()) {
+ std::set<URLID> url_ids;
+ for (std::set<GURL>::const_iterator url = restrict_urls.begin();
+ url != restrict_urls.end(); ++url)
+ url_ids.insert(main_db_->GetRowForURL(*url, NULL));
+ VisitVector all_visits;
+ all_visits.swap(visits);
+ for (VisitVector::iterator visit = all_visits.begin();
+ visit != all_visits.end(); ++visit) {
+ if (url_ids.find(visit->url_id) != url_ids.end())
+ visits.push_back(*visit);
+ }
+ }
+ if (visits.empty())
+ return;
+
+ DeleteDependencies dependencies;
+ DeleteVisitRelatedInfo(visits, &dependencies);
+
+ // Delete or update the URLs affected. We want to update the visit counts
+ // since this is called by the user who wants to delete their recent history,
+ // and we don't want to leave any evidence.
+ ExpireURLsForVisits(visits, &dependencies);
+ DeleteFaviconsIfPossible(dependencies.affected_favicons);
+
+ // An is_null begin time means that all history should be deleted.
+ BroadcastDeleteNotifications(&dependencies);
+
+ // Pick up any bits possibly left over.
+ ParanoidExpireHistory();
+}
+
+void ExpireHistoryBackend::ArchiveHistoryBefore(Time end_time) {
+ if (!main_db_)
+ return;
+
+ // Archive as much history as possible before the given date.
+ ArchiveSomeOldHistory(end_time, GetAllVisitsReader(),
+ std::numeric_limits<size_t>::max());
+ ParanoidExpireHistory();
+}
+
+void ExpireHistoryBackend::InitWorkQueue() {
+ DCHECK(work_queue_.empty()) << "queue has to be empty prior to init";
+
+ for (size_t i = 0; i < readers_.size(); i++)
+ work_queue_.push(readers_[i]);
+}
+
+const ExpiringVisitsReader* ExpireHistoryBackend::GetAllVisitsReader() {
+ if (!all_visits_reader_.get())
+ all_visits_reader_.reset(new AllVisitsReader());
+ return all_visits_reader_.get();
+}
+
+const ExpiringVisitsReader*
+ ExpireHistoryBackend::GetAutoSubframeVisitsReader() {
+ if (!auto_subframe_visits_reader_.get())
+ auto_subframe_visits_reader_.reset(new AutoSubframeVisitsReader());
+ return auto_subframe_visits_reader_.get();
+}
+
+void ExpireHistoryBackend::StartArchivingOldStuff(
+ TimeDelta expiration_threshold) {
+ expiration_threshold_ = expiration_threshold;
+
+  // Remove all readers, just in case this method was called before.
+ readers_.clear();
+  // For now, we explicitly add all known readers. If we come up with more
+  // reader types (in case we want to expire different types of visits in
+  // different ways), we can have the list populated by the creator/owner of
+  // ExpireHistoryBackend.
+ readers_.push_back(GetAllVisitsReader());
+ readers_.push_back(GetAutoSubframeVisitsReader());
+
+ // Initialize the queue with all tasks for the first set of iterations.
+ InitWorkQueue();
+ ScheduleArchive();
+ ScheduleExpireHistoryIndexFiles();
+}
+
+void ExpireHistoryBackend::DeleteFaviconsIfPossible(
+ const std::set<FavIconID>& favicon_set) {
+ if (!main_db_ || !thumb_db_)
+ return;
+
+ for (std::set<FavIconID>::const_iterator i = favicon_set.begin();
+ i != favicon_set.end(); ++i) {
+ if (!main_db_->IsFavIconUsed(*i))
+ thumb_db_->DeleteFavIcon(*i);
+ }
+}
+
+void ExpireHistoryBackend::BroadcastDeleteNotifications(
+ DeleteDependencies* dependencies) {
+ if (!dependencies->deleted_urls.empty()) {
+ // Broadcast the URL deleted notification. Note that we also broadcast when
+ // we were requested to delete everything even if that was a NOP, since
+ // some components care to know when history is deleted (it's up to them to
+ // determine if they care whether anything was deleted).
+ URLsDeletedDetails* deleted_details = new URLsDeletedDetails;
+ deleted_details->all_history = false;
+ std::vector<URLRow> typed_urls_changed; // Collect this for later.
+ for (size_t i = 0; i < dependencies->deleted_urls.size(); i++) {
+ deleted_details->urls.insert(dependencies->deleted_urls[i].url());
+ if (dependencies->deleted_urls[i].typed_count() > 0)
+ typed_urls_changed.push_back(dependencies->deleted_urls[i]);
+ }
+ delegate_->BroadcastNotifications(NotificationType::HISTORY_URLS_DELETED,
+ deleted_details);
+
+ // Broadcast the typed URL changed modification (this updates the inline
+ // autocomplete database).
+ //
+ // Note: if we ever need to broadcast changes to more than just typed URLs,
+ // this notification should be changed rather than a new "non-typed"
+ // notification added. The in-memory database can always do the filtering
+ // itself in that case.
+ if (!typed_urls_changed.empty()) {
+ URLsModifiedDetails* modified_details = new URLsModifiedDetails;
+ modified_details->changed_urls.swap(typed_urls_changed);
+ delegate_->BroadcastNotifications(
+ NotificationType::HISTORY_TYPED_URLS_MODIFIED,
+ modified_details);
+ }
+ }
+}
+
+void ExpireHistoryBackend::DeleteVisitRelatedInfo(
+ const VisitVector& visits,
+ DeleteDependencies* dependencies) {
+ for (size_t i = 0; i < visits.size(); i++) {
+ // Delete the visit itself.
+ main_db_->DeleteVisit(visits[i]);
+
+ // Add the URL row to the affected URL list.
+ std::map<URLID, URLRow>::const_iterator found =
+ dependencies->affected_urls.find(visits[i].url_id);
+ const URLRow* cur_row = NULL;
+ if (found == dependencies->affected_urls.end()) {
+ URLRow row;
+ if (!main_db_->GetURLRow(visits[i].url_id, &row))
+ continue;
+ dependencies->affected_urls[visits[i].url_id] = row;
+ cur_row = &dependencies->affected_urls[visits[i].url_id];
+ } else {
+ cur_row = &found->second;
+ }
+
+ // Delete any associated full-text indexed data.
+ if (visits[i].is_indexed && text_db_) {
+ text_db_->DeletePageData(visits[i].visit_time, cur_row->url(),
+ &dependencies->text_db_changes);
+ }
+ }
+}
+
+void ExpireHistoryBackend::DeleteOneURL(
+ const URLRow& url_row,
+ bool is_bookmarked,
+ DeleteDependencies* dependencies) {
+ main_db_->DeleteSegmentForURL(url_row.id());
+
+ // The URL may be in the text database manager's temporary cache.
+ if (text_db_) {
+ std::set<GURL> restrict_urls;
+ restrict_urls.insert(url_row.url());
+ text_db_->DeleteFromUncommitted(restrict_urls, base::Time(), base::Time());
+ }
+
+ if (!is_bookmarked) {
+ dependencies->deleted_urls.push_back(url_row);
+
+ // Delete stuff that references this URL.
+ if (thumb_db_)
+ thumb_db_->DeleteThumbnail(url_row.id());
+
+ // Collect shared information.
+ if (url_row.favicon_id())
+ dependencies->affected_favicons.insert(url_row.favicon_id());
+
+ // Last, delete the URL entry.
+ main_db_->DeleteURLRow(url_row.id());
+ }
+}
+
+URLID ExpireHistoryBackend::ArchiveOneURL(const URLRow& url_row) {
+ if (!archived_db_)
+ return 0;
+
+ // See if this URL is present in the archived database already. Note that
+ // we must look up by ID since the URL ID will be different.
+ URLRow archived_row;
+ if (archived_db_->GetRowForURL(url_row.url(), &archived_row)) {
+ // TODO(sky): bug 1168470, need to archive past search terms.
+    // FIXME(brettw) should we copy the visit counts over? This will mean that
+    // the main DB's visit counts are only for the last 3 months rather than
+    // cumulative.
+ archived_row.set_last_visit(url_row.last_visit());
+ archived_db_->UpdateURLRow(archived_row.id(), archived_row);
+ return archived_row.id();
+ }
+
+ // This row is not in the archived DB, add it.
+ return archived_db_->AddURL(url_row);
+}
+
+namespace {
+
+struct ChangedURL {
+ ChangedURL() : visit_count(0), typed_count(0) {}
+ int visit_count;
+ int typed_count;
+};
+
+} // namespace
+
+void ExpireHistoryBackend::ExpireURLsForVisits(
+ const VisitVector& visits,
+ DeleteDependencies* dependencies) {
+ // First find all unique URLs and the number of visits we're deleting for
+ // each one.
+ std::map<URLID, ChangedURL> changed_urls;
+ for (size_t i = 0; i < visits.size(); i++) {
+ ChangedURL& cur = changed_urls[visits[i].url_id];
+ cur.visit_count++;
+ // NOTE: This code must stay in sync with HistoryBackend::AddPageVisit().
+ // TODO(pkasting): http://b/1148304 We shouldn't be marking so many URLs as
+ // typed, which would eliminate the need for this code.
+ PageTransition::Type transition =
+ PageTransition::StripQualifier(visits[i].transition);
+ if ((transition == PageTransition::TYPED &&
+ !PageTransition::IsRedirect(visits[i].transition)) ||
+ transition == PageTransition::KEYWORD_GENERATED)
+ cur.typed_count++;
+ }
+
+ // Check each unique URL with deleted visits.
+ BookmarkService* bookmark_service = GetBookmarkService();
+ for (std::map<URLID, ChangedURL>::const_iterator i = changed_urls.begin();
+ i != changed_urls.end(); ++i) {
+ // The unique URL rows should already be filled into the dependencies.
+ URLRow& url_row = dependencies->affected_urls[i->first];
+ if (!url_row.id())
+ continue; // URL row doesn't exist in the database.
+
+ // Check if there are any other visits for this URL and update the time
+ // (the time change may not actually be synced to disk below when we're
+ // archiving).
+ VisitRow last_visit;
+ if (main_db_->GetMostRecentVisitForURL(url_row.id(), &last_visit))
+ url_row.set_last_visit(last_visit.visit_time);
+ else
+ url_row.set_last_visit(Time());
+
+ // Don't delete URLs with visits still in the DB, or bookmarked.
+ bool is_bookmarked =
+ (bookmark_service && bookmark_service->IsBookmarked(url_row.url()));
+ if (!is_bookmarked && url_row.last_visit().is_null()) {
+ // Not bookmarked and no more visits. Nuke the url.
+ DeleteOneURL(url_row, is_bookmarked, dependencies);
+ } else {
+ // NOTE: The calls to std::max() below are a backstop, but they should
+ // never actually be needed unless the database is corrupt (I think).
+ url_row.set_visit_count(
+ std::max(0, url_row.visit_count() - i->second.visit_count));
+ url_row.set_typed_count(
+ std::max(0, url_row.typed_count() - i->second.typed_count));
+
+ // Update the db with the new details.
+ main_db_->UpdateURLRow(url_row.id(), url_row);
+ }
+ }
+}
+
+void ExpireHistoryBackend::ArchiveURLsAndVisits(
+ const VisitVector& visits,
+ DeleteDependencies* dependencies) {
+ if (!archived_db_)
+ return;
+
+ // Make sure all unique URL rows are added to the dependency list and the
+ // archived database. We will also keep the mapping between the main DB URLID
+ // and the archived one.
+ std::map<URLID, URLID> main_id_to_archived_id;
+ for (size_t i = 0; i < visits.size(); i++) {
+ std::map<URLID, URLRow>::const_iterator found =
+ dependencies->affected_urls.find(visits[i].url_id);
+ if (found == dependencies->affected_urls.end()) {
+ // Unique URL encountered, archive it.
+ URLRow row; // Row in the main DB.
+ URLID archived_id; // ID in the archived DB.
+ if (!main_db_->GetURLRow(visits[i].url_id, &row) ||
+ !(archived_id = ArchiveOneURL(row))) {
+ // Failure archiving, skip this one.
+ continue;
+ }
+
+ // Only add URL to the dependency list once we know we successfully
+ // archived it.
+ main_id_to_archived_id[row.id()] = archived_id;
+ dependencies->affected_urls[row.id()] = row;
+ }
+ }
+
+ // Now archive the visits since we know the URL ID to make them reference.
+ // The source visit list should still reference the visits in the main DB, but
+ // we will update it to reflect only the visits that were successfully
+ // archived.
+ for (size_t i = 0; i < visits.size(); i++) {
+ // Construct the visit that we will add to the archived database. We do
+ // not store referring visits since we delete many of the visits when
+ // archiving.
+ VisitRow cur_visit(visits[i]);
+ cur_visit.url_id = main_id_to_archived_id[cur_visit.url_id];
+ cur_visit.referring_visit = 0;
+ archived_db_->AddVisit(&cur_visit);
+ // Ignore failures, we will delete it from the main DB no matter what.
+ }
+}
+
+void ExpireHistoryBackend::ScheduleArchive() {
+ TimeDelta delay;
+ if (work_queue_.empty()) {
+ // If work queue is empty, reset the work queue to contain all tasks and
+ // schedule next iteration after a longer delay.
+ InitWorkQueue();
+ delay = TimeDelta::FromMinutes(kExpirationEmptyDelayMin);
+ } else {
+ delay = TimeDelta::FromSeconds(kExpirationDelaySec);
+ }
+
+ MessageLoop::current()->PostDelayedTask(FROM_HERE, factory_.NewRunnableMethod(
+ &ExpireHistoryBackend::DoArchiveIteration), delay.InMilliseconds());
+}
+
+void ExpireHistoryBackend::DoArchiveIteration() {
+ DCHECK(!work_queue_.empty()) << "queue has to be non-empty";
+
+ const ExpiringVisitsReader* reader = work_queue_.front();
+ bool more_to_expire = ArchiveSomeOldHistory(GetCurrentArchiveTime(), reader,
+ kNumExpirePerIteration);
+
+ work_queue_.pop();
+ // If there are more items to expire, add the reader back to the queue, thus
+ // creating a new task for future iterations.
+ if (more_to_expire)
+ work_queue_.push(reader);
+
+ ScheduleArchive();
+}
+
+bool ExpireHistoryBackend::ArchiveSomeOldHistory(
+ base::Time end_time,
+ const ExpiringVisitsReader* reader,
+ int max_visits) {
+ if (!main_db_)
+ return false;
+
+  // Add an extra time unit to the given end time, because
+ // GetAllVisitsInRange, et al. queries' end value is non-inclusive.
+ Time effective_end_time =
+ Time::FromInternalValue(end_time.ToInternalValue() + 1);
+
+ VisitVector affected_visits;
+ bool more_to_expire = reader->Read(effective_end_time, main_db_,
+ &affected_visits, max_visits);
+
+ // Some visits we'll delete while others we'll archive.
+ VisitVector deleted_visits, archived_visits;
+ for (size_t i = 0; i < affected_visits.size(); i++) {
+ if (ShouldArchiveVisit(affected_visits[i]))
+ archived_visits.push_back(affected_visits[i]);
+ else
+ deleted_visits.push_back(affected_visits[i]);
+ }
+
+ // Do the actual archiving.
+ DeleteDependencies archived_dependencies;
+ ArchiveURLsAndVisits(archived_visits, &archived_dependencies);
+ DeleteVisitRelatedInfo(archived_visits, &archived_dependencies);
+
+ DeleteDependencies deleted_dependencies;
+ DeleteVisitRelatedInfo(deleted_visits, &deleted_dependencies);
+
+ // This will remove or archive all the affected URLs. Must do the deleting
+ // cleanup before archiving so the delete dependencies structure references
+ // only those URLs that were actually deleted instead of having some visits
+ // archived and then the rest deleted.
+ ExpireURLsForVisits(deleted_visits, &deleted_dependencies);
+ ExpireURLsForVisits(archived_visits, &archived_dependencies);
+
+ // Create a union of all affected favicons (we don't store favicons for
+ // archived URLs) and delete them.
+ std::set<FavIconID> affected_favicons(
+ archived_dependencies.affected_favicons);
+ for (std::set<FavIconID>::const_iterator i =
+ deleted_dependencies.affected_favicons.begin();
+ i != deleted_dependencies.affected_favicons.end(); ++i) {
+ affected_favicons.insert(*i);
+ }
+ DeleteFaviconsIfPossible(affected_favicons);
+
+ // Send notifications for the stuff that was deleted. These won't normally be
+ // in history views since they were subframes, but they will be in the visited
+ // link system, which needs to be updated now. This function is smart enough
+ // to not do anything if nothing was deleted.
+ BroadcastDeleteNotifications(&deleted_dependencies);
+
+ return more_to_expire;
+}
+
+void ExpireHistoryBackend::ParanoidExpireHistory() {
+ // FIXME(brettw): Bug 1067331: write this to clean up any errors.
+}
+
+void ExpireHistoryBackend::ScheduleExpireHistoryIndexFiles() {
+ if (!text_db_) {
+ // Can't expire old history index files because we
+ // don't know where they're located.
+ return;
+ }
+
+ TimeDelta delay = TimeDelta::FromMinutes(kIndexExpirationDelayMin);
+ MessageLoop::current()->PostDelayedTask(
+ FROM_HERE, factory_.NewRunnableMethod(
+ &ExpireHistoryBackend::DoExpireHistoryIndexFiles),
+ delay.InMilliseconds());
+}
+
+void ExpireHistoryBackend::DoExpireHistoryIndexFiles() {
+ Time::Exploded exploded;
+ Time::Now().LocalExplode(&exploded);
+ int cutoff_month =
+ exploded.year * 12 + exploded.month - kStoreHistoryIndexesForMonths;
+ TextDatabase::DBIdent cutoff_id =
+ (cutoff_month / 12) * 100 + (cutoff_month % 12);
+
+ FilePath::StringType history_index_files_pattern = TextDatabase::file_base();
+ history_index_files_pattern.append(FILE_PATH_LITERAL("*"));
+ file_util::FileEnumerator file_enumerator(
+ text_db_->GetDir(), false, file_util::FileEnumerator::FILES,
+ history_index_files_pattern);
+ for (FilePath file = file_enumerator.Next(); !file.empty();
+ file = file_enumerator.Next()) {
+ TextDatabase::DBIdent file_id = TextDatabase::FileNameToID(file);
+ if (file_id < cutoff_id)
+ file_util::Delete(file, false);
+ }
+}
+
+BookmarkService* ExpireHistoryBackend::GetBookmarkService() {
+ // We use the bookmark service to determine if a URL is bookmarked. The
+ // bookmark service is loaded on a separate thread and may not be done by the
+  // time we get here. We therefore block until the bookmarks have finished
+ // loading.
+ if (bookmark_service_)
+ bookmark_service_->BlockTillLoaded();
+ return bookmark_service_;
+}
+
+} // namespace history
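The month arithmetic in DoExpireHistoryIndexFiles() packs a year/month pair into TextDatabase's YYYYMM-style DBIdent. A worked example, assuming the code runs in June 2010 with kStoreHistoryIndexesForMonths = 12:

    int cutoff_month = 2010 * 12 + 6 - 12;  // 24114 months since year 0.
    TextDatabase::DBIdent cutoff_id =
        (cutoff_month / 12) * 100 + (cutoff_month % 12);
    // 24114 / 12 = 2009 and 24114 % 12 = 6, so cutoff_id == 200906: any
    // index file whose name decodes to a month before June 2009 is deleted.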
diff --git a/chrome/browser/history/expire_history_backend.h b/chrome/browser/history/expire_history_backend.h
new file mode 100644
index 0000000..9f060ed
--- /dev/null
+++ b/chrome/browser/history/expire_history_backend.h
@@ -0,0 +1,290 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_HISTORY_EXPIRE_HISTORY_BACKEND_H_
+#define CHROME_BROWSER_HISTORY_EXPIRE_HISTORY_BACKEND_H_
+
+#include <queue>
+#include <set>
+#include <vector>
+
+#include "base/basictypes.h"
+#include "base/gtest_prod_util.h"
+#include "base/task.h"
+#include "base/time.h"
+#include "base/scoped_ptr.h"
+#include "chrome/browser/history/history_types.h"
+
+class BookmarkService;
+class GURL;
+class NotificationType;
+class TestingProfile;
+
+namespace history {
+
+class ArchivedDatabase;
+class HistoryDatabase;
+struct HistoryDetails;
+class TextDatabaseManager;
+class ThumbnailDatabase;
+
+// Delegate used to broadcast notifications to the main thread.
+class BroadcastNotificationDelegate {
+ public:
+ // Schedules a broadcast of the given notification on the application main
+ // thread. The details argument will have ownership taken by this function.
+ virtual void BroadcastNotifications(NotificationType type,
+ HistoryDetails* details_deleted) = 0;
+
+ protected:
+ virtual ~BroadcastNotificationDelegate() {}
+};
+
+// Encapsulates visit expiration criteria and type of visits to expire.
+class ExpiringVisitsReader {
+ public:
+ virtual ~ExpiringVisitsReader() {}
+ // Populates |visits| from |db|, using provided |end_time| and |max_visits|
+ // cap.
+ virtual bool Read(base::Time end_time, HistoryDatabase* db,
+ VisitVector* visits, int max_visits) const = 0;
+};
+
+typedef std::vector<const ExpiringVisitsReader*> ExpiringVisitsReaders;
+
+// Helper component to HistoryBackend that manages expiration and deleting of
+// history, as well as moving data from the main database to the archived
+// database as it gets old.
+//
+// It will automatically start periodically archiving old history once you call
+// StartArchivingOldStuff().
+class ExpireHistoryBackend {
+ public:
+ // The delegate pointer must be non-NULL. We will NOT take ownership of it.
+ // BookmarkService may be NULL. The BookmarkService is used when expiring
+ // URLs so that we don't remove any URLs or favicons that are bookmarked
+ // (visits are removed though).
+ ExpireHistoryBackend(BroadcastNotificationDelegate* delegate,
+ BookmarkService* bookmark_service);
+ ~ExpireHistoryBackend();
+
+ // Completes initialization by setting the databases that this class will use.
+ void SetDatabases(HistoryDatabase* main_db,
+ ArchivedDatabase* archived_db,
+ ThumbnailDatabase* thumb_db,
+ TextDatabaseManager* text_db);
+
+ // Begins periodic expiration of history older than the given threshold. This
+ // will continue until the object is deleted.
+ void StartArchivingOldStuff(base::TimeDelta expiration_threshold);
+
+ // Deletes everything associated with a URL.
+ void DeleteURL(const GURL& url);
+
+ // Removes all visits to restrict_urls (or all URLs if empty) in the given
+  // time range, updating the URLs accordingly.
+ void ExpireHistoryBetween(const std::set<GURL>& restrict_urls,
+ base::Time begin_time, base::Time end_time);
+
+ // Archives all visits before and including the given time, updating the URLs
+ // accordingly. This function is intended for migrating old databases
+  // (which encompassed all time) to the tiered structure and testing, and
+ // probably isn't useful for anything else.
+ void ArchiveHistoryBefore(base::Time end_time);
+
+ // Returns the current time that we are archiving stuff to. This will return
+ // the threshold in absolute time rather than a delta, so the caller should
+ // not save it.
+ base::Time GetCurrentArchiveTime() const {
+ return base::Time::Now() - expiration_threshold_;
+ }
+
+ private:
+ FRIEND_TEST_ALL_PREFIXES(ExpireHistoryTest, DeleteTextIndexForURL);
+ FRIEND_TEST_ALL_PREFIXES(ExpireHistoryTest, DeleteFaviconsIfPossible);
+ FRIEND_TEST_ALL_PREFIXES(ExpireHistoryTest, ArchiveSomeOldHistory);
+ FRIEND_TEST_ALL_PREFIXES(ExpireHistoryTest, ExpiringVisitsReader);
+ friend class ::TestingProfile;
+
+ struct DeleteDependencies;
+
+ // Removes the data from the full text index associated with the given URL
+ // string/ID pair. If |update_visits| is set, the visits that reference the
+ // indexed data will be updated to reflect the fact that the indexed data is
+ // gone. Setting this to false is a performance optimization when the caller
+ // knows that the visits will be deleted after the call.
+ //
+ // TODO(brettw) when we have an "archived" history database, this should take
+ // a flag to optionally delete from there. This way it can be used for page
+ // re-indexing as well as for full URL deletion.
+ void DeleteTextIndexForURL(const GURL& url, URLID url_id, bool update_visits);
+
+ // Deletes the visit-related stuff for all the visits in the given list, and
+ // adds the rows for unique URLs affected to the affected_urls list in
+ // the dependencies structure.
+ //
+ // Deleted information is the visits themselves and the full-text index
+ // entries corresponding to them.
+ void DeleteVisitRelatedInfo(const VisitVector& visits,
+ DeleteDependencies* dependencies);
+
+ // Moves the given visits from the main database to the archived one.
+ void ArchiveVisits(const VisitVector& visits);
+
+ // Finds or deletes dependency information for the given URL. Information that
+ // is specific to this URL (URL row, thumbnails, full text indexed stuff,
+ // etc.) is deleted.
+ //
+ // This does not affect the visits! This is used for expiration as well as
+ // deleting from the UI, and they handle visits differently.
+ //
+ // Other information will be collected and returned in the output containers.
+ // This includes some of the things deleted that are needed elsewhere, plus
+ // some things like favicons that could be shared by many URLs, and need to
+ // be checked for deletion (this allows us to delete many URLs with only one
+ // check for shared information at the end).
+ //
+ // Assumes the main_db_ is non-NULL.
+ //
+  // NOTE: If the url is bookmarked, only the segments and text db are
+  // updated; everything else is unchanged. This is done so that bookmarks
+  // retain their favicons and thumbnails.
+ void DeleteOneURL(const URLRow& url_row,
+ bool is_bookmarked,
+ DeleteDependencies* dependencies);
+
+ // Adds or merges the given URL row with the archived database, returning the
+ // ID of the URL in the archived database, or 0 on failure. The main (source)
+ // database will not be affected (the URL will have to be deleted later).
+ //
+ // Assumes the archived database is not NULL.
+ URLID ArchiveOneURL(const URLRow& url_row);
+
+ // Deletes all the URLs in the given vector and handles their dependencies.
+  // This will delete starred URLs.
+ void DeleteURLs(const std::vector<URLRow>& urls,
+ DeleteDependencies* dependencies);
+
+  // Expiration involves removing visits, then propagating the visits out from
+  // there and deleting any orphaned URLs. These will be added to the deleted URLs
+ // field of the dependencies and DeleteOneURL will handle deleting out from
+ // there. This function does not handle favicons.
+ //
+ // When a URL is not deleted and |archive| is not set, the last visit time and
+ // the visit and typed counts will be updated (we want to clear these when a
+ // user is deleting history manually, but not when we're normally expiring old
+ // things from history).
+ //
+ // The visits in the given vector should have already been deleted from the
+ // database, and the list of affected URLs already be filled into
+  // |dependencies->affected_urls|.
+ //
+ // Starred URLs will not be deleted. The information in the dependencies that
+ // DeleteOneURL fills in will be updated, and this function will also delete
+ // any now-unused favicons.
+ void ExpireURLsForVisits(const VisitVector& visits,
+ DeleteDependencies* dependencies);
+
+ // Creates entries in the archived database for the unique URLs referenced
+ // by the given visits. It will then add versions of the visits to that
+ // database. The source database WILL NOT BE MODIFIED. The source URLs and
+ // visits will have to be deleted in another pass.
+ //
+ // The affected URLs will be filled into the given dependencies structure.
+ void ArchiveURLsAndVisits(const VisitVector& visits,
+ DeleteDependencies* dependencies);
+
+ // Deletes the favicons listed in the set if unused. Fails silently (we don't
+ // care about favicons so much, so don't want to stop everything if it fails).
+ void DeleteFaviconsIfPossible(const std::set<FavIconID>& favicon_id);
+
+ // Broadcasts the URL deleted notification.
+ void BroadcastDeleteNotifications(DeleteDependencies* dependencies);
+
+ // Schedules a call to DoArchiveIteration.
+ void ScheduleArchive();
+
+ // Calls ArchiveSomeOldHistory to expire some amount of old history, according
+ // to the items in work queue, and schedules another call to happen in the
+ // future.
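+ //
+ // A sketch of the intended flow (the details live in the .cc file): each
+ // iteration takes the reader at the front of |work_queue_|, expires a batch
+ // of that reader's oldest visits via ArchiveSomeOldHistory, and re-queues
+ // the reader if that call reports that more history may remain.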
+ void DoArchiveIteration();
+
+ // Tries to expire the oldest |max_visits| visits from history that are older
+ // than |end_time|. The return value indicates if we think there might be
+ // more history to expire with the current time threshold (it does not
+ // indicate success or failure).
+ bool ArchiveSomeOldHistory(base::Time end_time,
+ const ExpiringVisitsReader* reader,
+ int max_visits);
+
+ // Tries to detect possible bad history or inconsistencies in the database
+ // and deletes items. For example, URLs with no visits.
+ void ParanoidExpireHistory();
+
+ // Schedules a call to DoExpireHistoryIndexFiles.
+ void ScheduleExpireHistoryIndexFiles();
+
+ // Deletes old history index files.
+ void DoExpireHistoryIndexFiles();
+
+ // Returns the BookmarkService, blocking until it is loaded. This may return
+ // NULL.
+ BookmarkService* GetBookmarkService();
+
+ // Initializes the periodic expiration work queue by populating it with tasks
+ // for all known readers.
+ void InitWorkQueue();
+
+ // Returns the reader for all visits. This method is only used by the unit
+ // tests.
+ const ExpiringVisitsReader* GetAllVisitsReader();
+
+ // Returns the reader for AUTO_SUBFRAME visits. This method is only used by
+ // the unit tests.
+ const ExpiringVisitsReader* GetAutoSubframeVisitsReader();
+
+ // Non-owning pointer to the notification delegate (guaranteed non-NULL).
+ BroadcastNotificationDelegate* delegate_;
+
+ // Non-owning pointers to the databases we deal with (MAY BE NULL).
+ HistoryDatabase* main_db_; // Main history database.
+ ArchivedDatabase* archived_db_; // Old history.
+ ThumbnailDatabase* thumb_db_; // Thumbnails and favicons.
+ TextDatabaseManager* text_db_; // Full text index.
+
+ // Used to generate runnable methods to do timers on this class. They will be
+ // automatically canceled when this class is deleted.
+ ScopedRunnableMethodFactory<ExpireHistoryBackend> factory_;
+
+ // The threshold for "old" history where we will automatically expire it to
+ // the archived database.
+ base::TimeDelta expiration_threshold_;
+
+ // List of all distinct types of readers. This list is used to populate the
+ // work queue.
+ ExpiringVisitsReaders readers_;
+
+ // Work queue for periodic expiration tasks, used by DoArchiveIteration() to
+ // determine what to do at an iteration, as well as populate it for future
+ // iterations.
+ std::queue<const ExpiringVisitsReader*> work_queue_;
+
+ // Readers for various types of visits.
+ // TODO(dglazkov): If you are adding another one, please consider reorganizing
+ // into a map.
+ scoped_ptr<ExpiringVisitsReader> all_visits_reader_;
+ scoped_ptr<ExpiringVisitsReader> auto_subframe_visits_reader_;
+
+ // The BookmarkService; may be null. This is owned by the Profile.
+ //
+ // Use GetBookmarkService to access this, which makes sure the service is
+ // loaded.
+ BookmarkService* bookmark_service_;
+
+ DISALLOW_COPY_AND_ASSIGN(ExpireHistoryBackend);
+};
+
+} // namespace history
+
+#endif // CHROME_BROWSER_HISTORY_EXPIRE_HISTORY_BACKEND_H_
diff --git a/chrome/browser/history/expire_history_backend_unittest.cc b/chrome/browser/history/expire_history_backend_unittest.cc
new file mode 100644
index 0000000..ca822bc
--- /dev/null
+++ b/chrome/browser/history/expire_history_backend_unittest.cc
@@ -0,0 +1,812 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/basictypes.h"
+#include "base/compiler_specific.h"
+#include "base/file_path.h"
+#include "base/file_util.h"
+#include "base/path_service.h"
+#include "base/scoped_ptr.h"
+#include "base/utf_string_conversions.h"
+#include "chrome/browser/bookmarks/bookmark_model.h"
+#include "chrome/browser/history/archived_database.h"
+#include "chrome/browser/history/expire_history_backend.h"
+#include "chrome/browser/history/history_database.h"
+#include "chrome/browser/history/history_notifications.h"
+#include "chrome/browser/history/text_database_manager.h"
+#include "chrome/browser/history/thumbnail_database.h"
+#include "chrome/common/notification_service.h"
+#include "chrome/common/thumbnail_score.h"
+#include "chrome/tools/profiles/thumbnail-inl.h"
+#include "gfx/codec/jpeg_codec.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "third_party/skia/include/core/SkBitmap.h"
+
+using base::Time;
+using base::TimeDelta;
+using base::TimeTicks;
+
+// Filename constants.
+static const FilePath::CharType kTestDir[] = FILE_PATH_LITERAL("ExpireTest");
+static const FilePath::CharType kHistoryFile[] = FILE_PATH_LITERAL("History");
+static const FilePath::CharType kArchivedHistoryFile[] =
+ FILE_PATH_LITERAL("Archived History");
+static const FilePath::CharType kThumbnailFile[] =
+ FILE_PATH_LITERAL("Thumbnails");
+
+// The test must be in the history namespace for the gtest forward declarations
+// to work. It also eliminates a bunch of ugly "history::".
+namespace history {
+
+// ExpireHistoryTest -----------------------------------------------------------
+
+class ExpireHistoryTest : public testing::Test,
+ public BroadcastNotificationDelegate {
+ public:
+ ExpireHistoryTest()
+ : bookmark_model_(NULL),
+ ALLOW_THIS_IN_INITIALIZER_LIST(expirer_(this, &bookmark_model_)),
+ now_(Time::Now()) {
+ }
+
+ protected:
+ // Called by individual tests when they want data populated.
+ void AddExampleData(URLID url_ids[3], Time visit_times[4]);
+
+ // Returns true if the given favicon/thumbnail has an entry in the DB.
+ bool HasFavIcon(FavIconID favicon_id);
+ bool HasThumbnail(URLID url_id);
+
+ // Returns the number of text matches for the given URL in the example data
+ // added by AddExampleData.
+ int CountTextMatchesForURL(const GURL& url);
+
+ // EXPECTs that each URL-specific history thing (basically, everything but
+ // favicons) is gone.
+ void EnsureURLInfoGone(const URLRow& row);
+
+ // Clears the list of notifications received.
+ void ClearLastNotifications() {
+ for (size_t i = 0; i < notifications_.size(); i++)
+ delete notifications_[i].second;
+ notifications_.clear();
+ }
+
+ void StarURL(const GURL& url) {
+ bookmark_model_.AddURL(
+ bookmark_model_.GetBookmarkBarNode(), 0, std::wstring(), url);
+ }
+
+ static bool IsStringInFile(const FilePath& filename, const char* str);
+
+ BookmarkModel bookmark_model_;
+
+ MessageLoop message_loop_;
+
+ ExpireHistoryBackend expirer_;
+
+ scoped_ptr<HistoryDatabase> main_db_;
+ scoped_ptr<ArchivedDatabase> archived_db_;
+ scoped_ptr<ThumbnailDatabase> thumb_db_;
+ scoped_ptr<TextDatabaseManager> text_db_;
+
+ // Time at the beginning of the test, so everybody agrees what "now" is.
+ const Time now_;
+
+ // Notifications intended to be broadcast; we can check these values to make
+ // sure that the deleter is doing the correct broadcasts. We own the details
+ // pointers.
+ typedef std::vector< std::pair<NotificationType, HistoryDetails*> >
+ NotificationList;
+ NotificationList notifications_;
+
+ // Directory for the history files.
+ FilePath dir_;
+
+ private:
+ void SetUp() {
+ FilePath temp_dir;
+ PathService::Get(base::DIR_TEMP, &temp_dir);
+ dir_ = temp_dir.Append(kTestDir);
+ file_util::Delete(dir_, true);
+ file_util::CreateDirectory(dir_);
+
+ FilePath history_name = dir_.Append(kHistoryFile);
+ main_db_.reset(new HistoryDatabase);
+ if (main_db_->Init(history_name, FilePath()) != sql::INIT_OK)
+ main_db_.reset();
+
+ FilePath archived_name = dir_.Append(kArchivedHistoryFile);
+ archived_db_.reset(new ArchivedDatabase);
+ if (!archived_db_->Init(archived_name))
+ archived_db_.reset();
+
+ FilePath thumb_name = dir_.Append(kThumbnailFile);
+ thumb_db_.reset(new ThumbnailDatabase);
+ if (thumb_db_->Init(thumb_name, NULL) != sql::INIT_OK)
+ thumb_db_.reset();
+
+ text_db_.reset(new TextDatabaseManager(dir_,
+ main_db_.get(), main_db_.get()));
+ if (!text_db_->Init(NULL))
+ text_db_.reset();
+
+ expirer_.SetDatabases(main_db_.get(), archived_db_.get(), thumb_db_.get(),
+ text_db_.get());
+ }
+
+ void TearDown() {
+ ClearLastNotifications();
+
+ expirer_.SetDatabases(NULL, NULL, NULL, NULL);
+
+ main_db_.reset();
+ archived_db_.reset();
+ thumb_db_.reset();
+ text_db_.reset();
+ file_util::Delete(dir_, true);
+ }
+
+ // BroadcastNotificationDelegate implementation.
+ void BroadcastNotifications(NotificationType type,
+ HistoryDetails* details_deleted) {
+ // This gets called when there are notifications to broadcast. Instead, we
+ // store them so we can tell that the correct notifications were sent.
+ notifications_.push_back(std::make_pair(type, details_deleted));
+ }
+};
+
+// The example data consists of 4 visits. The middle two visits are to the
+// same URL, while the first and last are for unique ones. This allows a test
+// for the oldest or newest to include both a URL that should get totally
+// deleted (the one on the end) and one that should only get a visit deleted
+// (the one in the middle) when it picks the proper threshold time.
+//
+// Each visit has indexed data, and each URL has a thumbnail. The first two
+// URLs will share the same favicon, while the last one will have a unique
+// favicon. The second visit for the middle URL is typed.
+//
+// The IDs of the added URLs, and the times of the four added visits will be
+// added to the given arrays.
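+//
+// For reference, the layout produced below is:
+//   visit_times[0] = now - 3 days -> url_ids[0] (favicon1, 1 visit)
+//   visit_times[1] = now - 2 days -> url_ids[1] (favicon1, 2 visits)
+//   visit_times[2] = now - 1 day  -> url_ids[1] (the typed visit)
+//   visit_times[3] = now          -> url_ids[2] (favicon2, "goats" body)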
+void ExpireHistoryTest::AddExampleData(URLID url_ids[3], Time visit_times[4]) {
+ if (!main_db_.get() || !text_db_.get())
+ return;
+
+ // Times for the four visits.
+ visit_times[3] = Time::Now();
+ visit_times[2] = visit_times[3] - TimeDelta::FromDays(1);
+ visit_times[1] = visit_times[3] - TimeDelta::FromDays(2);
+ visit_times[0] = visit_times[3] - TimeDelta::FromDays(3);
+
+ // Two favicons. The first two URLs will share the same one, while the last
+ // one will have a unique favicon.
+ FavIconID favicon1 = thumb_db_->AddFavIcon(GURL("http://favicon/url1"));
+ FavIconID favicon2 = thumb_db_->AddFavIcon(GURL("http://favicon/url2"));
+
+ // Three URLs.
+ URLRow url_row1(GURL("http://www.google.com/1"));
+ url_row1.set_last_visit(visit_times[0]);
+ url_row1.set_favicon_id(favicon1);
+ url_row1.set_visit_count(1);
+ url_ids[0] = main_db_->AddURL(url_row1);
+
+ URLRow url_row2(GURL("http://www.google.com/2"));
+ url_row2.set_last_visit(visit_times[2]);
+ url_row2.set_favicon_id(favicon1);
+ url_row2.set_visit_count(2);
+ url_row2.set_typed_count(1);
+ url_ids[1] = main_db_->AddURL(url_row2);
+
+ URLRow url_row3(GURL("http://www.google.com/3"));
+ url_row3.set_last_visit(visit_times[3]);
+ url_row3.set_favicon_id(favicon2);
+ url_row3.set_visit_count(1);
+ url_ids[2] = main_db_->AddURL(url_row3);
+
+ // Thumbnails for each URL.
+ scoped_ptr<SkBitmap> thumbnail(
+ gfx::JPEGCodec::Decode(kGoogleThumbnail, sizeof(kGoogleThumbnail)));
+ ThumbnailScore score(0.25, true, true, Time::Now());
+
+ Time time;
+ GURL gurl;
+ thumb_db_->SetPageThumbnail(gurl, url_ids[0], *thumbnail, score, time);
+ thumb_db_->SetPageThumbnail(gurl, url_ids[1], *thumbnail, score, time);
+ thumb_db_->SetPageThumbnail(gurl, url_ids[2], *thumbnail, score, time);
+
+ // Four visits.
+ VisitRow visit_row1;
+ visit_row1.url_id = url_ids[0];
+ visit_row1.visit_time = visit_times[0];
+ visit_row1.is_indexed = true;
+ main_db_->AddVisit(&visit_row1);
+
+ VisitRow visit_row2;
+ visit_row2.url_id = url_ids[1];
+ visit_row2.visit_time = visit_times[1];
+ visit_row2.is_indexed = true;
+ main_db_->AddVisit(&visit_row2);
+
+ VisitRow visit_row3;
+ visit_row3.url_id = url_ids[1];
+ visit_row3.visit_time = visit_times[2];
+ visit_row3.is_indexed = true;
+ visit_row3.transition = PageTransition::TYPED;
+ main_db_->AddVisit(&visit_row3);
+
+ VisitRow visit_row4;
+ visit_row4.url_id = url_ids[2];
+ visit_row4.visit_time = visit_times[3];
+ visit_row4.is_indexed = true;
+ main_db_->AddVisit(&visit_row4);
+
+ // Full text index for each visit.
+ text_db_->AddPageData(url_row1.url(), visit_row1.url_id, visit_row1.visit_id,
+ visit_row1.visit_time, UTF8ToUTF16("title"),
+ UTF8ToUTF16("body"));
+
+ text_db_->AddPageData(url_row2.url(), visit_row2.url_id, visit_row2.visit_id,
+ visit_row2.visit_time, UTF8ToUTF16("title"),
+ UTF8ToUTF16("body"));
+ text_db_->AddPageData(url_row2.url(), visit_row3.url_id, visit_row3.visit_id,
+ visit_row3.visit_time, UTF8ToUTF16("title"),
+ UTF8ToUTF16("body"));
+
+ // Note the special text in this URL. We'll search the file for this string
+ // to make sure it doesn't hang around after the delete.
+ text_db_->AddPageData(url_row3.url(), visit_row4.url_id, visit_row4.visit_id,
+ visit_row4.visit_time, UTF8ToUTF16("title"),
+ UTF8ToUTF16("goats body"));
+}
+
+bool ExpireHistoryTest::HasFavIcon(FavIconID favicon_id) {
+ if (!thumb_db_.get())
+ return false;
+ Time last_updated;
+ std::vector<unsigned char> icon_data_unused;
+ GURL icon_url;
+ return thumb_db_->GetFavIcon(favicon_id, &last_updated, &icon_data_unused,
+ &icon_url);
+}
+
+bool ExpireHistoryTest::HasThumbnail(URLID url_id) {
+ std::vector<unsigned char> temp_data;
+ return thumb_db_->GetPageThumbnail(url_id, &temp_data);
+}
+
+int ExpireHistoryTest::CountTextMatchesForURL(const GURL& url) {
+ if (!text_db_.get())
+ return 0;
+
+ // "body" should match all pages in the example data.
+ std::vector<TextDatabase::Match> results;
+ QueryOptions options;
+ Time first_time;
+ text_db_->GetTextMatches(UTF8ToUTF16("body"), options,
+ &results, &first_time);
+
+ int count = 0;
+ for (size_t i = 0; i < results.size(); i++) {
+ if (results[i].url == url)
+ count++;
+ }
+ return count;
+}
+
+void ExpireHistoryTest::EnsureURLInfoGone(const URLRow& row) {
+ // Verify the URL no longer exists.
+ URLRow temp_row;
+ EXPECT_FALSE(main_db_->GetURLRow(row.id(), &temp_row));
+
+ // The indexed data should be gone.
+ EXPECT_EQ(0, CountTextMatchesForURL(row.url()));
+
+ // There should be no visits.
+ VisitVector visits;
+ main_db_->GetVisitsForURL(row.id(), &visits);
+ EXPECT_EQ(0U, visits.size());
+
+ // Thumbnail should be gone.
+ EXPECT_FALSE(HasThumbnail(row.id()));
+
+ // Check the notifications. There should be a delete notification with this
+ // URL in it. There should also be a "typed URL changed" notification if the
+ // row is marked typed.
+ bool found_delete_notification = false;
+ bool found_typed_changed_notification = false;
+ for (size_t i = 0; i < notifications_.size(); i++) {
+ if (notifications_[i].first == NotificationType::HISTORY_URLS_DELETED) {
+ const URLsDeletedDetails* deleted_details =
+ reinterpret_cast<URLsDeletedDetails*>(notifications_[i].second);
+ if (deleted_details->urls.find(row.url()) !=
+ deleted_details->urls.end()) {
+ found_delete_notification = true;
+ }
+ } else if (notifications_[i].first ==
+ NotificationType::HISTORY_TYPED_URLS_MODIFIED) {
+ // See if we got a typed URL changed notification.
+ const URLsModifiedDetails* modified_details =
+ reinterpret_cast<URLsModifiedDetails*>(notifications_[i].second);
+ for (size_t cur_url = 0; cur_url < modified_details->changed_urls.size();
+ cur_url++) {
+ if (modified_details->changed_urls[cur_url].url() == row.url())
+ found_typed_changed_notification = true;
+ }
+ } else if (notifications_[i].first ==
+ NotificationType::HISTORY_URL_VISITED) {
+ // See if we got a visited URL notification.
+ const URLVisitedDetails* visited_details =
+ reinterpret_cast<URLVisitedDetails*>(notifications_[i].second);
+ if (visited_details->row.url() == row.url())
+ found_typed_changed_notification = true;
+ }
+ }
+ EXPECT_TRUE(found_delete_notification);
+ EXPECT_EQ(row.typed_count() > 0, found_typed_changed_notification);
+}
+
+TEST_F(ExpireHistoryTest, DeleteFaviconsIfPossible) {
+ // Add a favicon record.
+ const GURL favicon_url("http://www.google.com/favicon.ico");
+ FavIconID icon_id = thumb_db_->AddFavIcon(favicon_url);
+ EXPECT_TRUE(icon_id);
+ EXPECT_TRUE(HasFavIcon(icon_id));
+
+ // The favicon should be deletable with no users.
+ std::set<FavIconID> favicon_set;
+ favicon_set.insert(icon_id);
+ expirer_.DeleteFaviconsIfPossible(favicon_set);
+ EXPECT_FALSE(HasFavIcon(icon_id));
+
+ // Add back the favicon.
+ icon_id = thumb_db_->AddFavIcon(favicon_url);
+ EXPECT_TRUE(icon_id);
+ EXPECT_TRUE(HasFavIcon(icon_id));
+
+ // Add a page that references the favicon.
+ URLRow row(GURL("http://www.google.com/2"));
+ row.set_visit_count(1);
+ row.set_favicon_id(icon_id);
+ EXPECT_TRUE(main_db_->AddURL(row));
+
+ // Favicon should not be deletable.
+ favicon_set.clear();
+ favicon_set.insert(icon_id);
+ expirer_.DeleteFaviconsIfPossible(favicon_set);
+ EXPECT_TRUE(HasFavIcon(icon_id));
+}
+
+// static
+bool ExpireHistoryTest::IsStringInFile(const FilePath& filename,
+ const char* str) {
+ std::string contents;
+ EXPECT_TRUE(file_util::ReadFileToString(filename, &contents));
+ return contents.find(str) != std::string::npos;
+}
+
+// Deletes a URL that is the last referencer of its favicon, so the favicon
+// should also get deleted.
+// Fails near end of month. http://crbug.com/43586
+TEST_F(ExpireHistoryTest, FLAKY_DeleteURLAndFavicon) {
+ URLID url_ids[3];
+ Time visit_times[4];
+ AddExampleData(url_ids, visit_times);
+
+ // Verify things are the way we expect with a URL row, favicon, thumbnail.
+ URLRow last_row;
+ ASSERT_TRUE(main_db_->GetURLRow(url_ids[2], &last_row));
+ EXPECT_TRUE(HasFavIcon(last_row.favicon_id()));
+ EXPECT_TRUE(HasThumbnail(url_ids[2]));
+
+ VisitVector visits;
+ main_db_->GetVisitsForURL(url_ids[2], &visits);
+ ASSERT_EQ(1U, visits.size());
+ EXPECT_EQ(1, CountTextMatchesForURL(last_row.url()));
+
+ // In this test we also make sure that any pending entries in the text
+ // database manager are removed.
+ text_db_->AddPageURL(last_row.url(), last_row.id(), visits[0].visit_id,
+ visits[0].visit_time);
+
+ // Compute the text DB filename.
+ FilePath fts_filename = dir_.Append(
+ TextDatabase::IDToFileName(text_db_->TimeToID(visit_times[3])));
+
+ // When checking the file, the database must be closed. We then re-initialize
+ // it just like the test set-up did.
+ text_db_.reset();
+ EXPECT_TRUE(IsStringInFile(fts_filename, "goats"));
+ text_db_.reset(new TextDatabaseManager(dir_,
+ main_db_.get(), main_db_.get()));
+ ASSERT_TRUE(text_db_->Init(NULL));
+ expirer_.SetDatabases(main_db_.get(), archived_db_.get(), thumb_db_.get(),
+ text_db_.get());
+
+ // Delete the URL and its dependencies.
+ expirer_.DeleteURL(last_row.url());
+
+ // The string should be removed from the file. FTS can mark it as gone but
+ // doesn't remove it from the file; we want to be sure we're doing the latter.
+ text_db_.reset();
+ EXPECT_FALSE(IsStringInFile(fts_filename, "goats"));
+ text_db_.reset(new TextDatabaseManager(dir_,
+ main_db_.get(), main_db_.get()));
+ ASSERT_TRUE(text_db_->Init(NULL));
+ expirer_.SetDatabases(main_db_.get(), archived_db_.get(), thumb_db_.get(),
+ text_db_.get());
+
+ // Run the text database expirer. This will flush any pending entries so we
+ // can check that nothing was committed. We use a time far in the future so
+ // that anything added recently will get flushed.
+ TimeTicks expiration_time = TimeTicks::Now() + TimeDelta::FromDays(1);
+ text_db_->FlushOldChangesForTime(expiration_time);
+
+ // All the normal data + the favicon should be gone.
+ EnsureURLInfoGone(last_row);
+ EXPECT_FALSE(HasFavIcon(last_row.favicon_id()));
+}
+
+// Deletes a URL with a favicon that other URLs reference, so that the favicon
+// should not get deleted. This also tests deleting more than one visit.
+TEST_F(ExpireHistoryTest, DeleteURLWithoutFavicon) {
+ URLID url_ids[3];
+ Time visit_times[4];
+ AddExampleData(url_ids, visit_times);
+
+ // Verify things are the way we expect with a URL row, favicon, thumbnail.
+ URLRow last_row;
+ ASSERT_TRUE(main_db_->GetURLRow(url_ids[1], &last_row));
+ EXPECT_TRUE(HasFavIcon(last_row.favicon_id()));
+ EXPECT_TRUE(HasThumbnail(url_ids[1]));
+
+ VisitVector visits;
+ main_db_->GetVisitsForURL(url_ids[1], &visits);
+ EXPECT_EQ(2U, visits.size());
+ EXPECT_EQ(1, CountTextMatchesForURL(last_row.url()));
+
+ // Delete the URL and its dependencies.
+ expirer_.DeleteURL(last_row.url());
+
+ // All the normal data except the favicon should be gone.
+ EnsureURLInfoGone(last_row);
+ EXPECT_TRUE(HasFavIcon(last_row.favicon_id()));
+}
+
+// DeleteURL should not delete starred urls.
+TEST_F(ExpireHistoryTest, DontDeleteStarredURL) {
+ URLID url_ids[3];
+ Time visit_times[4];
+ AddExampleData(url_ids, visit_times);
+
+ URLRow url_row;
+ ASSERT_TRUE(main_db_->GetURLRow(url_ids[2], &url_row));
+
+ // Star the last URL.
+ StarURL(url_row.url());
+
+ // Attempt to delete the url.
+ expirer_.DeleteURL(url_row.url());
+
+ // Because the url is starred, it shouldn't be deleted.
+ GURL url = url_row.url();
+ ASSERT_TRUE(main_db_->GetRowForURL(url, &url_row));
+
+ // And the favicon should exist.
+ EXPECT_TRUE(HasFavIcon(url_row.favicon_id()));
+
+ // But there should be no fts.
+ ASSERT_EQ(0, CountTextMatchesForURL(url_row.url()));
+
+ // And no visits.
+ VisitVector visits;
+ main_db_->GetVisitsForURL(url_row.id(), &visits);
+ ASSERT_EQ(0U, visits.size());
+
+ // Should still have the thumbnail.
+ ASSERT_TRUE(HasThumbnail(url_row.id()));
+
+ // Unstar the URL and delete again.
+ bookmark_model_.SetURLStarred(url, std::wstring(), false);
+ expirer_.DeleteURL(url);
+
+ // Now it should be completely deleted.
+ EnsureURLInfoGone(url_row);
+}
+
+// Expires all visits more recent than a given time, with no starred items.
+// Our time threshold is such that one URL should be updated (we delete one of
+// the two visits) and one is deleted.
+TEST_F(ExpireHistoryTest, FlushRecentURLsUnstarred) {
+ URLID url_ids[3];
+ Time visit_times[4];
+ AddExampleData(url_ids, visit_times);
+
+ URLRow url_row1, url_row2;
+ ASSERT_TRUE(main_db_->GetURLRow(url_ids[1], &url_row1));
+ ASSERT_TRUE(main_db_->GetURLRow(url_ids[2], &url_row2));
+
+ // In this test we also make sure that any pending entries in the text
+ // database manager are removed.
+ VisitVector visits;
+ main_db_->GetVisitsForURL(url_ids[2], &visits);
+ ASSERT_EQ(1U, visits.size());
+ text_db_->AddPageURL(url_row2.url(), url_row2.id(), visits[0].visit_id,
+ visits[0].visit_time);
+
+ // This should delete the last two visits.
+ std::set<GURL> restrict_urls;
+ expirer_.ExpireHistoryBetween(restrict_urls, visit_times[2], Time());
+
+ // Run the text database expirer. This will flush any pending entries so we
+ // can check that nothing was committed. We use a time far in the future so
+ // that anything added recently will get flushed.
+ TimeTicks expiration_time = TimeTicks::Now() + TimeDelta::FromDays(1);
+ text_db_->FlushOldChangesForTime(expiration_time);
+
+ // Verify that the middle URL had its last visit deleted only.
+ visits.clear();
+ main_db_->GetVisitsForURL(url_ids[1], &visits);
+ EXPECT_EQ(1U, visits.size());
+ EXPECT_EQ(0, CountTextMatchesForURL(url_row1.url()));
+
+ // Verify that the middle URL visit time and visit counts were updated.
+ URLRow temp_row;
+ ASSERT_TRUE(main_db_->GetURLRow(url_ids[1], &temp_row));
+ EXPECT_TRUE(visit_times[2] == url_row1.last_visit()); // Previous value.
+ EXPECT_TRUE(visit_times[1] == temp_row.last_visit()); // New value.
+ EXPECT_EQ(2, url_row1.visit_count());
+ EXPECT_EQ(1, temp_row.visit_count());
+ EXPECT_EQ(1, url_row1.typed_count());
+ EXPECT_EQ(0, temp_row.typed_count());
+
+ // Verify that the middle URL's favicon and thumbnail are still there.
+ EXPECT_TRUE(HasFavIcon(url_row1.favicon_id()));
+ EXPECT_TRUE(HasThumbnail(url_row1.id()));
+
+ // Verify that the last URL was deleted.
+ EnsureURLInfoGone(url_row2);
+ EXPECT_FALSE(HasFavIcon(url_row2.favicon_id()));
+}
+
+// Expires only a specific URL's visits more recent than a given time, with no
+// starred items. Our time threshold is such that the URL should be updated
+// (we delete one of the two visits).
+TEST_F(ExpireHistoryTest, FlushRecentURLsUnstarredRestricted) {
+ URLID url_ids[3];
+ Time visit_times[4];
+ AddExampleData(url_ids, visit_times);
+
+ URLRow url_row1, url_row2;
+ ASSERT_TRUE(main_db_->GetURLRow(url_ids[1], &url_row1));
+ ASSERT_TRUE(main_db_->GetURLRow(url_ids[2], &url_row2));
+
+ // In this test we also make sure that any pending entries in the text
+ // database manager are removed.
+ VisitVector visits;
+ main_db_->GetVisitsForURL(url_ids[2], &visits);
+ ASSERT_EQ(1U, visits.size());
+ text_db_->AddPageURL(url_row2.url(), url_row2.id(), visits[0].visit_id,
+ visits[0].visit_time);
+
+ // This should delete only url_row1's last visit; the other visit in this
+ // time range belongs to url_row2, which is excluded by |restrict_urls|.
+ std::set<GURL> restrict_urls;
+ restrict_urls.insert(url_row1.url());
+ expirer_.ExpireHistoryBetween(restrict_urls, visit_times[2], Time());
+
+ // Run the text database expirer. This will flush any pending entries so we
+ // can check that nothing was committed. We use a time far in the future so
+ // that anything added recently will get flushed.
+ TimeTicks expiration_time = TimeTicks::Now() + TimeDelta::FromDays(1);
+ text_db_->FlushOldChangesForTime(expiration_time);
+
+ // Verify that the middle URL had its last visit deleted only.
+ visits.clear();
+ main_db_->GetVisitsForURL(url_ids[1], &visits);
+ EXPECT_EQ(1U, visits.size());
+ EXPECT_EQ(0, CountTextMatchesForURL(url_row1.url()));
+
+ // Verify that the middle URL visit time and visit counts were updated.
+ URLRow temp_row;
+ ASSERT_TRUE(main_db_->GetURLRow(url_ids[1], &temp_row));
+ EXPECT_TRUE(visit_times[2] == url_row1.last_visit()); // Previous value.
+ EXPECT_TRUE(visit_times[1] == temp_row.last_visit()); // New value.
+ EXPECT_EQ(2, url_row1.visit_count());
+ EXPECT_EQ(1, temp_row.visit_count());
+ EXPECT_EQ(1, url_row1.typed_count());
+ EXPECT_EQ(0, temp_row.typed_count());
+
+ // Verify that the middle URL's favicon and thumbnail are still there.
+ EXPECT_TRUE(HasFavIcon(url_row1.favicon_id()));
+ EXPECT_TRUE(HasThumbnail(url_row1.id()));
+
+ // Verify that the last URL was not touched.
+ EXPECT_TRUE(main_db_->GetURLRow(url_ids[2], &temp_row));
+ EXPECT_TRUE(HasFavIcon(url_row2.favicon_id()));
+ EXPECT_TRUE(HasThumbnail(url_row2.id()));
+}
+
+// Expires starred URLs; they shouldn't get deleted.
+TEST_F(ExpireHistoryTest, FlushRecentURLsStarred) {
+ URLID url_ids[3];
+ Time visit_times[4];
+ AddExampleData(url_ids, visit_times);
+
+ URLRow url_row1, url_row2;
+ ASSERT_TRUE(main_db_->GetURLRow(url_ids[1], &url_row1));
+ ASSERT_TRUE(main_db_->GetURLRow(url_ids[2], &url_row2));
+
+ // Star the last two URLs.
+ StarURL(url_row1.url());
+ StarURL(url_row2.url());
+
+ // This should delete the last two visits.
+ std::set<GURL> restrict_urls;
+ expirer_.ExpireHistoryBetween(restrict_urls, visit_times[2], Time());
+
+ // The URL rows should still exist.
+ URLRow new_url_row1, new_url_row2;
+ ASSERT_TRUE(main_db_->GetURLRow(url_ids[1], &new_url_row1));
+ ASSERT_TRUE(main_db_->GetURLRow(url_ids[2], &new_url_row2));
+
+ // The visit times should be updated.
+ EXPECT_TRUE(new_url_row1.last_visit() == visit_times[1]);
+ EXPECT_TRUE(new_url_row2.last_visit().is_null()); // No last visit time.
+
+ // Visit/typed count should not be updated for bookmarks.
+ EXPECT_EQ(0, new_url_row1.typed_count());
+ EXPECT_EQ(1, new_url_row1.visit_count());
+ EXPECT_EQ(0, new_url_row2.typed_count());
+ EXPECT_EQ(0, new_url_row2.visit_count());
+
+ // Thumbnails and favicons should still exist. Note that we keep thumbnails
+ // that may have been updated since the time threshold. Since the URL still
+ // exists in history, this should not be a privacy problem; we only update
+ // the visit counts in this case for consistency anyway.
+ EXPECT_TRUE(HasFavIcon(new_url_row1.favicon_id()));
+ EXPECT_TRUE(HasThumbnail(new_url_row1.id()));
+ EXPECT_TRUE(HasFavIcon(new_url_row2.favicon_id()));
+ EXPECT_TRUE(HasThumbnail(new_url_row2.id()));
+}
+
+TEST_F(ExpireHistoryTest, ArchiveHistoryBeforeUnstarred) {
+ URLID url_ids[3];
+ Time visit_times[4];
+ AddExampleData(url_ids, visit_times);
+
+ URLRow url_row1, url_row2;
+ ASSERT_TRUE(main_db_->GetURLRow(url_ids[1], &url_row1));
+ ASSERT_TRUE(main_db_->GetURLRow(url_ids[2], &url_row2));
+
+ // Archive the oldest two visits. This will actually result in deleting them
+ // since their transition types are empty (not important).
+ expirer_.ArchiveHistoryBefore(visit_times[1]);
+
+ // The first URL should be deleted, the second should not be affected.
+ URLRow temp_row;
+ EXPECT_FALSE(main_db_->GetURLRow(url_ids[0], &temp_row));
+ EXPECT_TRUE(main_db_->GetURLRow(url_ids[1], &temp_row));
+ EXPECT_TRUE(main_db_->GetURLRow(url_ids[2], &temp_row));
+
+ // Make sure the archived database has nothing in it.
+ EXPECT_FALSE(archived_db_->GetRowForURL(url_row1.url(), NULL));
+ EXPECT_FALSE(archived_db_->GetRowForURL(url_row2.url(), NULL));
+
+ // Now archive one more visit so that the middle URL should be removed. This
+ // one will actually be archived instead of deleted.
+ expirer_.ArchiveHistoryBefore(visit_times[2]);
+ EXPECT_FALSE(main_db_->GetURLRow(url_ids[1], &temp_row));
+ EXPECT_TRUE(main_db_->GetURLRow(url_ids[2], &temp_row));
+
+ // Make sure the archived database has an entry for the second URL.
+ URLRow archived_row;
+ // Note that the ID is different in the archived DB, so look up by URL.
+ EXPECT_TRUE(archived_db_->GetRowForURL(url_row1.url(), &archived_row));
+ VisitVector archived_visits;
+ archived_db_->GetVisitsForURL(archived_row.id(), &archived_visits);
+ EXPECT_EQ(1U, archived_visits.size());
+}
+
+TEST_F(ExpireHistoryTest, ArchiveHistoryBeforeStarred) {
+ URLID url_ids[3];
+ Time visit_times[4];
+ AddExampleData(url_ids, visit_times);
+
+ URLRow url_row0, url_row1;
+ ASSERT_TRUE(main_db_->GetURLRow(url_ids[0], &url_row0));
+ ASSERT_TRUE(main_db_->GetURLRow(url_ids[1], &url_row1));
+
+ // Star the URLs.
+ StarURL(url_row0.url());
+ StarURL(url_row1.url());
+
+ // Now archive the first three visits (first two URLs). The visits should be
+ // removed from the main database (two deleted, one archived), but the URL
+ // records should not be deleted since they are starred.
+ expirer_.ArchiveHistoryBefore(visit_times[2]);
+
+ // The first URL should have its visit deleted, but it should still be present
+ // in the main DB and not in the archived one since it is starred.
+ URLRow temp_row;
+ ASSERT_TRUE(main_db_->GetURLRow(url_ids[0], &temp_row));
+ // Note that the ID is different in the archived DB, so look up by URL.
+ EXPECT_FALSE(archived_db_->GetRowForURL(temp_row.url(), NULL));
+ VisitVector visits;
+ main_db_->GetVisitsForURL(temp_row.id(), &visits);
+ EXPECT_EQ(0U, visits.size());
+
+ // The second URL should have its first visit deleted and its second visit
+ // archived. It should be present in both the main DB (because it's starred)
+ // and the archived DB (for the archived visit).
+ ASSERT_TRUE(main_db_->GetURLRow(url_ids[1], &temp_row));
+ main_db_->GetVisitsForURL(temp_row.id(), &visits);
+ EXPECT_EQ(0U, visits.size());
+
+ // Note that the ID is different in the archived DB, so look up by URL.
+ ASSERT_TRUE(archived_db_->GetRowForURL(temp_row.url(), &temp_row));
+ archived_db_->GetVisitsForURL(temp_row.id(), &visits);
+ ASSERT_EQ(1U, visits.size());
+ EXPECT_TRUE(visit_times[2] == visits[0].visit_time);
+
+ // The third URL should be unchanged.
+ EXPECT_TRUE(main_db_->GetURLRow(url_ids[2], &temp_row));
+ EXPECT_FALSE(archived_db_->GetRowForURL(temp_row.url(), NULL));
+}
+
+// Tests the return values from ArchiveSomeOldHistory. The rest of the
+// functionality of this function is tested by the ArchiveHistoryBefore*
+// tests, which use this function internally.
+TEST_F(ExpireHistoryTest, ArchiveSomeOldHistory) {
+ URLID url_ids[3];
+ Time visit_times[4];
+ AddExampleData(url_ids, visit_times);
+ const ExpiringVisitsReader* reader = expirer_.GetAllVisitsReader();
+
+ // Deleting a time range with no URLs should return false (nothing found).
+ EXPECT_FALSE(expirer_.ArchiveSomeOldHistory(
+ visit_times[0] - TimeDelta::FromDays(100), reader, 1));
+
+ // Deleting a time range that yields fewer than the max number of results
+ // should also return false (there will only be one visit deleted in this
+ // range).
+ EXPECT_FALSE(expirer_.ArchiveSomeOldHistory(visit_times[0], reader, 2));
+
+ // Deleting a time range with the max number of results should return true
+ // (max deleted).
+ EXPECT_TRUE(expirer_.ArchiveSomeOldHistory(visit_times[2], reader, 1));
+}
+
+TEST_F(ExpireHistoryTest, ExpiringVisitsReader) {
+ URLID url_ids[3];
+ Time visit_times[4];
+ AddExampleData(url_ids, visit_times);
+
+ const ExpiringVisitsReader* all = expirer_.GetAllVisitsReader();
+ const ExpiringVisitsReader* auto_subframes =
+ expirer_.GetAutoSubframeVisitsReader();
+
+ VisitVector visits;
+ Time now = Time::Now();
+
+ // Verify that the early expiration threshold, stored in the meta table, is
+ // initialized.
+ EXPECT_TRUE(main_db_->GetEarlyExpirationThreshold() ==
+ Time::FromInternalValue(1L));
+
+ // First, attempt reading AUTO_SUBFRAME visits. We should get none.
+ EXPECT_FALSE(auto_subframes->Read(now, main_db_.get(), &visits, 1));
+ EXPECT_EQ(0U, visits.size());
+
+ // Verify that the early expiration threshold was updated, since there are no
+ // AUTO_SUBFRAME visits in the given time range.
+ EXPECT_TRUE(now <= main_db_->GetEarlyExpirationThreshold());
+
+ // Now, read all visits and verify that there's at least one.
+ EXPECT_TRUE(all->Read(now, main_db_.get(), &visits, 1));
+ EXPECT_EQ(1U, visits.size());
+}
+
+// TODO(brettw) add some visits with no URL to make sure everything is updated
+// properly. Have the visits also refer to nonexistent FTS rows.
+//
+// Maybe also refer to invalid favicons.
+
+} // namespace history
diff --git a/chrome/browser/history/history.cc b/chrome/browser/history/history.cc
new file mode 100644
index 0000000..0b92b6f
--- /dev/null
+++ b/chrome/browser/history/history.cc
@@ -0,0 +1,762 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// The history system runs on a background thread so that potentially slow
+// database operations don't delay the browser. This backend processing is
+// represented by HistoryBackend. The HistoryService's job is to dispatch to
+// that thread.
+//
+// Main thread History thread
+// ----------- --------------
+// HistoryService <----------------> HistoryBackend
+// -> HistoryDatabase
+// -> SQLite connection to History
+// -> ArchivedDatabase
+// -> SQLite connection to Archived History
+// -> TextDatabaseManager
+// -> SQLite connection to one month's data
+// -> SQLite connection to one month's data
+// ...
+// -> ThumbnailDatabase
+// -> SQLite connection to Thumbnails
+// (and favicons)
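+//
+// Callers on the main thread talk to HistoryService, which posts requests to
+// the history thread and routes replies back through the CancelableRequest
+// machinery. A typical call site looks roughly like this (a sketch only;
+// MyClass and OnQueryURLDone are placeholders, not code from this file):
+//
+//   history_service->QueryURL(url, true /* want_visits */, &consumer_,
+//                             NewCallback(this, &MyClass::OnQueryURLDone));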
+
+#include "chrome/browser/history/history.h"
+
+#include "app/l10n_util.h"
+#include "base/callback.h"
+#include "base/message_loop.h"
+#include "base/path_service.h"
+#include "base/ref_counted.h"
+#include "base/task.h"
+#include "chrome/browser/autocomplete/history_url_provider.h"
+#include "chrome/browser/browser_list.h"
+#include "chrome/browser/browser_process.h"
+#include "chrome/browser/browser_window.h"
+#include "chrome/browser/chrome_thread.h"
+#include "chrome/browser/history/download_types.h"
+#include "chrome/browser/history/history_backend.h"
+#include "chrome/browser/history/history_notifications.h"
+#include "chrome/browser/history/history_types.h"
+#include "chrome/browser/history/in_memory_database.h"
+#include "chrome/browser/history/in_memory_history_backend.h"
+#include "chrome/browser/history/top_sites.h"
+#include "chrome/browser/profile.h"
+#include "chrome/browser/visitedlink_master.h"
+#include "chrome/common/chrome_constants.h"
+#include "chrome/common/notification_service.h"
+#include "chrome/common/thumbnail_score.h"
+#include "chrome/common/url_constants.h"
+#include "grit/chromium_strings.h"
+#include "grit/generated_resources.h"
+#include "third_party/skia/include/core/SkBitmap.h"
+
+using base::Time;
+using history::HistoryBackend;
+
+namespace {
+
+static const char* kHistoryThreadName = "Chrome_HistoryThread";
+
+} // namespace
+
+// Sends messages from the backend to us on the main thread. This must be a
+// separate class from the history service so that it can hold a reference to
+// the history service (otherwise we would have to manually AddRef and
+// Release when the Backend has a reference to us).
+class HistoryService::BackendDelegate : public HistoryBackend::Delegate {
+ public:
+ explicit BackendDelegate(HistoryService* history_service)
+ : history_service_(history_service),
+ message_loop_(MessageLoop::current()) {
+ }
+
+ virtual void NotifyProfileError(int message_id) {
+ // Send the error to the history service on the main thread.
+ message_loop_->PostTask(FROM_HERE, NewRunnableMethod(history_service_.get(),
+ &HistoryService::NotifyProfileError, message_id));
+ }
+
+ virtual void SetInMemoryBackend(
+ history::InMemoryHistoryBackend* backend) {
+ // Send the backend to the history service on the main thread.
+ message_loop_->PostTask(FROM_HERE, NewRunnableMethod(history_service_.get(),
+ &HistoryService::SetInMemoryBackend, backend));
+ }
+
+ virtual void BroadcastNotifications(NotificationType type,
+ history::HistoryDetails* details) {
+ // Send the notification on the history thread.
+ if (NotificationService::current()) {
+ Details<history::HistoryDetails> det(details);
+ NotificationService::current()->Notify(type,
+ NotificationService::AllSources(),
+ det);
+ }
+ // Send the notification to the history service on the main thread.
+ message_loop_->PostTask(FROM_HERE, NewRunnableMethod(history_service_.get(),
+ &HistoryService::BroadcastNotifications, type, details));
+ }
+
+ virtual void DBLoaded() {
+ message_loop_->PostTask(FROM_HERE, NewRunnableMethod(history_service_.get(),
+ &HistoryService::OnDBLoaded));
+ }
+
+ virtual void StartTopSitesMigration() {
+ message_loop_->PostTask(FROM_HERE, NewRunnableMethod(history_service_.get(),
+ &HistoryService::StartTopSitesMigration));
+ }
+
+ private:
+ scoped_refptr<HistoryService> history_service_;
+ MessageLoop* message_loop_;
+};
+
+// static
+const history::StarID HistoryService::kBookmarkBarID = 1;
+
+// The history thread is intentionally not a ChromeThread because the
+// sync integration unit tests depend on being able to create more than one
+// history thread.
+HistoryService::HistoryService()
+ : thread_(new base::Thread(kHistoryThreadName)),
+ profile_(NULL),
+ backend_loaded_(false),
+ bookmark_service_(NULL),
+ no_db_(false) {
+ // Is NULL when running generate_profile.
+ if (NotificationService::current()) {
+ registrar_.Add(this, NotificationType::HISTORY_URLS_DELETED,
+ Source<Profile>(profile_));
+ }
+}
+
+HistoryService::HistoryService(Profile* profile)
+ : thread_(new base::Thread(kHistoryThreadName)),
+ profile_(profile),
+ backend_loaded_(false),
+ bookmark_service_(NULL),
+ no_db_(false) {
+ registrar_.Add(this, NotificationType::HISTORY_URLS_DELETED,
+ Source<Profile>(profile_));
+}
+
+HistoryService::~HistoryService() {
+ // Shutdown the backend. This does nothing if Cleanup was already invoked.
+ Cleanup();
+}
+
+bool HistoryService::BackendLoaded() {
+ // NOTE: We start the backend loading even though it completes asynchronously
+ // and thus won't affect the return value of this function. This is because
+ // callers of this assume that if the backend isn't yet loaded it will be
+ // soon, so they will either listen for notifications or just retry this call
+ // later. If we've purged the backend, we haven't necessarily restarted it
+ // loading by now, so we need to trigger the load in order to maintain that
+ // expectation.
+ LoadBackendIfNecessary();
+ return backend_loaded_;
+}
+
+void HistoryService::UnloadBackend() {
+ if (!history_backend_)
+ return; // Already unloaded.
+
+ // Get rid of the in-memory backend.
+ in_memory_backend_.reset();
+
+ // The backend's destructor must run on the history thread since it is not
+ // threadsafe. So this thread must not be the last thread holding a reference
+ // to the backend, or a crash could happen.
+ //
+ // We have a reference to the history backend. There is also an extra
+ // reference held by our delegate installed in the backend, which
+ // HistoryBackend::Closing will release. This means if we scheduled a call
+ // to HistoryBackend::Closing and *then* released our backend reference, there
+ // will be a race between us and the backend's Closing function to see who is
+ // the last holder of a reference. If the backend thread's Closing manages to
+ // run before we release our backend refptr, the last reference will be held
+ // by this thread and the destructor will be called from here.
+ //
+ // Therefore, we create a task to run the Closing operation first. This holds
+ // a reference to the backend. Then we release our reference, then we schedule
+ // the task to run. After the task runs, it will delete its reference from
+ // the history thread, ensuring everything works properly.
+ Task* closing_task =
+ NewRunnableMethod(history_backend_.get(), &HistoryBackend::Closing);
+ history_backend_ = NULL;
+ ScheduleTask(PRIORITY_NORMAL, closing_task);
+}
+
+void HistoryService::Cleanup() {
+ if (!thread_) {
+ // We've already cleaned up.
+ return;
+ }
+
+ // Unload the backend.
+ UnloadBackend();
+
+ // Delete the thread, which joins with the background thread. We defensively
+ // NULL the pointer before deleting it in case somebody tries to use it
+ // during shutdown, but this shouldn't happen.
+ base::Thread* thread = thread_;
+ thread_ = NULL;
+ delete thread;
+}
+
+void HistoryService::NotifyRenderProcessHostDestruction(const void* host) {
+ ScheduleAndForget(PRIORITY_NORMAL,
+ &HistoryBackend::NotifyRenderProcessHostDestruction, host);
+}
+
+history::URLDatabase* HistoryService::InMemoryDatabase() {
+ // NOTE: See comments in BackendLoaded() as to why we call
+ // LoadBackendIfNecessary() here even though it won't affect the return value
+ // for this call.
+ LoadBackendIfNecessary();
+ if (in_memory_backend_.get())
+ return in_memory_backend_->db();
+ return NULL;
+}
+
+void HistoryService::SetSegmentPresentationIndex(int64 segment_id, int index) {
+ ScheduleAndForget(PRIORITY_UI,
+ &HistoryBackend::SetSegmentPresentationIndex,
+ segment_id, index);
+}
+
+void HistoryService::SetKeywordSearchTermsForURL(const GURL& url,
+ TemplateURL::IDType keyword_id,
+ const string16& term) {
+ ScheduleAndForget(PRIORITY_UI,
+ &HistoryBackend::SetKeywordSearchTermsForURL,
+ url, keyword_id, term);
+}
+
+void HistoryService::DeleteAllSearchTermsForKeyword(
+ TemplateURL::IDType keyword_id) {
+ ScheduleAndForget(PRIORITY_UI,
+ &HistoryBackend::DeleteAllSearchTermsForKeyword,
+ keyword_id);
+}
+
+HistoryService::Handle HistoryService::GetMostRecentKeywordSearchTerms(
+ TemplateURL::IDType keyword_id,
+ const string16& prefix,
+ int max_count,
+ CancelableRequestConsumerBase* consumer,
+ GetMostRecentKeywordSearchTermsCallback* callback) {
+ return Schedule(PRIORITY_UI, &HistoryBackend::GetMostRecentKeywordSearchTerms,
+ consumer,
+ new history::GetMostRecentKeywordSearchTermsRequest(callback),
+ keyword_id, prefix, max_count);
+}
+
+void HistoryService::URLsNoLongerBookmarked(const std::set<GURL>& urls) {
+ ScheduleAndForget(PRIORITY_NORMAL, &HistoryBackend::URLsNoLongerBookmarked,
+ urls);
+}
+
+HistoryService::Handle HistoryService::ScheduleDBTask(
+ HistoryDBTask* task,
+ CancelableRequestConsumerBase* consumer) {
+ history::HistoryDBTaskRequest* request = new history::HistoryDBTaskRequest(
+ NewCallback(task, &HistoryDBTask::DoneRunOnMainThread));
+ request->value = task; // The value is the task to execute.
+ return Schedule(PRIORITY_UI, &HistoryBackend::ProcessDBTask, consumer,
+ request);
+}
+
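+// A minimal sketch of a caller-defined task (assuming the HistoryDBTask
+// interface declared in history.h; MyTask is a placeholder):
+//
+//   class MyTask : public HistoryDBTask {
+//     virtual bool RunOnDBThread(history::HistoryBackend* backend,
+//                                history::HistoryDatabase* db) {
+//       // Runs on the history thread; return true when the task is done.
+//       return true;
+//     }
+//     virtual void DoneRunOnMainThread() {}
+//   };
+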
+HistoryService::Handle HistoryService::QuerySegmentUsageSince(
+ CancelableRequestConsumerBase* consumer,
+ const Time from_time,
+ int max_result_count,
+ SegmentQueryCallback* callback) {
+ return Schedule(PRIORITY_UI, &HistoryBackend::QuerySegmentUsage,
+ consumer, new history::QuerySegmentUsageRequest(callback),
+ from_time, max_result_count);
+}
+
+void HistoryService::SetOnBackendDestroyTask(Task* task) {
+ ScheduleAndForget(PRIORITY_NORMAL, &HistoryBackend::SetOnBackendDestroyTask,
+ MessageLoop::current(), task);
+}
+
+void HistoryService::AddPage(const GURL& url,
+ const void* id_scope,
+ int32 page_id,
+ const GURL& referrer,
+ PageTransition::Type transition,
+ const history::RedirectList& redirects,
+ bool did_replace_entry) {
+ AddPage(url, Time::Now(), id_scope, page_id, referrer, transition, redirects,
+ did_replace_entry);
+}
+
+void HistoryService::AddPage(const GURL& url,
+ Time time,
+ const void* id_scope,
+ int32 page_id,
+ const GURL& referrer,
+ PageTransition::Type transition,
+ const history::RedirectList& redirects,
+ bool did_replace_entry) {
+ DCHECK(thread_) << "History service being called after cleanup";
+
+ // Filter out unwanted URLs. We don't add auto-subframe URLs. They are a
+ // large part of history (think iframes for ads) and we never display them in
+ // history UI. We will still add manual subframes, which are frames the user
+ // explicitly navigated to (e.g. by clicking a link).
+ if (!CanAddURL(url))
+ return;
+
+ // Add link & all redirects to visited link list.
+ VisitedLinkMaster* visited_links;
+ if (profile_ && (visited_links = profile_->GetVisitedLinkMaster())) {
+ visited_links->AddURL(url);
+
+ if (!redirects.empty()) {
+ // We should not be asked to add a page in the middle of a redirect chain.
+ DCHECK(redirects[redirects.size() - 1] == url);
+
+ // We need the !redirects.empty() condition above since size_t is unsigned
+ // and will wrap around when we subtract one from a 0 size.
+ for (size_t i = 0; i < redirects.size() - 1; i++)
+ visited_links->AddURL(redirects[i]);
+ }
+ }
+
+ scoped_refptr<history::HistoryAddPageArgs> request(
+ new history::HistoryAddPageArgs(url, time, id_scope, page_id,
+ referrer, redirects, transition,
+ did_replace_entry));
+ ScheduleAndForget(PRIORITY_NORMAL, &HistoryBackend::AddPage, request);
+}
+
+void HistoryService::SetPageTitle(const GURL& url,
+ const string16& title) {
+ ScheduleAndForget(PRIORITY_NORMAL, &HistoryBackend::SetPageTitle, url, title);
+}
+
+void HistoryService::AddPageWithDetails(const GURL& url,
+ const string16& title,
+ int visit_count,
+ int typed_count,
+ Time last_visit,
+ bool hidden) {
+ // Filter out unwanted URLs.
+ if (!CanAddURL(url))
+ return;
+
+ // Add to the visited links system.
+ VisitedLinkMaster* visited_links;
+ if (profile_ && (visited_links = profile_->GetVisitedLinkMaster()))
+ visited_links->AddURL(url);
+
+ history::URLRow row(url);
+ row.set_title(title);
+ row.set_visit_count(visit_count);
+ row.set_typed_count(typed_count);
+ row.set_last_visit(last_visit);
+ row.set_hidden(hidden);
+
+ std::vector<history::URLRow> rows;
+ rows.push_back(row);
+
+ ScheduleAndForget(PRIORITY_NORMAL,
+ &HistoryBackend::AddPagesWithDetails, rows);
+}
+
+void HistoryService::AddPagesWithDetails(
+ const std::vector<history::URLRow>& info) {
+
+ // Add to the visited links system.
+ VisitedLinkMaster* visited_links;
+ if (profile_ && (visited_links = profile_->GetVisitedLinkMaster())) {
+ std::vector<GURL> urls;
+ urls.reserve(info.size());
+ for (std::vector<history::URLRow>::const_iterator i = info.begin();
+ i != info.end();
+ ++i)
+ urls.push_back(i->url());
+
+ visited_links->AddURLs(urls);
+ }
+
+ ScheduleAndForget(PRIORITY_NORMAL,
+ &HistoryBackend::AddPagesWithDetails, info);
+}
+
+void HistoryService::SetPageContents(const GURL& url,
+ const string16& contents) {
+ if (!CanAddURL(url))
+ return;
+
+ ScheduleAndForget(PRIORITY_LOW, &HistoryBackend::SetPageContents,
+ url, contents);
+}
+
+void HistoryService::SetPageThumbnail(const GURL& page_url,
+ const SkBitmap& thumbnail,
+ const ThumbnailScore& score) {
+ if (!CanAddURL(page_url))
+ return;
+
+ ScheduleAndForget(PRIORITY_NORMAL, &HistoryBackend::SetPageThumbnail,
+ page_url, thumbnail, score);
+}
+
+HistoryService::Handle HistoryService::GetPageThumbnail(
+ const GURL& page_url,
+ CancelableRequestConsumerBase* consumer,
+ ThumbnailDataCallback* callback) {
+ return Schedule(PRIORITY_NORMAL, &HistoryBackend::GetPageThumbnail, consumer,
+ new history::GetPageThumbnailRequest(callback), page_url);
+}
+
+void HistoryService::GetFavicon(FaviconService::GetFaviconRequest* request,
+ const GURL& icon_url) {
+ Schedule(PRIORITY_NORMAL, &HistoryBackend::GetFavIcon, NULL, request,
+ icon_url);
+}
+
+void HistoryService::UpdateFaviconMappingAndFetch(
+ FaviconService::GetFaviconRequest* request,
+ const GURL& page_url,
+ const GURL& icon_url) {
+ Schedule(PRIORITY_NORMAL, &HistoryBackend::UpdateFavIconMappingAndFetch, NULL,
+ request, page_url, icon_url);
+}
+
+void HistoryService::GetFaviconForURL(
+ FaviconService::GetFaviconRequest* request,
+ const GURL& page_url) {
+ Schedule(PRIORITY_NORMAL, &HistoryBackend::GetFavIconForURL, NULL, request,
+ page_url);
+}
+
+void HistoryService::SetFavicon(const GURL& page_url,
+ const GURL& icon_url,
+ const std::vector<unsigned char>& image_data) {
+ if (!CanAddURL(page_url))
+ return;
+
+ ScheduleAndForget(PRIORITY_NORMAL, &HistoryBackend::SetFavIcon,
+ page_url, icon_url,
+ scoped_refptr<RefCountedMemory>(new RefCountedBytes(image_data)));
+}
+
+void HistoryService::SetFaviconOutOfDateForPage(const GURL& page_url) {
+ ScheduleAndForget(PRIORITY_NORMAL,
+ &HistoryBackend::SetFavIconOutOfDateForPage, page_url);
+}
+
+void HistoryService::SetImportedFavicons(
+ const std::vector<history::ImportedFavIconUsage>& favicon_usage) {
+ ScheduleAndForget(PRIORITY_NORMAL,
+ &HistoryBackend::SetImportedFavicons, favicon_usage);
+}
+
+void HistoryService::IterateURLs(URLEnumerator* enumerator) {
+ ScheduleAndForget(PRIORITY_NORMAL, &HistoryBackend::IterateURLs, enumerator);
+}
+
+HistoryService::Handle HistoryService::QueryURL(
+ const GURL& url,
+ bool want_visits,
+ CancelableRequestConsumerBase* consumer,
+ QueryURLCallback* callback) {
+ return Schedule(PRIORITY_UI, &HistoryBackend::QueryURL, consumer,
+ new history::QueryURLRequest(callback), url, want_visits);
+}
+
+// Downloads -------------------------------------------------------------------
+
+// Handle creation of a download by creating an entry in the history service's
+// 'downloads' table.
+HistoryService::Handle HistoryService::CreateDownload(
+ const DownloadCreateInfo& create_info,
+ CancelableRequestConsumerBase* consumer,
+ HistoryService::DownloadCreateCallback* callback) {
+ return Schedule(PRIORITY_NORMAL, &HistoryBackend::CreateDownload, consumer,
+ new history::DownloadCreateRequest(callback), create_info);
+}
+
+// Handle queries for a list of all downloads in the history database's
+// 'downloads' table.
+HistoryService::Handle HistoryService::QueryDownloads(
+ CancelableRequestConsumerBase* consumer,
+ DownloadQueryCallback* callback) {
+ return Schedule(PRIORITY_NORMAL, &HistoryBackend::QueryDownloads, consumer,
+ new history::DownloadQueryRequest(callback));
+}
+
+// Changes all IN_PROGRESS entries in the database to CANCELED. IN_PROGRESS
+// entries are assumed to be corrupted: they were never updated by
+// UpdateDownload() below because of a crash or some other abnormal exit.
+void HistoryService::CleanUpInProgressEntries() {
+ ScheduleAndForget(PRIORITY_NORMAL, &HistoryBackend::CleanUpInProgressEntries);
+}
+
+// Handle updates for a particular download. This is a 'fire and forget'
+// operation, so we don't need to be called back.
+void HistoryService::UpdateDownload(int64 received_bytes,
+ int32 state,
+ int64 db_handle) {
+ ScheduleAndForget(PRIORITY_NORMAL, &HistoryBackend::UpdateDownload,
+ received_bytes, state, db_handle);
+}
+
+void HistoryService::UpdateDownloadPath(const FilePath& path,
+ int64 db_handle) {
+ ScheduleAndForget(PRIORITY_NORMAL, &HistoryBackend::UpdateDownloadPath,
+ path, db_handle);
+}
+
+void HistoryService::RemoveDownload(int64 db_handle) {
+ ScheduleAndForget(PRIORITY_NORMAL,
+ &HistoryBackend::RemoveDownload, db_handle);
+}
+
+void HistoryService::RemoveDownloadsBetween(Time remove_begin,
+ Time remove_end) {
+ ScheduleAndForget(PRIORITY_NORMAL,
+ &HistoryBackend::RemoveDownloadsBetween,
+ remove_begin,
+ remove_end);
+}
+
+HistoryService::Handle HistoryService::SearchDownloads(
+ const string16& search_text,
+ CancelableRequestConsumerBase* consumer,
+ DownloadSearchCallback* callback) {
+ return Schedule(PRIORITY_NORMAL, &HistoryBackend::SearchDownloads, consumer,
+ new history::DownloadSearchRequest(callback), search_text);
+}
+
+HistoryService::Handle HistoryService::QueryHistory(
+ const string16& text_query,
+ const history::QueryOptions& options,
+ CancelableRequestConsumerBase* consumer,
+ QueryHistoryCallback* callback) {
+ return Schedule(PRIORITY_UI, &HistoryBackend::QueryHistory, consumer,
+ new history::QueryHistoryRequest(callback),
+ text_query, options);
+}
+
+HistoryService::Handle HistoryService::QueryRedirectsFrom(
+ const GURL& from_url,
+ CancelableRequestConsumerBase* consumer,
+ QueryRedirectsCallback* callback) {
+ return Schedule(PRIORITY_UI, &HistoryBackend::QueryRedirectsFrom, consumer,
+ new history::QueryRedirectsRequest(callback), from_url);
+}
+
+HistoryService::Handle HistoryService::QueryRedirectsTo(
+ const GURL& to_url,
+ CancelableRequestConsumerBase* consumer,
+ QueryRedirectsCallback* callback) {
+ return Schedule(PRIORITY_NORMAL, &HistoryBackend::QueryRedirectsTo, consumer,
+ new history::QueryRedirectsRequest(callback), to_url);
+}
+
+HistoryService::Handle HistoryService::GetVisitCountToHost(
+ const GURL& url,
+ CancelableRequestConsumerBase* consumer,
+ GetVisitCountToHostCallback* callback) {
+ return Schedule(PRIORITY_UI, &HistoryBackend::GetVisitCountToHost, consumer,
+ new history::GetVisitCountToHostRequest(callback), url);
+}
+
+HistoryService::Handle HistoryService::QueryTopURLsAndRedirects(
+ int result_count,
+ CancelableRequestConsumerBase* consumer,
+ QueryTopURLsAndRedirectsCallback* callback) {
+ return Schedule(PRIORITY_NORMAL, &HistoryBackend::QueryTopURLsAndRedirects,
+ consumer, new history::QueryTopURLsAndRedirectsRequest(callback),
+ result_count);
+}
+
+HistoryService::Handle HistoryService::QueryMostVisitedURLs(
+ int result_count,
+ int days_back,
+ CancelableRequestConsumerBase* consumer,
+ QueryMostVisitedURLsCallback* callback) {
+ return Schedule(PRIORITY_NORMAL, &HistoryBackend::QueryMostVisitedURLs,
+ consumer,
+ new history::QueryMostVisitedURLsRequest(callback),
+ result_count, days_back);
+}
+
+void HistoryService::Observe(NotificationType type,
+ const NotificationSource& source,
+ const NotificationDetails& details) {
+ if (type != NotificationType::HISTORY_URLS_DELETED) {
+ NOTREACHED();
+ return;
+ }
+
+ // Update the visited link system for deleted URLs. We will update the
+ // visited link system for added URLs as soon as we get the add
+ // notification (we don't have to wait for the backend, which allows us to
+ // be faster to update the state).
+ //
+ // For deleted URLs, we don't typically know what will be deleted since
+ // delete notifications are by time. We would also like to be more
+ // respectful of privacy and never tell the user something is gone when it
+ // isn't. Therefore, we update the deleted URLs after the fact.
+ if (!profile_)
+ return; // No profile, probably unit testing.
+ Details<history::URLsDeletedDetails> deleted_details(details);
+ VisitedLinkMaster* visited_links = profile_->GetVisitedLinkMaster();
+ if (!visited_links)
+ return; // Nobody to update.
+ if (deleted_details->all_history)
+ visited_links->DeleteAllURLs();
+ else // Delete individual ones.
+ visited_links->DeleteURLs(deleted_details->urls);
+}
+
+bool HistoryService::Init(const FilePath& history_dir,
+ BookmarkService* bookmark_service,
+ bool no_db) {
+ if (!thread_->Start()) {
+ Cleanup();
+ return false;
+ }
+
+ history_dir_ = history_dir;
+ bookmark_service_ = bookmark_service;
+ no_db_ = no_db;
+
+ // Create the history backend.
+ LoadBackendIfNecessary();
+ return true;
+}
+
+void HistoryService::ScheduleAutocomplete(HistoryURLProvider* provider,
+ HistoryURLProviderParams* params) {
+ ScheduleAndForget(PRIORITY_UI, &HistoryBackend::ScheduleAutocomplete,
+ scoped_refptr<HistoryURLProvider>(provider), params);
+}
+
+void HistoryService::ScheduleTask(SchedulePriority priority,
+ Task* task) {
+ // FIXME(brettw) do prioritization.
+ thread_->message_loop()->PostTask(FROM_HERE, task);
+}
+
+// static
+bool HistoryService::CanAddURL(const GURL& url) {
+ if (!url.is_valid())
+ return false;
+
+ // TODO: We should allow kChromeUIScheme URLs if they have been explicitly
+ // typed. Right now, however, these are marked as typed even when triggered
+ // by a shortcut or menu action.
+ if (url.SchemeIs(chrome::kJavaScriptScheme) ||
+ url.SchemeIs(chrome::kChromeUIScheme) ||
+ url.SchemeIs(chrome::kViewSourceScheme) ||
+ url.SchemeIs(chrome::kChromeInternalScheme))
+ return false;
+
+ if (url.SchemeIs(chrome::kAboutScheme)) {
+ if (LowerCaseEqualsASCII(url.path(), "blank"))
+ return false;
+ // We allow all other about URLs since the user may like to see things
+ // like "about:memory" or "about:histograms" in their history and
+ // autocomplete.
+ }
+
+ return true;
+}
+
+void HistoryService::SetInMemoryBackend(
+ history::InMemoryHistoryBackend* mem_backend) {
+ DCHECK(!in_memory_backend_.get()) << "Setting mem DB twice";
+ in_memory_backend_.reset(mem_backend);
+
+ // The database requires additional initialization once we own it.
+ in_memory_backend_->AttachToHistoryService(profile_);
+}
+
+void HistoryService::NotifyProfileError(int message_id) {
+ Source<HistoryService> source(this);
+ NotificationService::current()->Notify(NotificationType::PROFILE_ERROR,
+ source, Details<int>(&message_id));
+}
+
+void HistoryService::DeleteURL(const GURL& url) {
+ // We will update the visited links when we observe the delete notifications.
+ ScheduleAndForget(PRIORITY_NORMAL, &HistoryBackend::DeleteURL, url);
+}
+
+void HistoryService::ExpireHistoryBetween(
+ const std::set<GURL>& restrict_urls,
+ Time begin_time, Time end_time,
+ CancelableRequestConsumerBase* consumer,
+ ExpireHistoryCallback* callback) {
+ // We will update the visited links when we observe the delete notifications.
+ Schedule(PRIORITY_UI, &HistoryBackend::ExpireHistoryBetween, consumer,
+ new history::ExpireHistoryRequest(callback),
+ restrict_urls, begin_time, end_time);
+}
+
+void HistoryService::BroadcastNotifications(
+ NotificationType type,
+ history::HistoryDetails* details_deleted) {
+ // We take ownership of the passed-in pointer and delete it. It was made for
+ // us on another thread, so the caller doesn't know when we will handle it.
+ scoped_ptr<history::HistoryDetails> details(details_deleted);
+ // TODO(evanm): this is currently necessitated by generate_profile, which
+ // runs without a browser process. generate_profile should really create
+ // a browser process, at which point this check can then be nuked.
+ if (!g_browser_process)
+ return;
+
+ // The source of all of our notifications is the profile. Note that this
+ // pointer is NULL in unit tests.
+ Source<Profile> source(profile_);
+
+ // The details object just contains the pointer to the object that the
+ // backend has allocated for us. The receiver of the notification will cast
+ // this to the proper type.
+ Details<history::HistoryDetails> det(details_deleted);
+
+ NotificationService::current()->Notify(type, source, det);
+}
+
+void HistoryService::LoadBackendIfNecessary() {
+ if (!thread_ || history_backend_)
+ return; // Failed to init, or already started loading.
+
+ scoped_refptr<HistoryBackend> backend(
+ new HistoryBackend(history_dir_,
+ new BackendDelegate(this),
+ bookmark_service_));
+ history_backend_.swap(backend);
+
+ ScheduleAndForget(PRIORITY_UI, &HistoryBackend::Init, no_db_);
+}
+
+void HistoryService::OnDBLoaded() {
+ LOG(INFO) << "History backend finished loading";
+ backend_loaded_ = true;
+ NotificationService::current()->Notify(NotificationType::HISTORY_LOADED,
+ Source<Profile>(profile_),
+ Details<HistoryService>(this));
+}
+
+void HistoryService::StartTopSitesMigration() {
+ history::TopSites* ts = profile_->GetTopSites();
+ ts->StartMigration();
+}
+
+void HistoryService::OnTopSitesReady() {
+ ScheduleAndForget(PRIORITY_NORMAL,
+ &HistoryBackend::MigrateThumbnailsDatabase);
+}
diff --git a/chrome/browser/history/history.h b/chrome/browser/history/history.h
new file mode 100644
index 0000000..9548f65
--- /dev/null
+++ b/chrome/browser/history/history.h
@@ -0,0 +1,852 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_HISTORY_HISTORY_H_
+#define CHROME_BROWSER_HISTORY_HISTORY_H_
+
+#include <string>
+#include <vector>
+
+#include "base/basictypes.h"
+#include "base/callback.h"
+#include "base/file_path.h"
+#include "base/ref_counted.h"
+#include "base/scoped_ptr.h"
+#include "base/string16.h"
+#include "base/task.h"
+#include "chrome/browser/cancelable_request.h"
+#include "chrome/browser/favicon_service.h"
+#include "chrome/browser/history/history_types.h"
+#include "chrome/browser/search_engines/template_url.h"
+#include "chrome/common/notification_registrar.h"
+#include "chrome/common/page_transition_types.h"
+#include "chrome/common/ref_counted_util.h"
+
+class BookmarkService;
+struct DownloadCreateInfo;
+class FilePath;
+class GURL;
+class HistoryURLProvider;
+struct HistoryURLProviderParams;
+class InMemoryURLDatabase;
+class MainPagesRequest;
+class PageUsageData;
+class PageUsageRequest;
+class Profile;
+class SkBitmap;
+struct ThumbnailScore;
+
+namespace base {
+class Thread;
+class Time;
+}
+
+namespace browser_sync {
+class HistoryModelWorker;
+class TypedUrlDataTypeController;
+}
+
+namespace history {
+
+class InMemoryHistoryBackend;
+class HistoryBackend;
+class HistoryDatabase;
+struct HistoryDetails;
+class HistoryQueryTest;
+class URLDatabase;
+
+} // namespace history
+
+// HistoryDBTask can be used to process arbitrary work on the history backend
+// thread. HistoryDBTask is scheduled using HistoryService::ScheduleDBTask.
+// When HistoryBackend processes the task it invokes RunOnDBThread. Once the
+// task completes, and provided it has not been canceled, DoneRunOnMainThread
+// is invoked back on the main thread.
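+//
+// An illustrative sketch only (MyCounterTask and its members are hypothetical,
+// not part of this API):
+//
+//   class MyCounterTask : public HistoryDBTask {
+//    public:
+//     virtual bool RunOnDBThread(history::HistoryBackend* backend,
+//                                history::HistoryDatabase* db) {
+//       // Query |db| here and stash the result in a member variable.
+//       return true;  // Done; DoneRunOnMainThread will be invoked next.
+//     }
+//     virtual void DoneRunOnMainThread() {
+//       // Consume the stashed result on the main thread.
+//     }
+//   };
+//
+// Such a task would be handed to HistoryService::ScheduleDBTask.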
+class HistoryDBTask : public base::RefCountedThreadSafe<HistoryDBTask> {
+ public:
+ // Invoked on the database thread. The return value indicates whether the
+ // task is done. A return value of true signals the task is done and
+ // RunOnDBThread should NOT be invoked again. A return value of false
+ // indicates the task is not done, and should be run again after other
+ // tasks are given a chance to be processed.
+ virtual bool RunOnDBThread(history::HistoryBackend* backend,
+ history::HistoryDatabase* db) = 0;
+
+  // Invoked on the main thread once RunOnDBThread has returned true. This is
+  // only invoked if the request was not canceled.
+ virtual void DoneRunOnMainThread() = 0;
+
+ protected:
+ friend class base::RefCountedThreadSafe<HistoryDBTask>;
+
+ virtual ~HistoryDBTask() {}
+};
+
+// The history service records page titles and visit times, as well as
+// (eventually) information about autocomplete.
+//
+// This service is thread safe. Each request callback is invoked in the
+// thread that made the request.
+class HistoryService : public CancelableRequestProvider,
+ public NotificationObserver,
+ public base::RefCountedThreadSafe<HistoryService> {
+ public:
+ // Miscellaneous commonly-used types.
+ typedef std::vector<PageUsageData*> PageUsageDataList;
+
+ // ID (both star_id and group_id) of the bookmark bar.
+ // This entry always exists.
+ static const history::StarID kBookmarkBarID;
+
+ // Must call Init after construction.
+ explicit HistoryService(Profile* profile);
+ // The empty constructor is provided only for testing.
+ HistoryService();
+
+  // Initializes the history service, returning true on success. If it returns
+  // false, do not call any other functions. The given directory will be used
+  // for storing the history files. The BookmarkService is used when deleting
+  // URLs to test if a URL is bookmarked; it may be NULL during testing.
+ bool Init(const FilePath& history_dir, BookmarkService* bookmark_service) {
+ return Init(history_dir, bookmark_service, false);
+ }
+
+ // Triggers the backend to load if it hasn't already, and then returns whether
+ // it's finished loading.
+ bool BackendLoaded();
+
+ // Unloads the backend without actually shutting down the history service.
+ // This can be used to temporarily reduce the browser process' memory
+ // footprint.
+ void UnloadBackend();
+
+ // Called on shutdown, this will tell the history backend to complete and
+ // will release pointers to it. No other functions should be called once
+ // cleanup has happened that may dispatch to the history thread (because it
+ // will be NULL).
+ //
+ // In practice, this will be called by the service manager (BrowserProcess)
+ // when it is being destroyed. Because that reference is being destroyed, it
+ // should be impossible for anybody else to call the service, even if it is
+ // still in memory (pending requests may be holding a reference to us).
+ void Cleanup();
+
+ // RenderProcessHost pointers are used to scope page IDs (see AddPage). These
+ // objects must tell us when they are being destroyed so that we can clear
+ // out any cached data associated with that scope.
+ //
+ // The given pointer will not be dereferenced, it is only used for
+ // identification purposes, hence it is a void*.
+ void NotifyRenderProcessHostDestruction(const void* host);
+
+ // Triggers the backend to load if it hasn't already, and then returns the
+ // in-memory URL database. The returned pointer MAY BE NULL if the in-memory
+ // database has not been loaded yet. This pointer is owned by the history
+ // system. Callers should not store or cache this value.
+ //
+ // TODO(brettw) this should return the InMemoryHistoryBackend.
+ history::URLDatabase* InMemoryDatabase();
+
+ // Navigation ----------------------------------------------------------------
+
+ // Adds the given canonical URL to history with the current time as the visit
+ // time. Referrer may be the empty string.
+ //
+ // The supplied render process host is used to scope the given page ID. Page
+ // IDs are only unique inside a given render process, so we need that to
+ // differentiate them. This pointer should not be dereferenced by the history
+ // system. Since render view host pointers may be reused (if one gets deleted
+ // and a new one created at the same address), TabContents should notify
+ // us when they are being destroyed through NotifyTabContentsDestruction.
+ //
+ // The scope/ids can be NULL if there is no meaningful tracking information
+ // that can be performed on the given URL. The 'page_id' should be the ID of
+ // the current session history entry in the given process.
+ //
+  // 'redirects' is an array of redirect URLs leading to this page, with the
+  // page itself as the last item (so when there is no redirect, it will have
+  // one entry). For the convenience of callers, this array may also be empty
+  // when there are no redirects.
+ //
+ // 'did_replace_entry' is true when the navigation entry for this page has
+ // replaced the existing entry. A non-user initiated redirect causes such
+ // replacement.
+ //
+ // All "Add Page" functions will update the visited link database.
+ void AddPage(const GURL& url,
+ const void* id_scope,
+ int32 page_id,
+ const GURL& referrer,
+ PageTransition::Type transition,
+ const history::RedirectList& redirects,
+ bool did_replace_entry);
+
+ // For adding pages to history with a specific time. This is for testing
+ // purposes. Call the previous one to use the current time.
+ void AddPage(const GURL& url,
+ base::Time time,
+ const void* id_scope,
+ int32 page_id,
+ const GURL& referrer,
+ PageTransition::Type transition,
+ const history::RedirectList& redirects,
+ bool did_replace_entry);
+
+ // For adding pages to history where no tracking information can be done.
+ void AddPage(const GURL& url) {
+ AddPage(url, NULL, 0, GURL(), PageTransition::LINK, history::RedirectList(),
+ false);
+ }
+
+ // Sets the title for the given page. The page should be in history. If it
+ // is not, this operation is ignored. This call will not update the full
+ // text index. The last title set when the page is indexed will be the
+ // title in the full text index.
+ void SetPageTitle(const GURL& url, const string16& title);
+
+ // Indexing ------------------------------------------------------------------
+
+ // Notifies history of the body text of the given recently-visited URL.
+ // If the URL was not visited "recently enough," the history system may
+ // discard it.
+ void SetPageContents(const GURL& url, const string16& contents);
+
+ // Querying ------------------------------------------------------------------
+
+ // Callback class that a client can implement to iterate over URLs. The
+ // callbacks WILL BE CALLED ON THE BACKGROUND THREAD! Your implementation
+ // should handle this appropriately.
+ class URLEnumerator {
+ public:
+ virtual ~URLEnumerator() {}
+
+ // Indicates that a URL is available. There will be exactly one call for
+ // every URL in history.
+ virtual void OnURL(const GURL& url) = 0;
+
+ // Indicates we are done iterating over URLs. Once called, there will be no
+ // more callbacks made. This call is guaranteed to occur, even if there are
+ // no URLs. If all URLs were iterated, success will be true.
+ virtual void OnComplete(bool success) = 0;
+ };
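+
+  // A minimal sketch of an enumerator (hypothetical names); remember that
+  // both callbacks arrive on the background thread:
+  //
+  //   class URLCollector : public HistoryService::URLEnumerator {
+  //    public:
+  //     virtual void OnURL(const GURL& url) { urls_.push_back(url); }
+  //     virtual void OnComplete(bool success) {
+  //       // Hand |urls_| off to the owner in a thread-safe way.
+  //     }
+  //    private:
+  //     std::vector<GURL> urls_;
+  //   };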
+
+ // Enumerate all URLs in history. The given iterator will be owned by the
+ // caller, so the caller should ensure it exists until OnComplete is called.
+ // You should not generally use this since it will be slow to slurp all URLs
+ // in from the database. It is designed for rebuilding the visited link
+ // database from history.
+ void IterateURLs(URLEnumerator* iterator);
+
+ // Returns the information about the requested URL. If the URL is found,
+ // success will be true and the information will be in the URLRow parameter.
+ // On success, the visits, if requested, will be sorted by date. If they have
+ // not been requested, the pointer will be valid, but the vector will be
+ // empty.
+ //
+ // If success is false, neither the row nor the vector will be valid.
+ typedef Callback4<Handle,
+ bool, // Success flag, when false, nothing else is valid.
+ const history::URLRow*,
+ history::VisitVector*>::Type
+ QueryURLCallback;
+
+ // Queries the basic information about the URL in the history database. If
+ // the caller is interested in the visits (each time the URL is visited),
+ // set |want_visits| to true. If these are not needed, the function will be
+ // faster by setting this to false.
+ Handle QueryURL(const GURL& url,
+ bool want_visits,
+ CancelableRequestConsumerBase* consumer,
+ QueryURLCallback* callback);
+
+ // Provides the result of a query. See QueryResults in history_types.h.
+ // The common use will be to use QueryResults.Swap to suck the contents of
+ // the results out of the passed in parameter and take ownership of them.
+ typedef Callback2<Handle, history::QueryResults*>::Type
+ QueryHistoryCallback;
+
+ // Queries all history with the given options (see QueryOptions in
+ // history_types.h). If non-empty, the full-text database will be queried with
+ // the given |text_query|. If empty, all results matching the given options
+ // will be returned.
+ //
+ // This isn't totally hooked up yet, this will query the "new" full text
+ // database (see SetPageContents) which won't generally be set yet.
+ Handle QueryHistory(const string16& text_query,
+ const history::QueryOptions& options,
+ CancelableRequestConsumerBase* consumer,
+ QueryHistoryCallback* callback);
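+
+  // A hypothetical call site (|consumer_|, MyClass, and OnQueryComplete are
+  // illustrative names, not part of this API):
+  //
+  //   history::QueryOptions options;
+  //   options.max_count = 100;
+  //   history_service->QueryHistory(ASCIIToUTF16("example"), options,
+  //       &consumer_, NewCallback(this, &MyClass::OnQueryComplete));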
+
+ // Called when the results of QueryRedirectsFrom are available.
+ // The given vector will contain a list of all redirects, not counting
+ // the original page. If A redirects to B, the vector will contain only B,
+ // and A will be in 'source_url'.
+ //
+ // If there is no such URL in the database or the most recent visit has no
+ // redirect, the vector will be empty. If the history system failed for
+ // some reason, success will additionally be false. If the given page
+ // has redirected to multiple destinations, this will pick a random one.
+ typedef Callback4<Handle,
+ GURL, // from_url
+ bool, // success
+ history::RedirectList*>::Type
+ QueryRedirectsCallback;
+
+ // Schedules a query for the most recent redirect coming out of the given
+  // URL. See QueryRedirectsCallback above, which is guaranteed to be called
+ // if the request is not canceled.
+ Handle QueryRedirectsFrom(const GURL& from_url,
+ CancelableRequestConsumerBase* consumer,
+ QueryRedirectsCallback* callback);
+
+ // Schedules a query to get the most recent redirects ending at the given
+ // URL.
+ Handle QueryRedirectsTo(const GURL& to_url,
+ CancelableRequestConsumerBase* consumer,
+ QueryRedirectsCallback* callback);
+
+ typedef Callback4<Handle,
+ bool, // Were we able to determine the # of visits?
+ int, // Number of visits.
+                    base::Time>::Type // Time of first visit. Only valid when
+                                      // the bool is true and the count > 0.
+ GetVisitCountToHostCallback;
+
+  // Requests the number of visits to all urls on the scheme/host/port
+ // identified by url. This is only valid for http and https urls.
+ Handle GetVisitCountToHost(const GURL& url,
+ CancelableRequestConsumerBase* consumer,
+ GetVisitCountToHostCallback* callback);
+
+ // Called when QueryTopURLsAndRedirects completes. The vector contains a list
+ // of the top |result_count| URLs. For each of these URLs, there is an entry
+ // in the map containing redirects from the URL. For example, if we have the
+ // redirect chain A -> B -> C and A is a top visited URL, then A will be in
+ // the vector and "A => {B -> C}" will be in the map.
+ typedef Callback4<Handle,
+ bool, // Did we get the top urls and redirects?
+ std::vector<GURL>*, // List of top URLs.
+ history::RedirectMap*>::Type // Redirects for top URLs.
+ QueryTopURLsAndRedirectsCallback;
+
+ // Request the top |result_count| most visited URLs and the chain of redirects
+ // leading to each of these URLs.
+ // TODO(Nik): remove this. Use QueryMostVisitedURLs instead.
+ Handle QueryTopURLsAndRedirects(int result_count,
+ CancelableRequestConsumerBase* consumer,
+ QueryTopURLsAndRedirectsCallback* callback);
+
+ typedef Callback2<Handle, history::MostVisitedURLList>::Type
+ QueryMostVisitedURLsCallback;
+
+ // Request the |result_count| most visited URLs and the chain of
+ // redirects leading to each of these URLs. |days_back| is the
+ // number of days of history to use. Used by TopSites.
+ Handle QueryMostVisitedURLs(int result_count, int days_back,
+ CancelableRequestConsumerBase* consumer,
+ QueryMostVisitedURLsCallback* callback);
+
+ // Thumbnails ----------------------------------------------------------------
+
+ // Implemented by consumers to get thumbnail data. Called when a request for
+ // the thumbnail data is complete. Once this callback is made, the request
+ // will be completed and no other calls will be made for that handle.
+ //
+ // This function will be called even on error conditions or if there is no
+ // thumbnail for that page. In these cases, the data pointer will be NULL.
+ typedef Callback2<Handle, scoped_refptr<RefCountedBytes> >::Type
+ ThumbnailDataCallback;
+
+ // Sets the thumbnail for a given URL. The URL must be in the history
+ // database or the request will be ignored.
+ void SetPageThumbnail(const GURL& url,
+ const SkBitmap& thumbnail,
+ const ThumbnailScore& score);
+
+ // Requests a page thumbnail. See ThumbnailDataCallback definition above.
+ Handle GetPageThumbnail(const GURL& page_url,
+ CancelableRequestConsumerBase* consumer,
+ ThumbnailDataCallback* callback);
+
+ // Database management operations --------------------------------------------
+
+ // Delete all the information related to a single url.
+ void DeleteURL(const GURL& url);
+
+ // Implemented by the caller of ExpireHistoryBetween, and
+ // is called when the history service has deleted the history.
+ typedef Callback0::Type ExpireHistoryCallback;
+
+ // Removes all visits in the selected time range (including the start time),
+ // updating the URLs accordingly. This deletes the associated data, including
+ // the full text index. This function also deletes the associated favicons,
+ // if they are no longer referenced. |callback| runs when the expiration is
+ // complete. You may use null Time values to do an unbounded delete in
+ // either direction.
+ // If |restrict_urls| is not empty, only visits to the URLs in this set are
+ // removed.
+ void ExpireHistoryBetween(const std::set<GURL>& restrict_urls,
+ base::Time begin_time, base::Time end_time,
+ CancelableRequestConsumerBase* consumer,
+ ExpireHistoryCallback* callback);
+
+ // Downloads -----------------------------------------------------------------
+
+ // Implemented by the caller of 'CreateDownload' below, and is called when the
+ // history service has created a new entry for a download in the history db.
+ typedef Callback2<DownloadCreateInfo, int64>::Type DownloadCreateCallback;
+
+ // Begins a history request to create a new persistent entry for a download.
+ // 'info' contains all the download's creation state, and 'callback' runs
+ // when the history service request is complete.
+ Handle CreateDownload(const DownloadCreateInfo& info,
+ CancelableRequestConsumerBase* consumer,
+ DownloadCreateCallback* callback);
+
+  // Implemented by the caller of 'QueryDownloads' below, and is called when the
+  // history service has retrieved a list of all download state.
+ typedef Callback1<std::vector<DownloadCreateInfo>*>::Type
+ DownloadQueryCallback;
+
+ // Begins a history request to retrieve the state of all downloads in the
+ // history db. 'callback' runs when the history service request is complete,
+ // at which point 'info' contains an array of DownloadCreateInfo, one per
+ // download.
+ Handle QueryDownloads(CancelableRequestConsumerBase* consumer,
+ DownloadQueryCallback* callback);
+
+  // Begins a request to clean up entries that have been corrupted (because of
+  // a crash, for example).
+ void CleanUpInProgressEntries();
+
+ // Called to update the history service about the current state of a download.
+ // This is a 'fire and forget' query, so just pass the relevant state info to
+ // the database with no need for a callback.
+ void UpdateDownload(int64 received_bytes, int32 state, int64 db_handle);
+
+ // Called to update the history service about the path of a download.
+ // This is a 'fire and forget' query.
+ void UpdateDownloadPath(const FilePath& path, int64 db_handle);
+
+ // Permanently remove a download from the history system. This is a 'fire and
+ // forget' operation.
+ void RemoveDownload(int64 db_handle);
+
+  // Permanently removes all completed downloads from the history system within
+ // the specified range. This function does not delete downloads that are in
+ // progress or in the process of being cancelled. This is a 'fire and forget'
+ // operation. You can pass is_null times to get unbounded time in either or
+ // both directions.
+ void RemoveDownloadsBetween(base::Time remove_begin, base::Time remove_end);
+
+ // Implemented by the caller of 'SearchDownloads' below, and is called when
+ // the history system has retrieved the search results.
+ typedef Callback2<Handle, std::vector<int64>*>::Type DownloadSearchCallback;
+
+ // Search for downloads that match the search text.
+ Handle SearchDownloads(const string16& search_text,
+ CancelableRequestConsumerBase* consumer,
+ DownloadSearchCallback* callback);
+
+ // Visit Segments ------------------------------------------------------------
+
+ typedef Callback2<Handle, std::vector<PageUsageData*>*>::Type
+ SegmentQueryCallback;
+
+ // Query usage data for all visit segments since the provided time.
+ //
+ // The request is performed asynchronously and can be cancelled by using the
+ // returned handle.
+ //
+  // The vector provided to the callback and its contents are owned by the
+  // history system. It will be deeply deleted after the callback is invoked.
+  // If you want to preserve any PageUsageData instances, simply remove them
+  // from the vector.
+ //
+ // The vector contains a list of PageUsageData. Each PageUsageData ID is set
+ // to the segment ID. The URL and all the other information is set to the page
+ // representing the segment.
+ Handle QuerySegmentUsageSince(CancelableRequestConsumerBase* consumer,
+ const base::Time from_time,
+ int max_result_count,
+ SegmentQueryCallback* callback);
+
+ // Set the presentation index for the segment identified by |segment_id|.
+ void SetSegmentPresentationIndex(int64 segment_id, int index);
+
+ // Keyword search terms -----------------------------------------------------
+
+  // Sets the search terms for the specified url and keyword. |keyword_id|
+  // gives the id of the keyword and |term| the search term.
+ void SetKeywordSearchTermsForURL(const GURL& url,
+ TemplateURL::IDType keyword_id,
+ const string16& term);
+
+ // Deletes all search terms for the specified keyword.
+ void DeleteAllSearchTermsForKeyword(TemplateURL::IDType keyword_id);
+
+ typedef Callback2<Handle, std::vector<history::KeywordSearchTermVisit>*>::Type
+ GetMostRecentKeywordSearchTermsCallback;
+
+  // Returns up to |max_count| of the most recent search terms starting with
+  // |prefix|. The matching is case insensitive. The results are ordered in
+  // descending order of recency, with the most recent search term first.
+ Handle GetMostRecentKeywordSearchTerms(
+ TemplateURL::IDType keyword_id,
+ const string16& prefix,
+ int max_count,
+ CancelableRequestConsumerBase* consumer,
+ GetMostRecentKeywordSearchTermsCallback* callback);
+
+ // Bookmarks -----------------------------------------------------------------
+
+ // Notification that a URL is no longer bookmarked.
+ void URLsNoLongerBookmarked(const std::set<GURL>& urls);
+
+ // Generic Stuff -------------------------------------------------------------
+
+ typedef Callback0::Type HistoryDBTaskCallback;
+
+ // Schedules a HistoryDBTask for running on the history backend thread. See
+ // HistoryDBTask for details on what this does.
+ virtual Handle ScheduleDBTask(HistoryDBTask* task,
+ CancelableRequestConsumerBase* consumer);
+
+ // Testing -------------------------------------------------------------------
+
+  // Designed for unit tests, this passes the given task on to the history
+  // backend to be called once the history backend has terminated. This allows
+  // callers to know when the history thread is complete so that the database
+  // files can be deleted and the next test run. Otherwise, the history thread
+  // may still be running, causing problems in subsequent tests.
+ //
+ // There can be only one closing task, so this will override any previously
+ // set task. We will take ownership of the pointer and delete it when done.
+ // The task will be run on the calling thread (this function is threadsafe).
+ void SetOnBackendDestroyTask(Task* task);
+
+ // Used for unit testing and potentially importing to get known information
+  // into the database. This assumes the URL doesn't exist in the database.
+ //
+ // Calling this function many times may be slow because each call will
+ // dispatch to the history thread and will be a separate database
+ // transaction. If this functionality is needed for importing many URLs, a
+ // version that takes an array should probably be added.
+ void AddPageWithDetails(const GURL& url,
+ const string16& title,
+ int visit_count,
+ int typed_count,
+ base::Time last_visit,
+ bool hidden);
+
+ // The same as AddPageWithDetails() but takes a vector.
+ void AddPagesWithDetails(const std::vector<history::URLRow>& info);
+
+ // Starts the TopSites migration in the HistoryThread. Called by the
+ // BackendDelegate.
+ void StartTopSitesMigration();
+
+ // Called by TopSites after the thumbnails were read and it is safe
+ // to delete the thumbnails DB.
+ void OnTopSitesReady();
+
+ // Returns true if this looks like the type of URL we want to add to the
+ // history. We filter out some URLs such as JavaScript.
+ static bool CanAddURL(const GURL& url);
+
+ protected:
+ ~HistoryService();
+
+  // These are not currently used; hopefully we can do something in the future
+ // to ensure that the most important things happen first.
+ enum SchedulePriority {
+ PRIORITY_UI, // The highest priority (must respond to UI events).
+ PRIORITY_NORMAL, // Normal stuff like adding a page.
+ PRIORITY_LOW, // Low priority things like indexing or expiration.
+ };
+
+ private:
+ class BackendDelegate;
+ friend class base::RefCountedThreadSafe<HistoryService>;
+ friend class BackendDelegate;
+ friend class FaviconService;
+ friend class history::HistoryBackend;
+ friend class history::HistoryQueryTest;
+ friend class HistoryOperation;
+ friend class HistoryURLProvider;
+ friend class HistoryURLProviderTest;
+ template<typename Info, typename Callback> friend class DownloadRequest;
+ friend class PageUsageRequest;
+ friend class RedirectRequest;
+ friend class FavIconRequest;
+ friend class TestingProfile;
+
+ // Implementation of NotificationObserver.
+ virtual void Observe(NotificationType type,
+ const NotificationSource& source,
+ const NotificationDetails& details);
+
+  // Low-level Init(). Same as the public version, but adds a |no_db| parameter
+  // (set only by unittests) that causes the backend to not init its DB.
+ bool Init(const FilePath& history_dir,
+ BookmarkService* bookmark_service,
+ bool no_db);
+
+  // Called by the HistoryURLProvider class to schedule an autocomplete; the
+  // provider will be called back on the internal history thread with the
+  // history database so it can query. See history_autocomplete.cc for a
+  // diagram.
+ void ScheduleAutocomplete(HistoryURLProvider* provider,
+ HistoryURLProviderParams* params);
+
+ // Broadcasts the given notification. This is called by the backend so that
+ // the notification will be broadcast on the main thread.
+ //
+ // The |details_deleted| pointer will be sent as the "details" for the
+ // notification. The function takes ownership of the pointer and deletes it
+ // when the notification is sent (it is coming from another thread, so must
+ // be allocated on the heap).
+ void BroadcastNotifications(NotificationType type,
+ history::HistoryDetails* details_deleted);
+
+ // Initializes the backend.
+ void LoadBackendIfNecessary();
+
+ // Notification from the backend that it has finished loading. Sends
+  // notification (NotificationType::HISTORY_LOADED) and sets backend_loaded_
+  // to true.
+ void OnDBLoaded();
+
+ // FavIcon -------------------------------------------------------------------
+
+ // These favicon methods are exposed to the FaviconService. Instead of calling
+ // these methods directly you should call the respective method on the
+ // FaviconService.
+
+ // Used by the FaviconService to get a favicon from the history backend.
+ void GetFavicon(FaviconService::GetFaviconRequest* request,
+ const GURL& icon_url);
+
+ // Used by the FaviconService to update the favicon mappings on the history
+ // backend.
+ void UpdateFaviconMappingAndFetch(FaviconService::GetFaviconRequest* request,
+ const GURL& page_url,
+ const GURL& icon_url);
+
+ // Used by the FaviconService to get a favicon from the history backend.
+ void GetFaviconForURL(FaviconService::GetFaviconRequest* request,
+ const GURL& page_url);
+
+ // Used by the FaviconService to mark the favicon for the page as being out
+ // of date.
+ void SetFaviconOutOfDateForPage(const GURL& page_url);
+
+  // Used by the FaviconService for importing many favicons for many pages at
+  // once. The pages must exist; any favicons set for unknown pages will be
+  // discarded. Existing favicons will not be overwritten.
+ void SetImportedFavicons(
+ const std::vector<history::ImportedFavIconUsage>& favicon_usage);
+
+ // Used by the FaviconService to set the favicon for a page on the history
+ // backend.
+ void SetFavicon(const GURL& page_url,
+ const GURL& icon_url,
+ const std::vector<unsigned char>& image_data);
+
+ // Sets the in-memory URL database. This is called by the backend once the
+ // database is loaded to make it available.
+ void SetInMemoryBackend(history::InMemoryHistoryBackend* mem_backend);
+
+ // Called by our BackendDelegate when there is a problem reading the database.
+ // |message_id| is the relevant message in the string table to display.
+ void NotifyProfileError(int message_id);
+
+  // Call to schedule a given task for running on the history thread with the
+  // specified priority. Ownership of the task is taken.
+ void ScheduleTask(SchedulePriority priority, Task* task);
+
+ // Schedule ------------------------------------------------------------------
+ //
+ // Functions for scheduling operations on the history thread that have a
+ // handle and may be cancelable. For fire-and-forget operations, see
+ // ScheduleAndForget below.
+
+ template<typename BackendFunc, class RequestType>
+ Handle Schedule(SchedulePriority priority,
+ BackendFunc func, // Function to call on the HistoryBackend.
+ CancelableRequestConsumerBase* consumer,
+ RequestType* request) {
+ DCHECK(thread_) << "History service being called after cleanup";
+ LoadBackendIfNecessary();
+ if (consumer)
+ AddRequest(request, consumer);
+ ScheduleTask(priority,
+ NewRunnableMethod(history_backend_.get(), func,
+ scoped_refptr<RequestType>(request)));
+ return request->handle();
+ }
+
+ template<typename BackendFunc, class RequestType, typename ArgA>
+ Handle Schedule(SchedulePriority priority,
+ BackendFunc func, // Function to call on the HistoryBackend.
+ CancelableRequestConsumerBase* consumer,
+ RequestType* request,
+ const ArgA& a) {
+ DCHECK(thread_) << "History service being called after cleanup";
+ LoadBackendIfNecessary();
+ if (consumer)
+ AddRequest(request, consumer);
+ ScheduleTask(priority,
+ NewRunnableMethod(history_backend_.get(), func,
+ scoped_refptr<RequestType>(request),
+ a));
+ return request->handle();
+ }
+
+ template<typename BackendFunc,
+           class RequestType, // Descendant of CancelableRequestBase.
+ typename ArgA,
+ typename ArgB>
+ Handle Schedule(SchedulePriority priority,
+ BackendFunc func, // Function to call on the HistoryBackend.
+ CancelableRequestConsumerBase* consumer,
+ RequestType* request,
+ const ArgA& a,
+ const ArgB& b) {
+ DCHECK(thread_) << "History service being called after cleanup";
+ LoadBackendIfNecessary();
+ if (consumer)
+ AddRequest(request, consumer);
+ ScheduleTask(priority,
+ NewRunnableMethod(history_backend_.get(), func,
+ scoped_refptr<RequestType>(request),
+ a, b));
+ return request->handle();
+ }
+
+ template<typename BackendFunc,
+           class RequestType, // Descendant of CancelableRequestBase.
+ typename ArgA,
+ typename ArgB,
+ typename ArgC>
+ Handle Schedule(SchedulePriority priority,
+ BackendFunc func, // Function to call on the HistoryBackend.
+ CancelableRequestConsumerBase* consumer,
+ RequestType* request,
+ const ArgA& a,
+ const ArgB& b,
+ const ArgC& c) {
+ DCHECK(thread_) << "History service being called after cleanup";
+ LoadBackendIfNecessary();
+ if (consumer)
+ AddRequest(request, consumer);
+ ScheduleTask(priority,
+ NewRunnableMethod(history_backend_.get(), func,
+ scoped_refptr<RequestType>(request),
+ a, b, c));
+ return request->handle();
+ }
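+
+  // For reference, a backend function scheduled through these overloads is
+  // expected to take the request first, then the extra arguments; an
+  // illustrative shape (SomeQuery and SomeRequest are hypothetical):
+  //
+  //   void HistoryBackend::SomeQuery(scoped_refptr<SomeRequest> request,
+  //                                  const ArgA& a, const ArgB& b);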
+
+ // ScheduleAndForget ---------------------------------------------------------
+ //
+ // Functions for scheduling operations on the history thread that do not need
+ // any callbacks and are not cancelable.
+
+ template<typename BackendFunc>
+ void ScheduleAndForget(SchedulePriority priority,
+ BackendFunc func) { // Function to call on backend.
+ DCHECK(thread_) << "History service being called after cleanup";
+ LoadBackendIfNecessary();
+ ScheduleTask(priority, NewRunnableMethod(history_backend_.get(), func));
+ }
+
+ template<typename BackendFunc, typename ArgA>
+ void ScheduleAndForget(SchedulePriority priority,
+ BackendFunc func, // Function to call on backend.
+ const ArgA& a) {
+ DCHECK(thread_) << "History service being called after cleanup";
+ LoadBackendIfNecessary();
+ ScheduleTask(priority, NewRunnableMethod(history_backend_.get(), func, a));
+ }
+
+ template<typename BackendFunc, typename ArgA, typename ArgB>
+ void ScheduleAndForget(SchedulePriority priority,
+ BackendFunc func, // Function to call on backend.
+ const ArgA& a,
+ const ArgB& b) {
+ DCHECK(thread_) << "History service being called after cleanup";
+ LoadBackendIfNecessary();
+ ScheduleTask(priority, NewRunnableMethod(history_backend_.get(), func,
+ a, b));
+ }
+
+ template<typename BackendFunc, typename ArgA, typename ArgB, typename ArgC>
+ void ScheduleAndForget(SchedulePriority priority,
+ BackendFunc func, // Function to call on backend.
+ const ArgA& a,
+ const ArgB& b,
+ const ArgC& c) {
+ DCHECK(thread_) << "History service being called after cleanup";
+ LoadBackendIfNecessary();
+ ScheduleTask(priority, NewRunnableMethod(history_backend_.get(), func,
+ a, b, c));
+ }
+
+ template<typename BackendFunc,
+ typename ArgA,
+ typename ArgB,
+ typename ArgC,
+ typename ArgD>
+ void ScheduleAndForget(SchedulePriority priority,
+ BackendFunc func, // Function to call on backend.
+ const ArgA& a,
+ const ArgB& b,
+ const ArgC& c,
+ const ArgD& d) {
+ DCHECK(thread_) << "History service being called after cleanup";
+ LoadBackendIfNecessary();
+ ScheduleTask(priority, NewRunnableMethod(history_backend_.get(), func,
+ a, b, c, d));
+ }
+
+ NotificationRegistrar registrar_;
+
+ // Some void primitives require some internal processing in the main thread
+ // when done. We use this internal consumer for this purpose.
+ CancelableRequestConsumer internal_consumer_;
+
+  // The thread used by the history service to run complicated operations.
+ base::Thread* thread_;
+
+ // This class has most of the implementation and runs on the 'thread_'.
+ // You MUST communicate with this class ONLY through the thread_'s
+ // message_loop().
+ //
+ // This pointer will be NULL once Cleanup() has been called, meaning no
+ // more calls should be made to the history thread.
+ scoped_refptr<history::HistoryBackend> history_backend_;
+
+ // A cache of the user-typed URLs kept in memory that is used by the
+ // autocomplete system. This will be NULL until the database has been created
+ // on the background thread.
+ scoped_ptr<history::InMemoryHistoryBackend> in_memory_backend_;
+
+ // The profile, may be null when testing.
+ Profile* profile_;
+
+ // Has the backend finished loading? The backend is loaded once Init has
+ // completed.
+ bool backend_loaded_;
+
+ // Cached values from Init(), used whenever we need to reload the backend.
+ FilePath history_dir_;
+ BookmarkService* bookmark_service_;
+ bool no_db_;
+
+ DISALLOW_COPY_AND_ASSIGN(HistoryService);
+};
+
+#endif // CHROME_BROWSER_HISTORY_HISTORY_H_
diff --git a/chrome/browser/history/history_backend.cc b/chrome/browser/history/history_backend.cc
new file mode 100644
index 0000000..0f512db
--- /dev/null
+++ b/chrome/browser/history/history_backend.cc
@@ -0,0 +1,2164 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/history/history_backend.h"
+
+#include <set>
+
+#include "base/command_line.h"
+#include "base/compiler_specific.h"
+#include "base/file_util.h"
+#include "base/histogram.h"
+#include "base/message_loop.h"
+#include "base/scoped_ptr.h"
+#include "base/scoped_vector.h"
+#include "base/string_util.h"
+#include "base/time.h"
+#include "chrome/browser/autocomplete/history_url_provider.h"
+#include "chrome/browser/bookmarks/bookmark_service.h"
+#include "chrome/browser/history/download_types.h"
+#include "chrome/browser/history/history_notifications.h"
+#include "chrome/browser/history/history_publisher.h"
+#include "chrome/browser/history/in_memory_history_backend.h"
+#include "chrome/browser/history/page_usage_data.h"
+#include "chrome/common/chrome_constants.h"
+#include "chrome/common/chrome_switches.h"
+#include "chrome/common/notification_type.h"
+#include "chrome/common/url_constants.h"
+#include "googleurl/src/gurl.h"
+#include "grit/chromium_strings.h"
+#include "grit/generated_resources.h"
+#include "net/base/registry_controlled_domain.h"
+
+using base::Time;
+using base::TimeDelta;
+using base::TimeTicks;
+
+/* The HistoryBackend consists of a number of components:
+
+ HistoryDatabase (stores past 3 months of history)
+ URLDatabase (stores a list of URLs)
+ DownloadDatabase (stores a list of downloads)
+ VisitDatabase (stores a list of visits for the URLs)
+ VisitSegmentDatabase (stores groups of URLs for the most visited view).
+
+ ArchivedDatabase (stores history older than 3 months)
+ URLDatabase (stores a list of URLs)
+ DownloadDatabase (stores a list of downloads)
+ VisitDatabase (stores a list of visits for the URLs)
+
+ (this does not store visit segments as they expire after 3 mos.)
+
+  TextDatabaseManager (manages multiple text databases for different times)
+ TextDatabase (represents a single month of full-text index).
+ ...more TextDatabase objects...
+
+ ExpireHistoryBackend (manages moving things from HistoryDatabase to
+ the ArchivedDatabase and deleting)
+*/
+
+namespace history {
+
+// How long we keep segment data, in days. Currently 3 months.
+// This value needs to be greater than or equal to
+// MostVisitedModel::kMostVisitedScope, but we don't want to introduce a direct
+// dependency between MostVisitedModel and the history backend.
+static const int kSegmentDataRetention = 90;
+
+// The number of milliseconds we'll wait to do a commit, so that things are
+// batched together.
+static const int kCommitIntervalMs = 10000;
+
+// The amount of time before we re-fetch the favicon.
+static const int kFavIconRefetchDays = 7;
+
+// GetSessionTabs returns all open tabs, or tabs closed within the last
+// kSessionCloseTimeWindowSecs seconds.
+static const int kSessionCloseTimeWindowSecs = 10;
+
+// The maximum number of items we'll allow in the redirect list before
+// deleting some.
+static const int kMaxRedirectCount = 32;
+
+// The number of days old a history entry can be before it is considered "old"
+// and is archived.
+static const int kArchiveDaysThreshold = 90;
+
+// Converts from PageUsageData to MostVisitedURL. |redirects| is a
+// list of redirects for this URL. Empty list means no redirects.
+MostVisitedURL MakeMostVisitedURL(const PageUsageData& page_data,
+ const RedirectList& redirects) {
+ MostVisitedURL mv;
+ mv.url = page_data.GetURL();
+ mv.title = page_data.GetTitle();
+ if (redirects.empty()) {
+ // Redirects must contain at least the target url.
+ mv.redirects.push_back(mv.url);
+ } else {
+ mv.redirects = redirects;
+ if (mv.redirects[mv.redirects.size() - 1] != mv.url) {
+ // The last url must be the target url.
+ mv.redirects.push_back(mv.url);
+ }
+ }
+ return mv;
+}
+
+// This task is run on a timer so that commits happen at regular intervals
+// so they are batched together. The important thing about this class is that
+// it supports canceling of the task so the reference to the backend will be
+// freed. The problem is that when history is shutting down, there is likely
+// to be one of these commits still pending and holding a reference.
+//
+// The backend can call Cancel to have this task release the reference. The
+// task will still run (if we ever get to processing the event before
+// shutdown), but it will not do anything.
+//
+// Note that this is a refcounted object and is not a task in itself. It should
+// be assigned to a RunnableMethod.
+//
+// TODO(brettw): bug 1165182: This should be replaced with a
+// ScopedRunnableMethodFactory which will handle everything automatically (like
+// we do in ExpireHistoryBackend).
+class CommitLaterTask : public base::RefCounted<CommitLaterTask> {
+ public:
+ explicit CommitLaterTask(HistoryBackend* history_backend)
+ : history_backend_(history_backend) {
+ }
+
+ // The backend will call this function if it is being destroyed so that we
+ // release our reference.
+ void Cancel() {
+ history_backend_ = NULL;
+ }
+
+ void RunCommit() {
+ if (history_backend_.get())
+ history_backend_->Commit();
+ }
+
+ private:
+ friend class base::RefCounted<CommitLaterTask>;
+
+ ~CommitLaterTask() {}
+
+ scoped_refptr<HistoryBackend> history_backend_;
+};
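+
+// A sketch of the expected scheduling (hypothetical wiring; the backend's
+// ScheduleCommit does something similar):
+//
+//   scoped_refptr<CommitLaterTask> task(new CommitLaterTask(this));
+//   MessageLoop::current()->PostDelayedTask(FROM_HERE,
+//       NewRunnableMethod(task.get(), &CommitLaterTask::RunCommit),
+//       kCommitIntervalMs);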
+
+// Handles querying first the main database, then the archived database if the
+// URL is not found in the main one. It will optionally keep track of all URLs
+// seen so duplicates can be eliminated. This is used by the querying
+// sub-functions.
+//
+// TODO(brettw): This class may be able to be simplified or eliminated. After
+// this was written, QueryResults can efficiently look up by URL, so the need
+// for this extra set of previously queried URLs is less important.
+class HistoryBackend::URLQuerier {
+ public:
+ URLQuerier(URLDatabase* main_db, URLDatabase* archived_db, bool track_unique)
+ : main_db_(main_db),
+ archived_db_(archived_db),
+ track_unique_(track_unique) {
+ }
+
+ // When we're tracking unique URLs, returns true if this URL has been
+ // previously queried. Only call when tracking unique URLs.
+ bool HasURL(const GURL& url) {
+ DCHECK(track_unique_);
+ return unique_urls_.find(url) != unique_urls_.end();
+ }
+
+ bool GetRowForURL(const GURL& url, URLRow* row) {
+ if (!main_db_->GetRowForURL(url, row)) {
+ if (!archived_db_ || !archived_db_->GetRowForURL(url, row)) {
+ // This row is neither in the main nor the archived DB.
+ return false;
+ }
+ }
+
+ if (track_unique_)
+ unique_urls_.insert(url);
+ return true;
+ }
+
+ private:
+ URLDatabase* main_db_; // Guaranteed non-NULL.
+ URLDatabase* archived_db_; // Possibly NULL.
+
+ bool track_unique_;
+
+ // When track_unique_ is set, this is updated with every URL seen so far.
+ std::set<GURL> unique_urls_;
+
+ DISALLOW_COPY_AND_ASSIGN(URLQuerier);
+};
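+
+// A sketch of the intended use by a querying sub-function (illustrative only):
+//
+//   URLQuerier querier(db_.get(), archived_db_.get(), true);
+//   URLRow row;
+//   if (!querier.HasURL(url) && querier.GetRowForURL(url, &row)) {
+//     // |row| came from the main or archived DB; |url| is now tracked, so
+//     // later iterations can skip it via HasURL().
+//   }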
+
+// HistoryBackend --------------------------------------------------------------
+
+HistoryBackend::HistoryBackend(const FilePath& history_dir,
+ Delegate* delegate,
+ BookmarkService* bookmark_service)
+ : delegate_(delegate),
+ history_dir_(history_dir),
+ ALLOW_THIS_IN_INITIALIZER_LIST(expirer_(this, bookmark_service)),
+ recent_redirects_(kMaxRedirectCount),
+ backend_destroy_message_loop_(NULL),
+ backend_destroy_task_(NULL),
+ segment_queried_(false),
+ bookmark_service_(bookmark_service) {
+}
+
+HistoryBackend::~HistoryBackend() {
+ DCHECK(!scheduled_commit_) << "Deleting without cleanup";
+ ReleaseDBTasks();
+
+ // First close the databases before optionally running the "destroy" task.
+ if (db_.get()) {
+ // Commit the long-running transaction.
+ db_->CommitTransaction();
+ db_.reset();
+ }
+ if (thumbnail_db_.get()) {
+ thumbnail_db_->CommitTransaction();
+ thumbnail_db_.reset();
+ }
+ if (archived_db_.get()) {
+ archived_db_->CommitTransaction();
+ archived_db_.reset();
+ }
+ if (text_database_.get()) {
+ text_database_->CommitTransaction();
+ text_database_.reset();
+ }
+
+ if (backend_destroy_task_) {
+ // Notify an interested party (typically a unit test) that we're done.
+ DCHECK(backend_destroy_message_loop_);
+ backend_destroy_message_loop_->PostTask(FROM_HERE, backend_destroy_task_);
+ }
+}
+
+void HistoryBackend::Init(bool force_fail) {
+ if (!force_fail)
+ InitImpl();
+ delegate_->DBLoaded();
+}
+
+void HistoryBackend::SetOnBackendDestroyTask(MessageLoop* message_loop,
+ Task* task) {
+ if (backend_destroy_task_) {
+ DLOG(WARNING) << "Setting more than one destroy task, overriding";
+ delete backend_destroy_task_;
+ }
+ backend_destroy_message_loop_ = message_loop;
+ backend_destroy_task_ = task;
+}
+
+void HistoryBackend::Closing() {
+ // Any scheduled commit will have a reference to us, we must make it
+ // release that reference before we can be destroyed.
+ CancelScheduledCommit();
+
+  // Release our reference to the delegate; this reference will be keeping the
+  // history service alive.
+ delegate_.reset();
+}
+
+void HistoryBackend::NotifyRenderProcessHostDestruction(const void* host) {
+ tracker_.NotifyRenderProcessHostDestruction(host);
+}
+
+FilePath HistoryBackend::GetThumbnailFileName() const {
+ return history_dir_.Append(chrome::kThumbnailsFilename);
+}
+
+FilePath HistoryBackend::GetFaviconsFileName() const {
+ return history_dir_.Append(chrome::kFaviconsFilename);
+}
+
+FilePath HistoryBackend::GetArchivedFileName() const {
+ return history_dir_.Append(chrome::kArchivedHistoryFilename);
+}
+
+SegmentID HistoryBackend::GetLastSegmentID(VisitID from_visit) {
+  // A set is used to detect referrer loops, which should not happen but can
+  // if the database is corrupt.
+ std::set<VisitID> visit_set;
+ VisitID visit_id = from_visit;
+ while (visit_id) {
+ VisitRow row;
+ if (!db_->GetRowForVisit(visit_id, &row))
+ return 0;
+ if (row.segment_id)
+      return row.segment_id; // Found a visit in this chain with a segment.
+
+ // Check the referrer of this visit, if any.
+ visit_id = row.referring_visit;
+
+ if (visit_set.find(visit_id) != visit_set.end()) {
+ NOTREACHED() << "Loop in referer chain, giving up";
+ break;
+ }
+ visit_set.insert(visit_id);
+ }
+ return 0;
+}
+
+SegmentID HistoryBackend::UpdateSegments(const GURL& url,
+ VisitID from_visit,
+ VisitID visit_id,
+ PageTransition::Type transition_type,
+ const Time ts) {
+ if (!db_.get())
+ return 0;
+
+ // We only consider main frames.
+ if (!PageTransition::IsMainFrame(transition_type))
+ return 0;
+
+ SegmentID segment_id = 0;
+ PageTransition::Type t = PageTransition::StripQualifier(transition_type);
+
+ // Are we at the beginning of a new segment?
+ if (t == PageTransition::TYPED || t == PageTransition::AUTO_BOOKMARK) {
+ // If so, create or get the segment.
+ std::string segment_name = db_->ComputeSegmentName(url);
+ URLID url_id = db_->GetRowForURL(url, NULL);
+ if (!url_id)
+ return 0;
+
+ if (!(segment_id = db_->GetSegmentNamed(segment_name))) {
+ if (!(segment_id = db_->CreateSegment(url_id, segment_name))) {
+ NOTREACHED();
+ return 0;
+ }
+ } else {
+ // Note: if we update an existing segment, we update the url used to
+ // represent that segment in order to minimize stale most visited
+ // images.
+ db_->UpdateSegmentRepresentationURL(segment_id, url_id);
+ }
+ } else {
+ // Note: it is possible there is no segment ID set for this visit chain.
+ // This can happen if the initial navigation wasn't AUTO_BOOKMARK or
+    // TYPED (for example, GENERATED). In this case this visit doesn't count
+ // toward any segment.
+ if (!(segment_id = GetLastSegmentID(from_visit)))
+ return 0;
+ }
+
+ // Set the segment in the visit.
+ if (!db_->SetSegmentID(visit_id, segment_id)) {
+ NOTREACHED();
+ return 0;
+ }
+
+ // Finally, increase the counter for that segment / day.
+ if (!db_->IncreaseSegmentVisitCount(segment_id, ts, 1)) {
+ NOTREACHED();
+ return 0;
+ }
+ return segment_id;
+}
+
+void HistoryBackend::AddPage(scoped_refptr<HistoryAddPageArgs> request) {
+ DLOG(INFO) << "Adding page " << request->url.possibly_invalid_spec();
+
+ if (!db_.get())
+ return;
+
+ // Will be filled with the URL ID and the visit ID of the last addition.
+ std::pair<URLID, VisitID> last_ids(0, tracker_.GetLastVisit(
+ request->id_scope, request->page_id, request->referrer));
+
+ VisitID from_visit_id = last_ids.second;
+
+ // If a redirect chain is given, we expect the last item in that chain to be
+ // the final URL.
+ DCHECK(request->redirects.size() == 0 ||
+ request->redirects.back() == request->url);
+
+ // Avoid duplicating times in the database, at least as long as pages are
+ // added in order. However, we don't want to disallow pages from recording
+ // times earlier than our last_recorded_time_, because someone might set
+ // their machine's clock back.
+ if (last_requested_time_ == request->time) {
+ last_recorded_time_ = last_recorded_time_ + TimeDelta::FromMicroseconds(1);
+ } else {
+ last_requested_time_ = request->time;
+ last_recorded_time_ = last_requested_time_;
+ }
+
+ // If the user is adding older history, we need to make sure our times
+ // are correct.
+ if (request->time < first_recorded_time_)
+ first_recorded_time_ = request->time;
+
+ PageTransition::Type transition =
+ PageTransition::StripQualifier(request->transition);
+ bool is_keyword_generated = (transition == PageTransition::KEYWORD_GENERATED);
+
+ if (request->redirects.size() <= 1) {
+ // The single entry is both a chain start and end.
+ PageTransition::Type t = request->transition |
+ PageTransition::CHAIN_START | PageTransition::CHAIN_END;
+
+ // No redirect case (one element means just the page itself).
+ last_ids = AddPageVisit(request->url, last_recorded_time_,
+ last_ids.second, t);
+
+ // Update the segment for this visit. KEYWORD_GENERATED visits should not
+ // result in changing most visited, so we don't update segments (most
+ // visited db).
+ if (!is_keyword_generated) {
+ UpdateSegments(request->url, from_visit_id, last_ids.second, t,
+ last_recorded_time_);
+ }
+ } else {
+ // Redirect case. Add the redirect chain.
+
+ PageTransition::Type redirect_info = PageTransition::CHAIN_START;
+
+ if (request->redirects[0].SchemeIs(chrome::kAboutScheme)) {
+ // When the redirect source + referrer is "about" we skip it. This
+ // happens when a page opens a new frame/window to about:blank and then
+ // script sets the URL to somewhere else (used to hide the referrer). It
+ // would be nice to keep all these redirects properly but we don't ever
+ // see the initial about:blank load, so we don't know where the
+ // subsequent client redirect came from.
+ //
+ // In this case, we just don't bother hooking up the source of the
+ // redirects, so we remove it.
+ request->redirects.erase(request->redirects.begin());
+ } else if (request->transition & PageTransition::CLIENT_REDIRECT) {
+ redirect_info = PageTransition::CLIENT_REDIRECT;
+ // The first entry in the redirect chain initiated a client redirect.
+ // We don't add this to the database since the referrer is already
+ // there, so we skip over it but change the transition type of the first
+ // transition to client redirect.
+ //
+ // The referrer is invalid when restoring a session that features an
+ // https tab that redirects to a different host or to http. In this
+ // case we don't need to reconnect the new redirect with the existing
+ // chain.
+ if (request->referrer.is_valid()) {
+ DCHECK(request->referrer == request->redirects[0]);
+ request->redirects.erase(request->redirects.begin());
+
+ // If the navigation entry for this visit has replaced that for the
+ // first visit, remove the CHAIN_END marker from the first visit. This
+ // can be called a lot, for example, the page cycler, and most of the
+ // time we won't have changed anything.
+ VisitRow visit_row;
+ if (request->did_replace_entry &&
+ db_->GetRowForVisit(last_ids.second, &visit_row) &&
+            (visit_row.transition & PageTransition::CHAIN_END)) {
+ visit_row.transition &= ~PageTransition::CHAIN_END;
+ db_->UpdateVisitRow(visit_row);
+ }
+ }
+ }
+
+ for (size_t redirect_index = 0; redirect_index < request->redirects.size();
+ redirect_index++) {
+ PageTransition::Type t = transition | redirect_info;
+
+ // If this is the last transition, add a CHAIN_END marker
+ if (redirect_index == (request->redirects.size() - 1))
+ t = t | PageTransition::CHAIN_END;
+
+ // Record all redirect visits with the same timestamp. We don't display
+ // them anyway, and if we ever decide to, we can reconstruct their order
+ // from the redirect chain.
+ last_ids = AddPageVisit(request->redirects[redirect_index],
+ last_recorded_time_, last_ids.second, t);
+ if (t & PageTransition::CHAIN_START) {
+ // Update the segment for this visit.
+ UpdateSegments(request->redirects[redirect_index],
+ from_visit_id, last_ids.second, t, last_recorded_time_);
+ }
+
+      // Subsequent transitions in the redirect list must all be server
+ // redirects.
+ redirect_info = PageTransition::SERVER_REDIRECT;
+ }
+
+ // Last, save this redirect chain for later so we can set titles & favicons
+ // on the redirected pages properly. It is indexed by the destination page.
+ recent_redirects_.Put(request->url, request->redirects);
+ }
+
+ // TODO(brettw) bug 1140015: Add an "add page" notification so the history
+ // views can keep in sync.
+
+ // Add the last visit to the tracker so we can get outgoing transitions.
+ // TODO(evanm): Due to http://b/1194536 we lose the referrers of a subframe
+ // navigation anyway, so last_visit_id is always zero for them. But adding
+ // them here confuses main frame history, so we skip them for now.
+ if (transition != PageTransition::AUTO_SUBFRAME &&
+ transition != PageTransition::MANUAL_SUBFRAME && !is_keyword_generated) {
+ tracker_.AddVisit(request->id_scope, request->page_id, request->url,
+ last_ids.second);
+ }
+
+ if (text_database_.get()) {
+ text_database_->AddPageURL(request->url, last_ids.first, last_ids.second,
+ last_recorded_time_);
+ }
+
+ ScheduleCommit();
+}
+
+void HistoryBackend::InitImpl() {
+ DCHECK(!db_.get()) << "Initializing HistoryBackend twice";
+  // In the rare case where the db fails to initialize, a dialog may get shown
+  // that blocks the caller, yet allows other messages through. For this reason
+ // we only set db_ to the created database if creation is successful. That
+ // way other methods won't do anything as db_ is still NULL.
+
+ TimeTicks beginning_time = TimeTicks::Now();
+
+ // Compute the file names. Note that the index file can be removed when the
+ // text db manager is finished being hooked up.
+ FilePath history_name = history_dir_.Append(chrome::kHistoryFilename);
+ FilePath thumbnail_name = GetThumbnailFileName();
+ FilePath archived_name = GetArchivedFileName();
+ FilePath tmp_bookmarks_file = history_dir_.Append(
+ chrome::kHistoryBookmarksFileName);
+
+ // History database.
+ db_.reset(new HistoryDatabase());
+ switch (db_->Init(history_name, tmp_bookmarks_file)) {
+ case sql::INIT_OK:
+ break;
+ case sql::INIT_FAILURE:
+ // A NULL db_ will cause all calls on this object to notice this error
+ // and to not continue.
+ delegate_->NotifyProfileError(IDS_COULDNT_OPEN_PROFILE_ERROR);
+ db_.reset();
+ return;
+ case sql::INIT_TOO_NEW:
+ delegate_->NotifyProfileError(IDS_PROFILE_TOO_NEW_ERROR);
+ db_.reset();
+ return;
+ default:
+ NOTREACHED();
+ }
+
+ // Fill the in-memory database and send it back to the history service on the
+ // main thread.
+ InMemoryHistoryBackend* mem_backend = new InMemoryHistoryBackend;
+ if (mem_backend->Init(history_name))
+ delegate_->SetInMemoryBackend(mem_backend); // Takes ownership of pointer.
+ else
+ delete mem_backend; // Error case, run without the in-memory DB.
+ db_->BeginExclusiveMode(); // Must be after the mem backend read the data.
+
+ // Create the history publisher which needs to be passed on to the text and
+ // thumbnail databases for publishing history.
+ history_publisher_.reset(new HistoryPublisher());
+ if (!history_publisher_->Init()) {
+ // The init may fail when there are no indexers wanting our history.
+ // Hence no need to log the failure.
+ history_publisher_.reset();
+ }
+
+ // Full-text database. This has to be first so we can pass it to the
+ // HistoryDatabase for migration.
+ text_database_.reset(new TextDatabaseManager(history_dir_,
+ db_.get(), db_.get()));
+ if (!text_database_->Init(history_publisher_.get())) {
+ LOG(WARNING) << "Text database initialization failed, running without it.";
+ text_database_.reset();
+ }
+ if (db_->needs_version_17_migration()) {
+ // See needs_version_17_migration() decl for more. In this case, we want
+ // to erase all the text database files. This must be done after the text
+ // database manager has been initialized, since it knows about all the
+ // files it manages.
+ text_database_->DeleteAll();
+ }
+
+ // Thumbnail database.
+ thumbnail_db_.reset(new ThumbnailDatabase());
+ if (CommandLine::ForCurrentProcess()->HasSwitch(switches::kTopSites)) {
+ if (!db_->needs_version_18_migration()) {
+      // No conversion needed - use the new filename right away.
+ thumbnail_name = GetFaviconsFileName();
+ }
+ }
+ if (thumbnail_db_->Init(thumbnail_name,
+ history_publisher_.get()) != sql::INIT_OK) {
+ // Unlike the main database, we don't error out when the database is too
+ // new because this error is much less severe. Generally, this shouldn't
+    // happen since the thumbnail and main database versions should be in sync.
+ // We'll just continue without thumbnails & favicons in this case or any
+ // other error.
+ LOG(WARNING) << "Could not initialize the thumbnail database.";
+ thumbnail_db_.reset();
+ }
+
+ if (CommandLine::ForCurrentProcess()->HasSwitch(switches::kTopSites)) {
+ if (db_->needs_version_18_migration()) {
+ LOG(INFO) << "Starting TopSites migration";
+ delegate_->StartTopSitesMigration();
+ }
+ }
+
+ // Archived database.
+ if (db_->needs_version_17_migration()) {
+ // See needs_version_17_migration() decl for more. In this case, we want
+ // to delete the archived database and need to do so before we try to
+ // open the file. We can ignore any error (maybe the file doesn't exist).
+ file_util::Delete(archived_name, false);
+ }
+ archived_db_.reset(new ArchivedDatabase());
+ if (!archived_db_->Init(archived_name)) {
+ LOG(WARNING) << "Could not initialize the archived database.";
+ archived_db_.reset();
+ }
+
+ // Tell the expiration module about all the nice databases we made. This must
+ // happen before db_->Init() is called since the callback ForceArchiveHistory
+ // may need to expire stuff.
+ //
+ // *sigh*, this can all be cleaned up when that migration code is removed.
+ // The main DB initialization should intuitively be first (not that it
+ // actually matters) and the expirer should be set last.
+ expirer_.SetDatabases(db_.get(), archived_db_.get(),
+ thumbnail_db_.get(), text_database_.get());
+
+ // Open the long-running transaction.
+ db_->BeginTransaction();
+ if (thumbnail_db_.get())
+ thumbnail_db_->BeginTransaction();
+ if (archived_db_.get())
+ archived_db_->BeginTransaction();
+ if (text_database_.get())
+ text_database_->BeginTransaction();
+
+ // Get the first item in our database.
+ db_->GetStartDate(&first_recorded_time_);
+
+ // Start expiring old stuff.
+ expirer_.StartArchivingOldStuff(TimeDelta::FromDays(kArchiveDaysThreshold));
+
+ HISTOGRAM_TIMES("History.InitTime",
+ TimeTicks::Now() - beginning_time);
+}
+
+std::pair<URLID, VisitID> HistoryBackend::AddPageVisit(
+ const GURL& url,
+ Time time,
+ VisitID referring_visit,
+ PageTransition::Type transition) {
+  // Top-level frame navigations are visible; everything else is hidden.
+ bool new_hidden = !PageTransition::IsMainFrame(transition);
+
+ // NOTE: This code must stay in sync with
+ // ExpireHistoryBackend::ExpireURLsForVisits().
+ // TODO(pkasting): http://b/1148304 We shouldn't be marking so many URLs as
+ // typed, which would eliminate the need for this code.
+ int typed_increment = 0;
+ PageTransition::Type transition_type =
+ PageTransition::StripQualifier(transition);
+ if ((transition_type == PageTransition::TYPED &&
+ !PageTransition::IsRedirect(transition)) ||
+ transition_type == PageTransition::KEYWORD_GENERATED)
+ typed_increment = 1;
+
+ // See if this URL is already in the DB.
+ URLRow url_info(url);
+ URLID url_id = db_->GetRowForURL(url, &url_info);
+ if (url_id) {
+ // Update of an existing row.
+ if (PageTransition::StripQualifier(transition) != PageTransition::RELOAD)
+ url_info.set_visit_count(url_info.visit_count() + 1);
+ if (typed_increment)
+ url_info.set_typed_count(url_info.typed_count() + typed_increment);
+ url_info.set_last_visit(time);
+
+ // Only allow un-hiding of pages, never hiding.
+ if (!new_hidden)
+ url_info.set_hidden(false);
+
+ db_->UpdateURLRow(url_id, url_info);
+ } else {
+ // Addition of a new row.
+ url_info.set_visit_count(1);
+ url_info.set_typed_count(typed_increment);
+ url_info.set_last_visit(time);
+ url_info.set_hidden(new_hidden);
+
+ url_id = db_->AddURL(url_info);
+ if (!url_id) {
+ NOTREACHED() << "Adding URL failed.";
+ return std::make_pair(0, 0);
+ }
+ url_info.id_ = url_id;
+
+ // We don't actually add the URL to the full text index at this point. It
+ // might be nice to do this so that even if we get no title or body, the
+ // user can search for URL components and get the page.
+ //
+ // However, in most cases, we'll get at least a title and usually contents,
+ // and this add will be redundant, slowing everything down. As a result,
+ // we ignore this edge case.
+ }
+
+ // Add the visit with the time to the database.
+ VisitRow visit_info(url_id, time, referring_visit, transition, 0);
+ VisitID visit_id = db_->AddVisit(&visit_info);
+
+ if (visit_info.visit_time < first_recorded_time_)
+ first_recorded_time_ = visit_info.visit_time;
+
+ // Broadcast a notification of the visit.
+ if (visit_id) {
+ URLVisitedDetails* details = new URLVisitedDetails;
+ details->transition = transition;
+ details->row = url_info;
+ // TODO(meelapshah) Disabled due to potential PageCycler regression.
+ // Re-enable this.
+ // GetMostRecentRedirectsTo(url, &details->redirects);
+ BroadcastNotifications(NotificationType::HISTORY_URL_VISITED, details);
+ }
+
+ return std::make_pair(url_id, visit_id);
+}
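+
+// Illustrative summary of AddPageVisit above (derived from its logic):
+//   TYPED, not a redirect    -> visit_count + 1, typed_count + 1
+//   KEYWORD_GENERATED        -> visit_count + 1, typed_count + 1
+//   RELOAD                   -> counts unchanged, last_visit updated
+//   AUTO_SUBFRAME            -> visit recorded; a new URL row stays hidden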
+
+void HistoryBackend::AddPagesWithDetails(const std::vector<URLRow>& urls) {
+ if (!db_.get())
+ return;
+
+ scoped_ptr<URLsModifiedDetails> modified(new URLsModifiedDetails);
+ for (std::vector<URLRow>::const_iterator i = urls.begin();
+ i != urls.end(); ++i) {
+ DCHECK(!i->last_visit().is_null());
+
+ // We will add to either the archived database or the main one depending on
+ // the date of the added visit.
+ URLDatabase* url_database;
+ VisitDatabase* visit_database;
+ if (i->last_visit() < expirer_.GetCurrentArchiveTime()) {
+ if (!archived_db_.get())
+ return; // No archived database to save it to, just forget this.
+ url_database = archived_db_.get();
+ visit_database = archived_db_.get();
+ } else {
+ url_database = db_.get();
+ visit_database = db_.get();
+ }
+
+ URLRow existing_url;
+ URLID url_id = url_database->GetRowForURL(i->url(), &existing_url);
+ if (!url_id) {
+ // Add the page if it doesn't exist.
+ url_id = url_database->AddURL(*i);
+ if (!url_id) {
+ NOTREACHED() << "Could not add row to DB";
+ return;
+ }
+
+ if (i->typed_count() > 0)
+ modified->changed_urls.push_back(*i);
+ }
+
+ // Add the page to the full text index. This function is also used for
+ // importing. Even though we don't have page contents, we can at least
+ // add the title and URL to the index so they can be searched. We don't
+ // bother to delete any already-existing FTS entries for the URL, since
+ // this is normally called on import.
+ //
+ // If you ever import *after* first run (selecting import from the menu),
+ // then these additional entries will "shadow" the originals when querying
+ // for the most recent match only, and the user won't get snippets. This is
+ // a very minor issue, and fixing it will make import slower, so we don't
+ // bother.
+ bool has_indexed = false;
+ if (text_database_.get()) {
+      // We do not have to make it update the visit database; below, we will
+      // create the visit entry with the indexed flag set.
+ has_indexed = text_database_->AddPageData(i->url(), url_id, 0,
+ i->last_visit(),
+ i->title(), string16());
+ }
+
+ // Make up a visit to correspond to that page.
+ VisitRow visit_info(url_id, i->last_visit(), 0,
+ PageTransition::LINK | PageTransition::CHAIN_START |
+ PageTransition::CHAIN_END, 0);
+ visit_info.is_indexed = has_indexed;
+ if (!visit_database->AddVisit(&visit_info)) {
+ NOTREACHED() << "Adding visit failed.";
+ return;
+ }
+
+ if (visit_info.visit_time < first_recorded_time_)
+ first_recorded_time_ = visit_info.visit_time;
+ }
+
+ // Broadcast a notification for typed URLs that have been modified. This
+ // will be picked up by the in-memory URL database on the main thread.
+ //
+ // TODO(brettw) bug 1140015: Add an "add page" notification so the history
+ // views can keep in sync.
+ BroadcastNotifications(NotificationType::HISTORY_TYPED_URLS_MODIFIED,
+ modified.release());
+
+ ScheduleCommit();
+}
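+
+// Illustrative routing for AddPagesWithDetails above: a row whose last visit
+// is older than expirer_.GetCurrentArchiveTime() is written to the archived
+// database; anything newer goes to the main database. Each row also gets one
+// synthesized LINK | CHAIN_START | CHAIN_END visit at its last visit time.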
+
+void HistoryBackend::SetPageTitle(const GURL& url,
+ const string16& title) {
+ if (!db_.get())
+ return;
+
+ // Search for recent redirects which should get the same title. We make a
+ // dummy list containing the exact URL visited if there are no redirects so
+ // the processing below can be the same.
+ history::RedirectList dummy_list;
+ history::RedirectList* redirects;
+ RedirectCache::iterator iter = recent_redirects_.Get(url);
+ if (iter != recent_redirects_.end()) {
+ redirects = &iter->second;
+
+ // This redirect chain should have the destination URL as the last item.
+ DCHECK(!redirects->empty());
+ DCHECK(redirects->back() == url);
+ } else {
+ // No redirect chain stored, make up one containing the URL we want so we
+ // can use the same logic below.
+ dummy_list.push_back(url);
+ redirects = &dummy_list;
+ }
+
+ bool typed_url_changed = false;
+ std::vector<URLRow> changed_urls;
+ for (size_t i = 0; i < redirects->size(); i++) {
+ URLRow row;
+ URLID row_id = db_->GetRowForURL(redirects->at(i), &row);
+ if (row_id && row.title() != title) {
+ row.set_title(title);
+ db_->UpdateURLRow(row_id, row);
+ changed_urls.push_back(row);
+ if (row.typed_count() > 0)
+ typed_url_changed = true;
+ }
+ }
+
+ // Broadcast notifications for typed URLs that have changed. This will
+ // update the in-memory database.
+ //
+ // TODO(brettw) bug 1140020: Broadcast for all changes (not just typed),
+ // in which case some logic can be removed.
+ if (typed_url_changed) {
+ URLsModifiedDetails* modified =
+ new URLsModifiedDetails;
+ for (size_t i = 0; i < changed_urls.size(); i++) {
+ if (changed_urls[i].typed_count() > 0)
+ modified->changed_urls.push_back(changed_urls[i]);
+ }
+ BroadcastNotifications(NotificationType::HISTORY_TYPED_URLS_MODIFIED,
+ modified);
+ }
+
+ // Update the full text index.
+ if (text_database_.get())
+ text_database_->AddPageTitle(url, title);
+
+ // Only bother committing if things changed.
+ if (!changed_urls.empty())
+ ScheduleCommit();
+}
+
+void HistoryBackend::IterateURLs(HistoryService::URLEnumerator* iterator) {
+ if (db_.get()) {
+ HistoryDatabase::URLEnumerator e;
+ if (db_->InitURLEnumeratorForEverything(&e)) {
+ URLRow info;
+ while (e.GetNextURL(&info)) {
+ iterator->OnURL(info.url());
+ }
+ iterator->OnComplete(true); // Success.
+ return;
+ }
+ }
+ iterator->OnComplete(false); // Failure.
+}
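+
+// A minimal caller-side sketch, assuming only the OnURL() and OnComplete()
+// callbacks used above (illustrative, not part of this change):
+//
+//   class URLCollector : public HistoryService::URLEnumerator {
+//    public:
+//     virtual void OnURL(const GURL& url) { urls_.push_back(url); }
+//     virtual void OnComplete(bool success) { success_ = success; }
+//    private:
+//     std::vector<GURL> urls_;
+//     bool success_;
+//   };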
+
+bool HistoryBackend::GetAllTypedURLs(std::vector<history::URLRow>* urls) {
+ if (db_.get())
+ return db_->GetAllTypedUrls(urls);
+ return false;
+}
+
+bool HistoryBackend::GetVisitsForURL(URLID id, VisitVector* visits) {
+ if (db_.get())
+ return db_->GetVisitsForURL(id, visits);
+ return false;
+}
+
+bool HistoryBackend::UpdateURL(URLID id, const history::URLRow& url) {
+ if (db_.get())
+ return db_->UpdateURLRow(id, url);
+ return false;
+}
+
+bool HistoryBackend::AddVisits(const GURL& url,
+ const std::vector<base::Time>& visits) {
+ if (db_.get()) {
+ for (std::vector<base::Time>::const_iterator visit = visits.begin();
+ visit != visits.end(); ++visit) {
+ if (!AddPageVisit(url, *visit, 0, 0).first) {
+ return false;
+ }
+ }
+ ScheduleCommit();
+ return true;
+ }
+ return false;
+}
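+
+// Hypothetical call site (illustrative only): backfilling two visits, which
+// the loop above records one by one before scheduling a commit.
+//
+//   std::vector<base::Time> times;
+//   times.push_back(base::Time::Now() - base::TimeDelta::FromDays(1));
+//   times.push_back(base::Time::Now());
+//   backend->AddVisits(GURL("http://example.com/"), times);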
+
+bool HistoryBackend::RemoveVisits(const VisitVector& visits) {
+ if (db_.get()) {
+ std::map<URLID, int> url_visits_removed;
+ for (VisitVector::const_iterator visit = visits.begin();
+ visit != visits.end(); ++visit) {
+ db_->DeleteVisit(*visit);
+ std::map<URLID, int>::iterator visit_count =
+ url_visits_removed.find(visit->url_id);
+ if (visit_count == url_visits_removed.end()) {
+ url_visits_removed[visit->url_id] = 1;
+ } else {
+ ++visit_count->second;
+ }
+ }
+ for (std::map<URLID, int>::iterator count = url_visits_removed.begin();
+ count != url_visits_removed.end(); ++count) {
+ history::URLRow url_row;
+ if (!db_->GetURLRow(count->first, &url_row)) {
+ return false;
+ }
+ DCHECK(count->second <= url_row.visit_count());
+ url_row.set_visit_count(url_row.visit_count() - count->second);
+ if (!db_->UpdateURLRow(url_row.id(), url_row)) {
+ return false;
+ }
+ }
+ ScheduleCommit();
+ return true;
+ }
+ return false;
+}
+
+bool HistoryBackend::GetURL(const GURL& url, history::URLRow* url_row) {
+ if (db_.get())
+ return db_->GetRowForURL(url, url_row) != 0;
+ return false;
+}
+
+void HistoryBackend::QueryURL(scoped_refptr<QueryURLRequest> request,
+ const GURL& url,
+ bool want_visits) {
+ if (request->canceled())
+ return;
+
+ bool success = false;
+ URLRow* row = &request->value.a;
+ VisitVector* visits = &request->value.b;
+ if (db_.get()) {
+ if (db_->GetRowForURL(url, row)) {
+ // Have a row.
+ success = true;
+
+ // Optionally query the visits.
+ if (want_visits)
+ db_->GetVisitsForURL(row->id(), visits);
+ }
+ }
+ request->ForwardResult(QueryURLRequest::TupleType(request->handle(), success,
+ row, visits));
+}
+
+// Segment usage ---------------------------------------------------------------
+
+void HistoryBackend::DeleteOldSegmentData() {
+ if (db_.get())
+ db_->DeleteSegmentData(Time::Now() -
+ TimeDelta::FromDays(kSegmentDataRetention));
+}
+
+void HistoryBackend::SetSegmentPresentationIndex(SegmentID segment_id,
+ int index) {
+ if (db_.get())
+ db_->SetSegmentPresentationIndex(segment_id, index);
+}
+
+void HistoryBackend::QuerySegmentUsage(
+ scoped_refptr<QuerySegmentUsageRequest> request,
+ const Time from_time,
+ int max_result_count) {
+ if (request->canceled())
+ return;
+
+ if (db_.get()) {
+ db_->QuerySegmentUsage(from_time, max_result_count, &request->value.get());
+
+ // If this is the first time we query segments, invoke
+    // DeleteOldSegmentData asynchronously. We do this to clean up old
+ // entries.
+ if (!segment_queried_) {
+ segment_queried_ = true;
+ MessageLoop::current()->PostTask(FROM_HERE,
+ NewRunnableMethod(this, &HistoryBackend::DeleteOldSegmentData));
+ }
+ }
+ request->ForwardResult(
+ QuerySegmentUsageRequest::TupleType(request->handle(),
+ &request->value.get()));
+}
+
+// Keyword visits --------------------------------------------------------------
+
+void HistoryBackend::SetKeywordSearchTermsForURL(const GURL& url,
+ TemplateURL::IDType keyword_id,
+ const string16& term) {
+ if (!db_.get())
+ return;
+
+ // Get the ID for this URL.
+ URLRow url_row;
+ if (!db_->GetRowForURL(url, &url_row)) {
+    // There is a small possibility the URL was deleted before the keyword
+ // was added. Ignore the request.
+ return;
+ }
+
+ db_->SetKeywordSearchTermsForURL(url_row.id(), keyword_id, term);
+ ScheduleCommit();
+}
+
+void HistoryBackend::DeleteAllSearchTermsForKeyword(
+ TemplateURL::IDType keyword_id) {
+ if (!db_.get())
+ return;
+
+ db_->DeleteAllSearchTermsForKeyword(keyword_id);
+ // TODO(sky): bug 1168470. Need to move from archive dbs too.
+ ScheduleCommit();
+}
+
+void HistoryBackend::GetMostRecentKeywordSearchTerms(
+ scoped_refptr<GetMostRecentKeywordSearchTermsRequest> request,
+ TemplateURL::IDType keyword_id,
+ const string16& prefix,
+ int max_count) {
+ if (request->canceled())
+ return;
+
+ if (db_.get()) {
+ db_->GetMostRecentKeywordSearchTerms(keyword_id, prefix, max_count,
+ &(request->value));
+ }
+ request->ForwardResult(
+ GetMostRecentKeywordSearchTermsRequest::TupleType(request->handle(),
+ &request->value));
+}
+
+// Downloads -------------------------------------------------------------------
+
+// Get all the download entries from the database.
+void HistoryBackend::QueryDownloads(
+ scoped_refptr<DownloadQueryRequest> request) {
+ if (request->canceled())
+ return;
+ if (db_.get())
+ db_->QueryDownloads(&request->value);
+ request->ForwardResult(DownloadQueryRequest::TupleType(&request->value));
+}
+
+// Clean up entries that have been corrupted (because of a crash, for example).
+void HistoryBackend::CleanUpInProgressEntries() {
+ if (db_.get()) {
+ // If some "in progress" entries were not updated when Chrome exited, they
+ // need to be cleaned up.
+ db_->CleanUpInProgressEntries();
+ }
+}
+
+// Update a particular download entry.
+void HistoryBackend::UpdateDownload(int64 received_bytes,
+ int32 state,
+ int64 db_handle) {
+ if (db_.get())
+ db_->UpdateDownload(received_bytes, state, db_handle);
+}
+
+// Update the path of a particular download entry.
+void HistoryBackend::UpdateDownloadPath(const FilePath& path,
+ int64 db_handle) {
+ if (db_.get())
+ db_->UpdateDownloadPath(path, db_handle);
+}
+
+// Create a new download entry and pass back the db_handle to it.
+void HistoryBackend::CreateDownload(
+ scoped_refptr<DownloadCreateRequest> request,
+ const DownloadCreateInfo& create_info) {
+ int64 db_handle = 0;
+ if (!request->canceled()) {
+ if (db_.get())
+ db_handle = db_->CreateDownload(create_info);
+ request->ForwardResult(DownloadCreateRequest::TupleType(create_info,
+ db_handle));
+ }
+}
+
+void HistoryBackend::RemoveDownload(int64 db_handle) {
+ if (db_.get())
+ db_->RemoveDownload(db_handle);
+}
+
+void HistoryBackend::RemoveDownloadsBetween(const Time remove_begin,
+ const Time remove_end) {
+ if (db_.get())
+ db_->RemoveDownloadsBetween(remove_begin, remove_end);
+}
+
+void HistoryBackend::SearchDownloads(
+ scoped_refptr<DownloadSearchRequest> request,
+ const string16& search_text) {
+ if (request->canceled())
+ return;
+ if (db_.get())
+ db_->SearchDownloads(&request->value, search_text);
+ request->ForwardResult(DownloadSearchRequest::TupleType(request->handle(),
+ &request->value));
+}
+
+void HistoryBackend::QueryHistory(scoped_refptr<QueryHistoryRequest> request,
+ const string16& text_query,
+ const QueryOptions& options) {
+ if (request->canceled())
+ return;
+
+ TimeTicks beginning_time = TimeTicks::Now();
+
+ if (db_.get()) {
+ if (text_query.empty()) {
+ // Basic history query for the main database.
+ QueryHistoryBasic(db_.get(), db_.get(), options, &request->value);
+
+ // Now query the archived database. This is a bit tricky because we don't
+ // want to query it if the queried time range isn't going to find anything
+ // in it.
+ // TODO(brettw) bug 1171036: do blimpie querying for the archived database
+ // as well.
+ // if (archived_db_.get() &&
+ // expirer_.GetCurrentArchiveTime() - TimeDelta::FromDays(7)) {
+ } else {
+ // Full text history query.
+ QueryHistoryFTS(text_query, options, &request->value);
+ }
+ }
+
+ request->ForwardResult(QueryHistoryRequest::TupleType(request->handle(),
+ &request->value));
+
+ UMA_HISTOGRAM_TIMES("History.QueryHistory",
+ TimeTicks::Now() - beginning_time);
+}
+
+// Basic time-based querying of history.
+void HistoryBackend::QueryHistoryBasic(URLDatabase* url_db,
+ VisitDatabase* visit_db,
+ const QueryOptions& options,
+ QueryResults* result) {
+ // First get all visits.
+ VisitVector visits;
+ visit_db->GetVisibleVisitsInRange(options.begin_time, options.end_time,
+ options.max_count, &visits);
+ DCHECK(options.max_count == 0 ||
+ static_cast<int>(visits.size()) <= options.max_count);
+
+ // Now add them and the URL rows to the results.
+ URLResult url_result;
+ for (size_t i = 0; i < visits.size(); i++) {
+ const VisitRow visit = visits[i];
+
+ // Add a result row for this visit, get the URL info from the DB.
+ if (!url_db->GetURLRow(visit.url_id, &url_result))
+ continue; // DB out of sync and URL doesn't exist, try to recover.
+ if (!url_result.url().is_valid())
+ continue; // Don't report invalid URLs in case of corruption.
+
+ // The archived database may be out of sync with respect to starring,
+ // titles, last visit date, etc. Therefore, we query the main DB if the
+ // current URL database is not the main one.
+    if (url_db != db_.get()) {
+ // Currently querying the archived DB, update with the main database to
+ // catch any interesting stuff. This will update it if it exists in the
+ // main DB, and do nothing otherwise.
+ db_->GetRowForURL(url_result.url(), &url_result);
+ }
+
+ url_result.set_visit_time(visit.visit_time);
+
+ // We don't set any of the query-specific parts of the URLResult, since
+ // snippets and stuff don't apply to basic querying.
+ result->AppendURLBySwapping(&url_result);
+ }
+
+ if (options.begin_time <= first_recorded_time_)
+ result->set_reached_beginning(true);
+}
+
+void HistoryBackend::QueryHistoryFTS(const string16& text_query,
+ const QueryOptions& options,
+ QueryResults* result) {
+ if (!text_database_.get())
+ return;
+
+ // Full text query, first get all the FTS results in the time range.
+ std::vector<TextDatabase::Match> fts_matches;
+ Time first_time_searched;
+ text_database_->GetTextMatches(text_query, options,
+ &fts_matches, &first_time_searched);
+
+ URLQuerier querier(db_.get(), archived_db_.get(), true);
+
+ // Now get the row and visit information for each one.
+ URLResult url_result; // Declare outside loop to prevent re-construction.
+ for (size_t i = 0; i < fts_matches.size(); i++) {
+ if (options.max_count != 0 &&
+ static_cast<int>(result->size()) >= options.max_count)
+ break; // Got too many items.
+
+ // Get the URL, querying the main and archived databases as necessary. If
+ // this is not found, the history and full text search databases are out
+ // of sync and we give up with this result.
+ if (!querier.GetRowForURL(fts_matches[i].url, &url_result))
+ continue;
+
+ if (!url_result.url().is_valid())
+ continue; // Don't report invalid URLs in case of corruption.
+
+ // Copy over the FTS stuff that the URLDatabase doesn't know about.
+ // We do this with swap() to avoid copying, since we know we don't
+ // need the original any more. Note that we override the title with the
+ // one from FTS, since that will match the title_match_positions (the
+ // FTS title and the history DB title may differ).
+ url_result.set_title(fts_matches[i].title);
+ url_result.title_match_positions_.swap(
+ fts_matches[i].title_match_positions);
+ url_result.snippet_.Swap(&fts_matches[i].snippet);
+
+ // The visit time also comes from the full text search database. Since it
+ // has the time, we can avoid an extra query of the visits table.
+ url_result.set_visit_time(fts_matches[i].time);
+
+    // Add it to the vector; this will clear our |url_result| object as a
+    // result of the swap.
+ result->AppendURLBySwapping(&url_result);
+ }
+
+ if (options.begin_time <= first_recorded_time_)
+ result->set_reached_beginning(true);
+}
+
+// Frontend to GetMostRecentRedirectsFrom from the history thread.
+void HistoryBackend::QueryRedirectsFrom(
+ scoped_refptr<QueryRedirectsRequest> request,
+ const GURL& url) {
+ if (request->canceled())
+ return;
+ bool success = GetMostRecentRedirectsFrom(url, &request->value);
+ request->ForwardResult(QueryRedirectsRequest::TupleType(
+ request->handle(), url, success, &request->value));
+}
+
+void HistoryBackend::QueryRedirectsTo(
+ scoped_refptr<QueryRedirectsRequest> request,
+ const GURL& url) {
+ if (request->canceled())
+ return;
+ bool success = GetMostRecentRedirectsTo(url, &request->value);
+ request->ForwardResult(QueryRedirectsRequest::TupleType(
+ request->handle(), url, success, &request->value));
+}
+
+void HistoryBackend::GetVisitCountToHost(
+ scoped_refptr<GetVisitCountToHostRequest> request,
+ const GURL& url) {
+ if (request->canceled())
+ return;
+ int count = 0;
+ Time first_visit;
+ const bool success = (db_.get() && db_->GetVisitCountToHost(url, &count,
+ &first_visit));
+ request->ForwardResult(GetVisitCountToHostRequest::TupleType(
+ request->handle(), success, count, first_visit));
+}
+
+void HistoryBackend::QueryTopURLsAndRedirects(
+ scoped_refptr<QueryTopURLsAndRedirectsRequest> request,
+ int result_count) {
+ if (request->canceled())
+ return;
+
+ if (!db_.get()) {
+ request->ForwardResult(QueryTopURLsAndRedirectsRequest::TupleType(
+ request->handle(), false, NULL, NULL));
+ return;
+ }
+
+ std::vector<GURL>* top_urls = &request->value.a;
+ history::RedirectMap* redirects = &request->value.b;
+
+ ScopedVector<PageUsageData> data;
+ db_->QuerySegmentUsage(base::Time::Now() - base::TimeDelta::FromDays(90),
+ result_count, &data.get());
+
+ for (size_t i = 0; i < data.size(); ++i) {
+ top_urls->push_back(data[i]->GetURL());
+ RefCountedVector<GURL>* list = new RefCountedVector<GURL>;
+ GetMostRecentRedirectsFrom(top_urls->back(), &list->data);
+ (*redirects)[top_urls->back()] = list;
+ }
+
+ request->ForwardResult(QueryTopURLsAndRedirectsRequest::TupleType(
+ request->handle(), true, top_urls, redirects));
+}
+
+// Will replace QueryTopURLsAndRedirectsRequest.
+void HistoryBackend::QueryMostVisitedURLs(
+ scoped_refptr<QueryMostVisitedURLsRequest> request,
+ int result_count,
+ int days_back) {
+ if (request->canceled())
+ return;
+
+ if (!db_.get()) {
+ // No History Database - return an empty list.
+ request->ForwardResult(QueryMostVisitedURLsRequest::TupleType(
+ request->handle(), MostVisitedURLList()));
+ return;
+ }
+
+ MostVisitedURLList* result = &request->value;
+
+ ScopedVector<PageUsageData> data;
+ db_->QuerySegmentUsage(base::Time::Now() -
+ base::TimeDelta::FromDays(days_back),
+ result_count, &data.get());
+
+ for (size_t i = 0; i < data.size(); ++i) {
+ PageUsageData* current_data = data[i];
+ RedirectList redirects;
+ GetMostRecentRedirectsFrom(current_data->GetURL(), &redirects);
+ MostVisitedURL url = MakeMostVisitedURL(*current_data, redirects);
+ result->push_back(url);
+ }
+
+ request->ForwardResult(QueryMostVisitedURLsRequest::TupleType(
+ request->handle(), *result));
+}
+
+void HistoryBackend::GetRedirectsFromSpecificVisit(
+ VisitID cur_visit, history::RedirectList* redirects) {
+ // Follow any redirects from the given visit and add them to the list.
+ // It *should* be impossible to get a circular chain here, but we check
+ // just in case to avoid infinite loops.
+ GURL cur_url;
+ std::set<VisitID> visit_set;
+ visit_set.insert(cur_visit);
+ while (db_->GetRedirectFromVisit(cur_visit, &cur_visit, &cur_url)) {
+ if (visit_set.find(cur_visit) != visit_set.end()) {
+ NOTREACHED() << "Loop in visit chain, giving up";
+ return;
+ }
+ visit_set.insert(cur_visit);
+ redirects->push_back(cur_url);
+ }
+}
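+
+// Example (derived from the loop above): for a stored chain A -> B -> C,
+// calling GetRedirectsFromSpecificVisit() with A's visit ID yields
+// |redirects| = {B, C}; the starting URL itself is not included.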
+
+void HistoryBackend::GetRedirectsToSpecificVisit(
+ VisitID cur_visit,
+ history::RedirectList* redirects) {
+ // Follow redirects going to cur_visit. These are added to |redirects| in
+ // the order they are found. If a redirect chain looks like A -> B -> C and
+ // |cur_visit| = C, redirects will be {B, A} in that order.
+ if (!db_.get())
+ return;
+
+ GURL cur_url;
+ std::set<VisitID> visit_set;
+ visit_set.insert(cur_visit);
+ while (db_->GetRedirectToVisit(cur_visit, &cur_visit, &cur_url)) {
+ if (visit_set.find(cur_visit) != visit_set.end()) {
+ NOTREACHED() << "Loop in visit chain, giving up";
+ return;
+ }
+ visit_set.insert(cur_visit);
+ redirects->push_back(cur_url);
+ }
+}
+
+bool HistoryBackend::GetMostRecentRedirectsFrom(
+ const GURL& from_url,
+ history::RedirectList* redirects) {
+ redirects->clear();
+ if (!db_.get())
+ return false;
+
+ URLID from_url_id = db_->GetRowForURL(from_url, NULL);
+ VisitID cur_visit = db_->GetMostRecentVisitForURL(from_url_id, NULL);
+ if (!cur_visit)
+ return false; // No visits for URL.
+
+ GetRedirectsFromSpecificVisit(cur_visit, redirects);
+ return true;
+}
+
+bool HistoryBackend::GetMostRecentRedirectsTo(
+ const GURL& to_url,
+ history::RedirectList* redirects) {
+ redirects->clear();
+ if (!db_.get())
+ return false;
+
+ URLID to_url_id = db_->GetRowForURL(to_url, NULL);
+ VisitID cur_visit = db_->GetMostRecentVisitForURL(to_url_id, NULL);
+ if (!cur_visit)
+ return false; // No visits for URL.
+
+ GetRedirectsToSpecificVisit(cur_visit, redirects);
+ return true;
+}
+
+void HistoryBackend::ScheduleAutocomplete(HistoryURLProvider* provider,
+ HistoryURLProviderParams* params) {
+ // ExecuteWithDB should handle the NULL database case.
+ provider->ExecuteWithDB(this, db_.get(), params);
+}
+
+void HistoryBackend::SetPageContents(const GURL& url,
+ const string16& contents) {
+ // This is histogrammed in the text database manager.
+ if (!text_database_.get())
+ return;
+ text_database_->AddPageContents(url, contents);
+}
+
+void HistoryBackend::SetPageThumbnail(
+ const GURL& url,
+ const SkBitmap& thumbnail,
+ const ThumbnailScore& score) {
+ if (!db_.get() || !thumbnail_db_.get())
+ return;
+
+ URLRow url_row;
+ URLID url_id = db_->GetRowForURL(url, &url_row);
+ if (url_id) {
+ thumbnail_db_->SetPageThumbnail(url, url_id, thumbnail, score,
+ url_row.last_visit());
+ }
+
+ ScheduleCommit();
+}
+
+void HistoryBackend::GetPageThumbnail(
+ scoped_refptr<GetPageThumbnailRequest> request,
+ const GURL& page_url) {
+ if (request->canceled())
+ return;
+
+ scoped_refptr<RefCountedBytes> data;
+ GetPageThumbnailDirectly(page_url, &data);
+
+ request->ForwardResult(GetPageThumbnailRequest::TupleType(
+ request->handle(), data));
+}
+
+void HistoryBackend::GetPageThumbnailDirectly(
+ const GURL& page_url,
+ scoped_refptr<RefCountedBytes>* data) {
+ if (thumbnail_db_.get()) {
+ *data = new RefCountedBytes;
+
+ // Time the result.
+ TimeTicks beginning_time = TimeTicks::Now();
+
+ history::RedirectList redirects;
+ URLID url_id;
+ bool success = false;
+
+ // If there are some redirects, try to get a thumbnail from the last
+ // redirect destination.
+ if (GetMostRecentRedirectsFrom(page_url, &redirects) &&
+ !redirects.empty()) {
+ if ((url_id = db_->GetRowForURL(redirects.back(), NULL)))
+ success = thumbnail_db_->GetPageThumbnail(url_id, &(*data)->data);
+ }
+
+ // If we don't have a thumbnail from redirects, try the URL directly.
+ if (!success) {
+ if ((url_id = db_->GetRowForURL(page_url, NULL)))
+ success = thumbnail_db_->GetPageThumbnail(url_id, &(*data)->data);
+ }
+
+    // As a last resort, mine older redirect sessions from the visit table
+    // to try to find a thumbnail.
+ if (!success) {
+ success = GetThumbnailFromOlderRedirect(page_url, &(*data)->data);
+ }
+
+ if (!success)
+ *data = NULL; // This will tell the callback there was an error.
+
+ UMA_HISTOGRAM_TIMES("History.GetPageThumbnail",
+ TimeTicks::Now() - beginning_time);
+ }
+}
+
+bool HistoryBackend::GetThumbnailFromOlderRedirect(
+ const GURL& page_url,
+ std::vector<unsigned char>* data) {
+ // Look at a few previous visit sessions.
+ VisitVector older_sessions;
+ URLID page_url_id = db_->GetRowForURL(page_url, NULL);
+ static const int kVisitsToSearchForThumbnail = 4;
+ db_->GetMostRecentVisitsForURL(
+ page_url_id, kVisitsToSearchForThumbnail, &older_sessions);
+
+ // Iterate across all those previous visits, and see if any of the
+ // final destinations of those redirect chains have a good thumbnail
+ // for us.
+ bool success = false;
+ for (VisitVector::const_iterator it = older_sessions.begin();
+ !success && it != older_sessions.end(); ++it) {
+ history::RedirectList redirects;
+ if (it->visit_id) {
+ GetRedirectsFromSpecificVisit(it->visit_id, &redirects);
+
+ if (!redirects.empty()) {
+ URLID url_id;
+ if ((url_id = db_->GetRowForURL(redirects.back(), NULL)))
+ success = thumbnail_db_->GetPageThumbnail(url_id, data);
+ }
+ }
+ }
+
+ return success;
+}
+
+void HistoryBackend::GetFavIcon(scoped_refptr<GetFavIconRequest> request,
+ const GURL& icon_url) {
+ UpdateFavIconMappingAndFetchImpl(NULL, icon_url, request);
+}
+
+void HistoryBackend::UpdateFavIconMappingAndFetch(
+ scoped_refptr<GetFavIconRequest> request,
+ const GURL& page_url,
+ const GURL& icon_url) {
+ UpdateFavIconMappingAndFetchImpl(&page_url, icon_url, request);
+}
+
+void HistoryBackend::SetFavIconOutOfDateForPage(const GURL& page_url) {
+ if (!thumbnail_db_.get() || !db_.get())
+ return;
+
+ URLRow url_row;
+ URLID url_id = db_->GetRowForURL(page_url, &url_row);
+ if (!url_id || !url_row.favicon_id())
+ return;
+
+ thumbnail_db_->SetFavIconLastUpdateTime(url_row.favicon_id(), Time());
+ ScheduleCommit();
+}
+
+void HistoryBackend::SetImportedFavicons(
+ const std::vector<ImportedFavIconUsage>& favicon_usage) {
+ if (!db_.get() || !thumbnail_db_.get())
+ return;
+
+ Time now = Time::Now();
+
+ // Track all URLs that had their favicons set or updated.
+ std::set<GURL> favicons_changed;
+
+ for (size_t i = 0; i < favicon_usage.size(); i++) {
+ FavIconID favicon_id = thumbnail_db_->GetFavIconIDForFavIconURL(
+ favicon_usage[i].favicon_url);
+ if (!favicon_id) {
+ // This favicon doesn't exist yet, so we create it using the given data.
+ favicon_id = thumbnail_db_->AddFavIcon(favicon_usage[i].favicon_url);
+ if (!favicon_id)
+ continue; // Unable to add the favicon.
+ thumbnail_db_->SetFavIcon(favicon_id,
+ new RefCountedBytes(favicon_usage[i].png_data), now);
+ }
+
+ // Save the mapping from all the URLs to the favicon.
+ BookmarkService* bookmark_service = GetBookmarkService();
+ for (std::set<GURL>::const_iterator url = favicon_usage[i].urls.begin();
+ url != favicon_usage[i].urls.end(); ++url) {
+ URLRow url_row;
+ if (!db_->GetRowForURL(*url, &url_row)) {
+ // If the URL is present as a bookmark, add the url in history to
+ // save the favicon mapping. This will match with what history db does
+ // for regular bookmarked URLs with favicons - when history db is
+ // cleaned, we keep an entry in the db with 0 visits as long as that
+ // url is bookmarked.
+        if (bookmark_service && bookmark_service->IsBookmarked(*url)) {
+ URLRow url_info(*url);
+ url_info.set_visit_count(0);
+ url_info.set_typed_count(0);
+ url_info.set_last_visit(base::Time());
+ url_info.set_hidden(false);
+ url_info.set_favicon_id(favicon_id);
+ db_->AddURL(url_info);
+ favicons_changed.insert(*url);
+ }
+ } else if (url_row.favicon_id() == 0) {
+ // URL is present in history, update the favicon *only* if it
+ // is not set already.
+ url_row.set_favicon_id(favicon_id);
+ db_->UpdateURLRow(url_row.id(), url_row);
+ favicons_changed.insert(*url);
+ }
+ }
+ }
+
+ if (!favicons_changed.empty()) {
+ // Send the notification about the changed favicon URLs.
+ FavIconChangeDetails* changed_details = new FavIconChangeDetails;
+ changed_details->urls.swap(favicons_changed);
+ BroadcastNotifications(NotificationType::FAVICON_CHANGED, changed_details);
+ }
+}
+
+void HistoryBackend::UpdateFavIconMappingAndFetchImpl(
+ const GURL* page_url,
+ const GURL& icon_url,
+ scoped_refptr<GetFavIconRequest> request) {
+ if (request->canceled())
+ return;
+
+ bool know_favicon = false;
+ bool expired = true;
+ scoped_refptr<RefCountedBytes> data;
+
+ if (thumbnail_db_.get()) {
+ const FavIconID favicon_id =
+ thumbnail_db_->GetFavIconIDForFavIconURL(icon_url);
+ if (favicon_id) {
+ data = new RefCountedBytes;
+ know_favicon = true;
+ Time last_updated;
+ if (thumbnail_db_->GetFavIcon(favicon_id, &last_updated, &data->data,
+ NULL)) {
+ expired = (Time::Now() - last_updated) >
+ TimeDelta::FromDays(kFavIconRefetchDays);
+ }
+
+ if (page_url)
+ SetFavIconMapping(*page_url, favicon_id);
+ }
+    // Otherwise we haven't cached the entry yet; the caller is responsible
+    // for downloading the favicon and invoking SetFavIcon.
+ }
+ request->ForwardResult(GetFavIconRequest::TupleType(
+ request->handle(), know_favicon, data, expired,
+ icon_url));
+}
+
+void HistoryBackend::GetFavIconForURL(
+ scoped_refptr<GetFavIconRequest> request,
+ const GURL& page_url) {
+ if (request->canceled())
+ return;
+
+ bool know_favicon = false;
+ bool expired = false;
+ GURL icon_url;
+
+ scoped_refptr<RefCountedBytes> data;
+
+ if (db_.get() && thumbnail_db_.get()) {
+ // Time the query.
+ TimeTicks beginning_time = TimeTicks::Now();
+
+ URLRow url_info;
+ data = new RefCountedBytes;
+ Time last_updated;
+ if (db_->GetRowForURL(page_url, &url_info) && url_info.favicon_id() &&
+ thumbnail_db_->GetFavIcon(url_info.favicon_id(), &last_updated,
+ &data->data, &icon_url)) {
+ know_favicon = true;
+ expired = (Time::Now() - last_updated) >
+ TimeDelta::FromDays(kFavIconRefetchDays);
+ }
+
+ UMA_HISTOGRAM_TIMES("History.GetFavIconForURL",
+ TimeTicks::Now() - beginning_time);
+ }
+
+ request->ForwardResult(
+ GetFavIconRequest::TupleType(request->handle(), know_favicon, data,
+ expired, icon_url));
+}
+
+void HistoryBackend::SetFavIcon(
+ const GURL& page_url,
+ const GURL& icon_url,
+ scoped_refptr<RefCountedMemory> data) {
+ DCHECK(data.get());
+ if (!thumbnail_db_.get() || !db_.get())
+ return;
+
+ FavIconID id = thumbnail_db_->GetFavIconIDForFavIconURL(icon_url);
+ if (!id)
+ id = thumbnail_db_->AddFavIcon(icon_url);
+
+ // Set the image data.
+ thumbnail_db_->SetFavIcon(id, data, Time::Now());
+
+ SetFavIconMapping(page_url, id);
+}
+
+void HistoryBackend::SetFavIconMapping(const GURL& page_url,
+ FavIconID id) {
+  // Find all the pages whose favicons we should set; we want to set the
+  // favicon for all the pages in the redirect chain if it redirected.
+ history::RedirectList dummy_list;
+ history::RedirectList* redirects;
+ RedirectCache::iterator iter = recent_redirects_.Get(page_url);
+ if (iter != recent_redirects_.end()) {
+ redirects = &iter->second;
+
+ // This redirect chain should have the destination URL as the last item.
+ DCHECK(!redirects->empty());
+ DCHECK(redirects->back() == page_url);
+ } else {
+    // No redirect chain stored, make up one containing the URL we want so we
+    // can use the same logic below.
+ dummy_list.push_back(page_url);
+ redirects = &dummy_list;
+ }
+
+ std::set<GURL> favicons_changed;
+
+ // Save page <-> favicon association.
+ for (history::RedirectList::const_iterator i(redirects->begin());
+ i != redirects->end(); ++i) {
+ URLRow row;
+ if (!db_->GetRowForURL(*i, &row) || row.favicon_id() == id)
+ continue;
+
+    FavIconID old_id = row.favicon_id();
+ row.set_favicon_id(id);
+ db_->UpdateURLRow(row.id(), row);
+
+ if (old_id) {
+ // The page's favicon ID changed. This means that the one we just
+ // changed from could have been orphaned, and we need to re-check it.
+ // This is not super fast, but this case will get triggered rarely,
+ // since normally a page will always map to the same favicon ID. It
+ // will mostly happen for favicons we import.
+ if (!db_->IsFavIconUsed(old_id) && thumbnail_db_.get())
+ thumbnail_db_->DeleteFavIcon(old_id);
+ }
+
+ favicons_changed.insert(row.url());
+ }
+
+ // Send the notification about the changed favicons.
+ FavIconChangeDetails* changed_details = new FavIconChangeDetails;
+ changed_details->urls.swap(favicons_changed);
+ BroadcastNotifications(NotificationType::FAVICON_CHANGED, changed_details);
+
+ ScheduleCommit();
+}
+
+void HistoryBackend::Commit() {
+ if (!db_.get())
+ return;
+
+ // Note that a commit may not actually have been scheduled if a caller
+ // explicitly calls this instead of using ScheduleCommit. Likewise, we
+ // may reset the flag written by a pending commit. But this is OK! It
+ // will merely cause extra commits (which is kind of the idea). We
+ // could optimize more for this case (we may get two extra commits in
+ // some cases) but it hasn't been important yet.
+ CancelScheduledCommit();
+
+ db_->CommitTransaction();
+ DCHECK(db_->transaction_nesting() == 0) << "Somebody left a transaction open";
+ db_->BeginTransaction();
+
+ if (thumbnail_db_.get()) {
+ thumbnail_db_->CommitTransaction();
+ DCHECK(thumbnail_db_->transaction_nesting() == 0) <<
+ "Somebody left a transaction open";
+ thumbnail_db_->BeginTransaction();
+ }
+
+ if (archived_db_.get()) {
+ archived_db_->CommitTransaction();
+ archived_db_->BeginTransaction();
+ }
+
+ if (text_database_.get()) {
+ text_database_->CommitTransaction();
+ text_database_->BeginTransaction();
+ }
+}
+
+void HistoryBackend::ScheduleCommit() {
+ if (scheduled_commit_.get())
+ return;
+ scheduled_commit_ = new CommitLaterTask(this);
+ MessageLoop::current()->PostDelayedTask(FROM_HERE,
+ NewRunnableMethod(scheduled_commit_.get(),
+ &CommitLaterTask::RunCommit),
+ kCommitIntervalMs);
+}
+
+void HistoryBackend::CancelScheduledCommit() {
+ if (scheduled_commit_) {
+ scheduled_commit_->Cancel();
+ scheduled_commit_ = NULL;
+ }
+}
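+
+// A minimal sketch of what CommitLaterTask could look like, matching the
+// RunCommit()/Cancel() call sites above (assumed; the class is defined
+// elsewhere):
+//
+//   class CommitLaterTask : public base::RefCounted<CommitLaterTask> {
+//    public:
+//     explicit CommitLaterTask(HistoryBackend* backend)
+//         : history_backend_(backend) {}
+//     void RunCommit() {
+//       if (history_backend_.get())
+//         history_backend_->Commit();
+//     }
+//     void Cancel() { history_backend_ = NULL; }
+//    private:
+//     scoped_refptr<HistoryBackend> history_backend_;
+//   };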
+
+void HistoryBackend::ProcessDBTaskImpl() {
+ if (!db_.get()) {
+    // The db went away; release all the refs.
+ ReleaseDBTasks();
+ return;
+ }
+
+ // Remove any canceled tasks.
+ while (!db_task_requests_.empty() && db_task_requests_.front()->canceled()) {
+ db_task_requests_.front()->Release();
+ db_task_requests_.pop_front();
+ }
+ if (db_task_requests_.empty())
+ return;
+
+ // Run the first task.
+ HistoryDBTaskRequest* request = db_task_requests_.front();
+ db_task_requests_.pop_front();
+ if (request->value->RunOnDBThread(this, db_.get())) {
+ // The task is done. Notify the callback.
+ request->ForwardResult(HistoryDBTaskRequest::TupleType());
+ // We AddRef'd the request before adding, need to release it now.
+ request->Release();
+ } else {
+    // The task wants to run some more. Schedule it at the end of the current
+    // tasks.
+    db_task_requests_.push_back(request);
+    // And process it again via a task posted to the current message loop.
+ MessageLoop::current()->PostTask(FROM_HERE, NewRunnableMethod(
+ this, &HistoryBackend::ProcessDBTaskImpl));
+ }
+}
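+
+// Illustrative only: a hypothetical HistoryDBTask that runs in slices.
+// Returning false from RunOnDBThread() makes the loop above re-queue the
+// request and post another ProcessDBTaskImpl() invocation.
+//
+//   class SliceTask : public HistoryDBTask {
+//    public:
+//     virtual bool RunOnDBThread(HistoryBackend* backend,
+//                                HistoryDatabase* db) {
+//       // DoOneSlice() is hypothetical; returning false means "run again".
+//       return DoOneSlice(db);
+//     }
+//     virtual void DoneRunOnMainThread() {}
+//   };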
+
+void HistoryBackend::ReleaseDBTasks() {
+ for (std::list<HistoryDBTaskRequest*>::iterator i =
+ db_task_requests_.begin(); i != db_task_requests_.end(); ++i) {
+ (*i)->Release();
+ }
+ db_task_requests_.clear();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+//
+// Generic operations
+//
+////////////////////////////////////////////////////////////////////////////////
+
+void HistoryBackend::DeleteURLs(const std::vector<GURL>& urls) {
+ for (std::vector<GURL>::const_iterator url = urls.begin(); url != urls.end();
+ ++url) {
+ expirer_.DeleteURL(*url);
+ }
+
+ db_->GetStartDate(&first_recorded_time_);
+  // Force a commit: if the user is deleting something for privacy reasons, we
+  // want to get it on disk ASAP.
+ Commit();
+}
+
+void HistoryBackend::DeleteURL(const GURL& url) {
+ expirer_.DeleteURL(url);
+
+ db_->GetStartDate(&first_recorded_time_);
+  // Force a commit: if the user is deleting something for privacy reasons, we
+  // want to get it on disk ASAP.
+ Commit();
+}
+
+void HistoryBackend::ExpireHistoryBetween(
+ scoped_refptr<ExpireHistoryRequest> request,
+ const std::set<GURL>& restrict_urls,
+ Time begin_time,
+ Time end_time) {
+ if (request->canceled())
+ return;
+
+ if (db_.get()) {
+ if (begin_time.is_null() && end_time.is_null() && restrict_urls.empty()) {
+      // Special case: deleting all history this way is faster, and it reduces
+      // the possibility of an information leak.
+ DeleteAllHistory();
+ } else {
+      // Clearing parts of history; have the expirer do the work.
+ expirer_.ExpireHistoryBetween(restrict_urls, begin_time, end_time);
+
+      // Force a commit: if the user is deleting something for privacy reasons,
+      // we want to get it on disk ASAP.
+ Commit();
+ }
+ }
+
+ if (begin_time <= first_recorded_time_)
+ db_->GetStartDate(&first_recorded_time_);
+
+ request->ForwardResult(ExpireHistoryRequest::TupleType());
+
+ if (history_publisher_.get() && restrict_urls.empty())
+ history_publisher_->DeleteUserHistoryBetween(begin_time, end_time);
+}
+
+void HistoryBackend::URLsNoLongerBookmarked(const std::set<GURL>& urls) {
+ if (!db_.get())
+ return;
+
+ for (std::set<GURL>::const_iterator i = urls.begin(); i != urls.end(); ++i) {
+ URLRow url_row;
+ if (!db_->GetRowForURL(*i, &url_row))
+ continue; // The URL isn't in the db; nothing to do.
+
+ VisitVector visits;
+ db_->GetVisitsForURL(url_row.id(), &visits);
+
+ if (visits.empty())
+ expirer_.DeleteURL(*i); // There are no more visits; nuke the URL.
+ }
+}
+
+void HistoryBackend::ProcessDBTask(
+ scoped_refptr<HistoryDBTaskRequest> request) {
+ DCHECK(request.get());
+ if (request->canceled())
+ return;
+
+ bool task_scheduled = !db_task_requests_.empty();
+ // Make sure we up the refcount of the request. ProcessDBTaskImpl will
+ // release when done with the task.
+ request->AddRef();
+ db_task_requests_.push_back(request.get());
+ if (!task_scheduled) {
+ // No other tasks are scheduled. Process request now.
+ ProcessDBTaskImpl();
+ }
+}
+
+void HistoryBackend::BroadcastNotifications(
+ NotificationType type,
+ HistoryDetails* details_deleted) {
+ DCHECK(delegate_.get());
+ delegate_->BroadcastNotifications(type, details_deleted);
+}
+
+// Deleting --------------------------------------------------------------------
+
+void HistoryBackend::DeleteAllHistory() {
+ // Our approach to deleting all history is:
+ // 1. Copy the bookmarks and their dependencies to new tables with temporary
+ // names.
+  // 2. Delete the original tables. Since tables cannot share pages, we know
+  // that any data we don't want to keep is now in an unused page.
+  // 3. Rename the temporary tables to match the originals.
+  // 4. Vacuum the database to delete the unused pages.
+ //
+ // Since we are likely to have very few bookmarks and their dependencies
+ // compared to all history, this is also much faster than just deleting from
+ // the original tables directly.
+
+ // Get the bookmarked URLs.
+ std::vector<GURL> starred_urls;
+ BookmarkService* bookmark_service = GetBookmarkService();
+ if (bookmark_service)
+    bookmark_service->GetBookmarks(&starred_urls);
+
+ std::vector<URLRow> kept_urls;
+ for (size_t i = 0; i < starred_urls.size(); i++) {
+ URLRow row;
+ if (!db_->GetRowForURL(starred_urls[i], &row))
+ continue;
+
+ // Clear the last visit time so when we write these rows they are "clean."
+ row.set_last_visit(Time());
+ row.set_visit_count(0);
+ row.set_typed_count(0);
+ kept_urls.push_back(row);
+ }
+
+ // Clear thumbnail and favicon history. The favicons for the given URLs will
+ // be kept.
+ if (!ClearAllThumbnailHistory(&kept_urls)) {
+ LOG(ERROR) << "Thumbnail history could not be cleared";
+ // We continue in this error case. If the user wants to delete their
+ // history, we should delete as much as we can.
+ }
+
+  // ClearAllMainHistory will change the IDs of the URLs in kept_urls.
+  // Therefore, we clear the list afterwards to make sure nobody uses this
+  // invalid data.
+ if (!ClearAllMainHistory(kept_urls))
+ LOG(ERROR) << "Main history could not be cleared";
+ kept_urls.clear();
+
+ // Delete FTS files & archived history.
+ if (text_database_.get()) {
+    // We assume that the text database has one transaction open on it that we
+    // need to close & restart (the long-running history transaction).
+ text_database_->CommitTransaction();
+ text_database_->DeleteAll();
+ text_database_->BeginTransaction();
+ }
+
+ if (archived_db_.get()) {
+ // Close the database and delete the file.
+ archived_db_.reset();
+ FilePath archived_file_name = GetArchivedFileName();
+ file_util::Delete(archived_file_name, false);
+
+ // Now re-initialize the database (which may fail).
+ archived_db_.reset(new ArchivedDatabase());
+ if (!archived_db_->Init(archived_file_name)) {
+ LOG(WARNING) << "Could not initialize the archived database.";
+ archived_db_.reset();
+ } else {
+ // Open our long-running transaction on this database.
+ archived_db_->BeginTransaction();
+ }
+ }
+
+ db_->GetStartDate(&first_recorded_time_);
+
+  // Send out the notification that history is cleared. The in-memory database
+  // will pick this up and clear itself.
+ URLsDeletedDetails* details = new URLsDeletedDetails;
+ details->all_history = true;
+ BroadcastNotifications(NotificationType::HISTORY_URLS_DELETED, details);
+}
+
+bool HistoryBackend::ClearAllThumbnailHistory(
+ std::vector<URLRow>* kept_urls) {
+ if (!thumbnail_db_.get()) {
+ // When we have no reference to the thumbnail database, maybe there was an
+ // error opening it. In this case, we just try to blow it away to try to
+ // fix the error if it exists. This may fail, in which case either the
+ // file doesn't exist or there's no more we can do.
+ file_util::Delete(GetThumbnailFileName(), false);
+ return true;
+ }
+
+  // Create the duplicate favicon table; this is where the favicons we want
+  // to keep will be stored.
+ if (!thumbnail_db_->InitTemporaryFavIconsTable())
+ return false;
+
+ // This maps existing favicon IDs to the ones in the temporary table.
+ typedef std::map<FavIconID, FavIconID> FavIconMap;
+ FavIconMap copied_favicons;
+
+ // Copy all unique favicons to the temporary table, and update all the
+ // URLs to have the new IDs.
+ for (std::vector<URLRow>::iterator i = kept_urls->begin();
+ i != kept_urls->end(); ++i) {
+ FavIconID old_id = i->favicon_id();
+ if (!old_id)
+ continue; // URL has no favicon.
+ FavIconID new_id;
+
+ FavIconMap::const_iterator found = copied_favicons.find(old_id);
+ if (found == copied_favicons.end()) {
+ new_id = thumbnail_db_->CopyToTemporaryFavIconTable(old_id);
+ copied_favicons[old_id] = new_id;
+ } else {
+ // We already encountered a URL that used this favicon, use the ID we
+ // previously got.
+ new_id = found->second;
+ }
+ i->set_favicon_id(new_id);
+ }
+
+ // Rename the duplicate favicon table back and recreate the other tables.
+ // This will make the database consistent again.
+ thumbnail_db_->CommitTemporaryFavIconTable();
+ thumbnail_db_->RecreateThumbnailTable();
+
+ // Vacuum to remove all the pages associated with the dropped tables. There
+ // must be no transaction open on the table when we do this. We assume that
+ // our long-running transaction is open, so we complete it and start it again.
+ DCHECK(thumbnail_db_->transaction_nesting() == 1);
+ thumbnail_db_->CommitTransaction();
+ thumbnail_db_->Vacuum();
+ thumbnail_db_->BeginTransaction();
+ return true;
+}
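+
+// Illustrative trace of the favicon remapping above: if two kept URLs share
+// FavIconID 7, the first lookup misses |copied_favicons|, copies the favicon
+// into the temporary table (getting some new ID, say 1) and records 7 -> 1;
+// the second URL hits the map and reuses ID 1, so each favicon is copied
+// exactly once.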
+
+bool HistoryBackend::ClearAllMainHistory(
+ const std::vector<URLRow>& kept_urls) {
+ // Create the duplicate URL table. We will copy the kept URLs into this.
+ if (!db_->CreateTemporaryURLTable())
+ return false;
+
+ // Insert the URLs into the temporary table, we need to keep a map of changed
+ // IDs since the ID will be different in the new table.
+ typedef std::map<URLID, URLID> URLIDMap;
+ URLIDMap old_to_new; // Maps original ID to new one.
+ for (std::vector<URLRow>::const_iterator i = kept_urls.begin();
+ i != kept_urls.end();
+ ++i) {
+ URLID new_id = db_->AddTemporaryURL(*i);
+ old_to_new[i->id()] = new_id;
+ }
+
+ // Replace the original URL table with the temporary one.
+ if (!db_->CommitTemporaryURLTable())
+ return false;
+
+ // Delete the old tables and recreate them empty.
+ db_->RecreateAllTablesButURL();
+
+ // Vacuum to reclaim the space from the dropped tables. This must be done
+ // when there is no transaction open, and we assume that our long-running
+ // transaction is currently open.
+ db_->CommitTransaction();
+ db_->Vacuum();
+ db_->BeginTransaction();
+ db_->GetStartDate(&first_recorded_time_);
+
+ return true;
+}
+
+BookmarkService* HistoryBackend::GetBookmarkService() {
+ if (bookmark_service_)
+ bookmark_service_->BlockTillLoaded();
+ return bookmark_service_;
+}
+
+void HistoryBackend::MigrateThumbnailsDatabase() {
+ thumbnail_db_->RenameAndDropThumbnails(GetThumbnailFileName(),
+ GetFaviconsFileName());
+ db_->MigrationToTopSitesDone();
+}
+
+} // namespace history
diff --git a/chrome/browser/history/history_backend.h b/chrome/browser/history/history_backend.h
new file mode 100644
index 0000000..cbda2e9
--- /dev/null
+++ b/chrome/browser/history/history_backend.h
@@ -0,0 +1,560 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_HISTORY_HISTORY_BACKEND_H_
+#define CHROME_BROWSER_HISTORY_HISTORY_BACKEND_H_
+
+#include <utility>
+
+#include "base/file_path.h"
+#include "base/gtest_prod_util.h"
+#include "base/scoped_ptr.h"
+#include "chrome/browser/history/archived_database.h"
+#include "chrome/browser/history/download_types.h"
+#include "chrome/browser/history/expire_history_backend.h"
+#include "chrome/browser/history/history_database.h"
+#include "chrome/browser/history/history_marshaling.h"
+#include "chrome/browser/history/history_types.h"
+#include "chrome/browser/history/text_database_manager.h"
+#include "chrome/browser/history/thumbnail_database.h"
+#include "chrome/browser/history/visit_tracker.h"
+#include "chrome/common/mru_cache.h"
+
+class BookmarkService;
+class TestingProfile;
+struct ThumbnailScore;
+
+namespace history {
+
+class CommitLaterTask;
+class HistoryPublisher;
+
+// *See the .cc file for more information on the design.*
+//
+// Internal history implementation which does most of the work of the history
+// system. This runs on a background thread (to not block the browser when we
+// do expensive operations) and is NOT threadsafe, so it must only be called
+// from message handlers on the background thread. Invoking on another thread
+// requires threadsafe refcounting.
+//
+// Most functions here are just the implementations of the corresponding
+// functions in the history service. These functions are not documented
+// here; see the history service for behavior.
+class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>,
+ public BroadcastNotificationDelegate {
+ public:
+ // Interface implemented by the owner of the HistoryBackend object. Normally,
+ // the history service implements this to send stuff back to the main thread.
+ // The unit tests can provide a different implementation if they don't have
+ // a history service object.
+ class Delegate {
+ public:
+ virtual ~Delegate() {}
+
+ // Called when the database cannot be read correctly for some reason.
+ virtual void NotifyProfileError(int message_id) = 0;
+
+ // Sets the in-memory history backend. The in-memory backend is created by
+ // the main backend. For non-unit tests, this happens on the background
+ // thread. It is to be used on the main thread, so this would transfer
+ // it to the history service. Unit tests can override this behavior.
+ //
+ // This function is NOT guaranteed to be called. If there is an error,
+ // there may be no in-memory database.
+ //
+ // Ownership of the backend pointer is transferred to this function.
+ virtual void SetInMemoryBackend(InMemoryHistoryBackend* backend) = 0;
+
+ // Broadcasts the specified notification to the notification service.
+ // This is implemented here because notifications must only be sent from
+ // the main thread.
+ //
+ // Ownership of the HistoryDetails is transferred to this function.
+ virtual void BroadcastNotifications(NotificationType type,
+ HistoryDetails* details) = 0;
+
+ // Invoked when the backend has finished loading the db.
+ virtual void DBLoaded() = 0;
+
+ // Tell TopSites to start reading thumbnails from the ThumbnailsDB.
+ virtual void StartTopSitesMigration() = 0;
+ };
+
+ // Init must be called to complete object creation. This object can be
+ // constructed on any thread, but all other functions including Init() must
+ // be called on the history thread.
+ //
+ // |history_dir| is the directory where the history files will be placed.
+  // See the definition of Delegate above. This object takes ownership of
+  // |delegate|.
+ //
+ // |bookmark_service| is used to determine bookmarked URLs when deleting and
+ // may be NULL.
+ //
+ // This constructor is fast and does no I/O, so can be called at any time.
+ HistoryBackend(const FilePath& history_dir,
+ Delegate* delegate,
+ BookmarkService* bookmark_service);
+
+  // Must be called after creation but before any other methods are called.
+  // If this fails, all other functions will fail as well. (Since this runs on
+  // another thread, we don't bother returning failure.)
+ //
+ // |force_fail| can be set during unittests to unconditionally fail to init.
+ void Init(bool force_fail);
+
+  // Notification that the history system is shutting down. This releases the
+  // references owned by the delegate and commits any pending transaction, so
+  // the backend will actually be deleted.
+ void Closing();
+
+ // See NotifyRenderProcessHostDestruction.
+ void NotifyRenderProcessHostDestruction(const void* host);
+
+ // Navigation ----------------------------------------------------------------
+
+ void AddPage(scoped_refptr<HistoryAddPageArgs> request);
+ virtual void SetPageTitle(const GURL& url, const string16& title);
+
+ // Indexing ------------------------------------------------------------------
+
+ void SetPageContents(const GURL& url, const string16& contents);
+
+ // Querying ------------------------------------------------------------------
+
+ // ScheduleAutocomplete() never frees |provider| (which is globally live).
+ // It passes |params| on to the autocomplete system which will eventually
+ // free it.
+ void ScheduleAutocomplete(HistoryURLProvider* provider,
+ HistoryURLProviderParams* params);
+
+ void IterateURLs(HistoryService::URLEnumerator* enumerator);
+ void QueryURL(scoped_refptr<QueryURLRequest> request,
+ const GURL& url,
+ bool want_visits);
+ void QueryHistory(scoped_refptr<QueryHistoryRequest> request,
+ const string16& text_query,
+ const QueryOptions& options);
+ void QueryRedirectsFrom(scoped_refptr<QueryRedirectsRequest> request,
+ const GURL& url);
+ void QueryRedirectsTo(scoped_refptr<QueryRedirectsRequest> request,
+ const GURL& url);
+
+ void GetVisitCountToHost(scoped_refptr<GetVisitCountToHostRequest> request,
+ const GURL& url);
+
+ // TODO(Nik): remove. Use QueryMostVisitedURLs instead.
+ void QueryTopURLsAndRedirects(
+ scoped_refptr<QueryTopURLsAndRedirectsRequest> request,
+ int result_count);
+
+ // Request the |result_count| most visited URLs and the chain of
+ // redirects leading to each of these URLs. |days_back| is the
+ // number of days of history to use. Used by TopSites.
+ void QueryMostVisitedURLs(
+ scoped_refptr<QueryMostVisitedURLsRequest> request,
+ int result_count,
+ int days_back);
+
+ // Computes the most recent URL(s) that the given canonical URL has
+ // redirected to and returns true on success. There may be more than one
+ // redirect in a row, so this function will fill the given array with the
+ // entire chain. If there are no redirects for the most recent visit of the
+ // URL, or the URL is not in history, returns false.
+ //
+ // Backend for QueryRedirectsFrom.
+ bool GetMostRecentRedirectsFrom(const GURL& url,
+ history::RedirectList* redirects);
+
+ // Similar to above function except computes a chain of redirects to the
+ // given URL. Stores the most recent list of redirects ending at |url| in the
+ // given RedirectList. For example, if we have the redirect list A -> B -> C,
+ // then calling this function with url=C would fill redirects with {B, A}.
+ bool GetMostRecentRedirectsTo(const GURL& url,
+ history::RedirectList* redirects);
+
+ // Thumbnails ----------------------------------------------------------------
+
+ void SetPageThumbnail(const GURL& url,
+ const SkBitmap& thumbnail,
+ const ThumbnailScore& score);
+
+ // Retrieves a thumbnail, passing it across thread boundaries
+  // via the included callback.
+ void GetPageThumbnail(scoped_refptr<GetPageThumbnailRequest> request,
+ const GURL& page_url);
+
+  // Backend implementation of GetPageThumbnail. Unlike GetPageThumbnail(),
+  // this method has no way to transport data across thread boundaries; it
+  // returns the data directly through |data|.
+ //
+ // Exposed for testing reasons.
+ void GetPageThumbnailDirectly(
+ const GURL& page_url,
+ scoped_refptr<RefCountedBytes>* data);
+
+ void MigrateThumbnailsDatabase();
+
+ // Favicon -------------------------------------------------------------------
+
+ void GetFavIcon(scoped_refptr<GetFavIconRequest> request,
+ const GURL& icon_url);
+ void GetFavIconForURL(scoped_refptr<GetFavIconRequest> request,
+ const GURL& page_url);
+ void SetFavIcon(const GURL& page_url,
+ const GURL& icon_url,
+ scoped_refptr<RefCountedMemory> data);
+ void UpdateFavIconMappingAndFetch(scoped_refptr<GetFavIconRequest> request,
+ const GURL& page_url,
+ const GURL& icon_url);
+ void SetFavIconOutOfDateForPage(const GURL& page_url);
+ void SetImportedFavicons(
+ const std::vector<ImportedFavIconUsage>& favicon_usage);
+
+ // Downloads -----------------------------------------------------------------
+
+ void QueryDownloads(scoped_refptr<DownloadQueryRequest> request);
+ void CleanUpInProgressEntries();
+ void UpdateDownload(int64 received_bytes, int32 state, int64 db_handle);
+ void UpdateDownloadPath(const FilePath& path, int64 db_handle);
+ void CreateDownload(scoped_refptr<DownloadCreateRequest> request,
+ const DownloadCreateInfo& info);
+ void RemoveDownload(int64 db_handle);
+ void RemoveDownloadsBetween(const base::Time remove_begin,
+ const base::Time remove_end);
+ void RemoveDownloads(const base::Time remove_end);
+ void SearchDownloads(scoped_refptr<DownloadSearchRequest>,
+ const string16& search_text);
+
+ // Segment usage -------------------------------------------------------------
+
+ void QuerySegmentUsage(scoped_refptr<QuerySegmentUsageRequest> request,
+ const base::Time from_time,
+ int max_result_count);
+ void DeleteOldSegmentData();
+ void SetSegmentPresentationIndex(SegmentID segment_id, int index);
+
+ // Keyword search terms ------------------------------------------------------
+
+ void SetKeywordSearchTermsForURL(const GURL& url,
+ TemplateURL::IDType keyword_id,
+ const string16& term);
+
+ void DeleteAllSearchTermsForKeyword(TemplateURL::IDType keyword_id);
+
+ void GetMostRecentKeywordSearchTerms(
+ scoped_refptr<GetMostRecentKeywordSearchTermsRequest> request,
+ TemplateURL::IDType keyword_id,
+ const string16& prefix,
+ int max_count);
+
+ // Generic operations --------------------------------------------------------
+
+ void ProcessDBTask(scoped_refptr<HistoryDBTaskRequest> request);
+
+ virtual bool GetAllTypedURLs(std::vector<history::URLRow>* urls);
+
+ virtual bool GetVisitsForURL(URLID id, VisitVector* visits);
+
+ virtual bool UpdateURL(URLID id, const history::URLRow& url);
+
+ virtual bool AddVisits(const GURL& url,
+ const std::vector<base::Time>& visits);
+
+ virtual bool RemoveVisits(const VisitVector& visits);
+
+ virtual bool GetURL(const GURL& url, history::URLRow* url_row);
+
+ // Deleting ------------------------------------------------------------------
+
+ virtual void DeleteURLs(const std::vector<GURL>& urls);
+
+ virtual void DeleteURL(const GURL& url);
+
+ // Calls ExpireHistoryBackend::ExpireHistoryBetween and commits the change.
+ void ExpireHistoryBetween(scoped_refptr<ExpireHistoryRequest> request,
+ const std::set<GURL>& restrict_urls,
+ base::Time begin_time,
+ base::Time end_time);
+
+ // Bookmarks -----------------------------------------------------------------
+
+ // Notification that a URL is no longer bookmarked. If there are no visits
+ // for the specified url, it is deleted.
+ void URLsNoLongerBookmarked(const std::set<GURL>& urls);
+
+ // Testing -------------------------------------------------------------------
+
+ // Sets the task to run and the message loop to run it on when this object
+ // is destroyed. See HistoryService::SetOnBackendDestroyTask for a more
+ // complete description.
+ void SetOnBackendDestroyTask(MessageLoop* message_loop, Task* task);
+
+  // Adds the given rows to the database if they don't already exist. A visit
+  // will be added for each given URL at the last visit time in the URLRow.
+ void AddPagesWithDetails(const std::vector<URLRow>& info);
+
+#if defined(UNIT_TEST)
+ HistoryDatabase* db() const { return db_.get(); }
+
+ ExpireHistoryBackend* expire_backend() { return &expirer_; }
+#endif
+
+ protected:
+ virtual ~HistoryBackend();
+
+ private:
+ friend class base::RefCountedThreadSafe<HistoryBackend>;
+ friend class CommitLaterTask; // The commit task needs to call Commit().
+ friend class HistoryTest; // So the unit tests can poke our innards.
+ FRIEND_TEST_ALL_PREFIXES(HistoryBackendTest, DeleteAll);
+ FRIEND_TEST_ALL_PREFIXES(HistoryBackendTest, ImportedFaviconsTest);
+ FRIEND_TEST_ALL_PREFIXES(HistoryBackendTest, URLsNoLongerBookmarked);
+ FRIEND_TEST_ALL_PREFIXES(HistoryBackendTest, StripUsernamePasswordTest);
+ FRIEND_TEST_ALL_PREFIXES(HistoryBackendTest, DeleteThumbnailsDatabaseTest);
+ friend class ::TestingProfile;
+
+ // Computes the name of the specified database on disk.
+ FilePath GetThumbnailFileName() const;
+
+ // Returns the name of the Favicons database. This is the new name
+ // of the Thumbnails database.
+ // See ThumbnailDatabase::RenameAndDropThumbnails.
+ FilePath GetFaviconsFileName() const;
+ FilePath GetArchivedFileName() const;
+
+ class URLQuerier;
+ friend class URLQuerier;
+
+ // Does the work of Init.
+ void InitImpl();
+
+ // Adds a single visit to the database, updating the URL information such
+ // as visit and typed count. The visit ID of the added visit and the URL ID
+  // of the associated URL (whether added or not) are returned. Both values
+  // will be 0 on failure.
+ //
+ // This does not schedule database commits, it is intended to be used as a
+ // subroutine for AddPage only. It also assumes the database is valid.
+ std::pair<URLID, VisitID> AddPageVisit(const GURL& url,
+ base::Time time,
+ VisitID referring_visit,
+ PageTransition::Type transition);
+
+ // Returns a redirect chain in |redirects| for the VisitID
+ // |cur_visit|. |cur_visit| is assumed to be valid. Assumes that
+ // this HistoryBackend object has been Init()ed successfully.
+ void GetRedirectsFromSpecificVisit(
+ VisitID cur_visit, history::RedirectList* redirects);
+
+ // Similar to the above function except returns a redirect list ending
+ // at |cur_visit|.
+ void GetRedirectsToSpecificVisit(
+ VisitID cur_visit, history::RedirectList* redirects);
+
+ // Thumbnail Helpers ---------------------------------------------------------
+
+ // When a simple GetMostRecentRedirectsFrom() fails, this method is
+ // called which searches the last N visit sessions instead of just
+ // the current one. Returns true and puts thumbnail data in |data|
+ // if a proper thumbnail was found. Returns false otherwise. Assumes
+ // that this HistoryBackend object has been Init()ed successfully.
+ bool GetThumbnailFromOlderRedirect(
+ const GURL& page_url, std::vector<unsigned char>* data);
+
+ // Querying ------------------------------------------------------------------
+
+  // Backends for QueryHistory. *Basic() handles queries that are not FTS
+  // (full text search) queries and can be given directly to the history DB.
+  // The FTS version queries the text_database, then merges with the history
+  // DB. Both functions assume QueryHistory already checked the DB for
+  // validity.
+ void QueryHistoryBasic(URLDatabase* url_db, VisitDatabase* visit_db,
+ const QueryOptions& options, QueryResults* result);
+ void QueryHistoryFTS(const string16& text_query,
+ const QueryOptions& options,
+ QueryResults* result);
+
+ // Committing ----------------------------------------------------------------
+
+ // We always keep a transaction open on the history database so that multiple
+ // transactions can be batched. Periodically, these are flushed (use
+ // ScheduleCommit). This function does the commit to write any new changes to
+ // disk and opens a new transaction. This will be called automatically by
+ // ScheduleCommit, or it can be called explicitly if a caller really wants
+ // to write something to disk.
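+  //
+  // Illustrative flow (a sketch of the mechanism described above):
+  //   AddPage(...)       // mutates data inside the open transaction
+  //   ScheduleCommit()   // posts a CommitLaterTask if none is pending
+  //   ...                // the task later runs on this thread
+  //   Commit()           // writes the batch and opens a new transaction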
+ void Commit();
+
+ // Schedules a commit to happen in the future. We do this so that many
+ // operations over a period of time will be batched together. If there is
+ // already a commit scheduled for the future, this will do nothing.
+ void ScheduleCommit();
+
+ // Cancels the scheduled commit, if any. If there is no scheduled commit,
+ // does nothing.
+ void CancelScheduledCommit();
+
+ // Segments ------------------------------------------------------------------
+
+  // Walks back a segment chain to find the last visit with a non-null segment
+  // ID and returns it. If none is found, returns 0.
+ SegmentID GetLastSegmentID(VisitID from_visit);
+
+  // Updates the segment information. This is called internally when a page is
+  // added. Returns the ID of the segment that was updated.
+ SegmentID UpdateSegments(const GURL& url,
+ VisitID from_visit,
+ VisitID visit_id,
+ PageTransition::Type transition_type,
+ const base::Time ts);
+
+ // Favicons ------------------------------------------------------------------
+
+ // Used by both UpdateFavIconMappingAndFetch and GetFavIcon.
+ // If page_url is non-null and SetFavIcon has previously been invoked for
+ // icon_url the favicon url for page_url (and all redirects) is set to
+ // icon_url.
+ void UpdateFavIconMappingAndFetchImpl(
+ const GURL* page_url,
+ const GURL& icon_url,
+ scoped_refptr<GetFavIconRequest> request);
+
+ // Sets the favicon url id for page_url to id. This will also broadcast
+ // notifications as necessary.
+ void SetFavIconMapping(const GURL& page_url, FavIconID id);
+
+ // Generic stuff -------------------------------------------------------------
+
+ // Processes the next scheduled HistoryDBTask, scheduling this method
+ // to be invoked again if there are more tasks that need to run.
+ void ProcessDBTaskImpl();
+
+  // Releases all tasks in db_task_requests_ and clears it.
+ void ReleaseDBTasks();
+
+ // Schedules a broadcast of the given notification on the main thread. The
+ // details argument will have ownership taken by this function (it will be
+ // sent to the main thread and deleted there).
+ void BroadcastNotifications(NotificationType type,
+ HistoryDetails* details_deleted);
+
+ // Deleting all history ------------------------------------------------------
+
+ // Deletes all history. This is a special case of deleting that is separated
+ // from our normal dependency-following method for performance reasons. The
+ // logic lives here instead of ExpireHistoryBackend since it will cause
+ // re-initialization of some databases such as Thumbnails or Archived that
+ // could fail. When these databases are not valid, our pointers must be NULL,
+ // so we need to handle this type of operation to keep the pointers in sync.
+ void DeleteAllHistory();
+
+ // Given a vector of all URLs that we will keep, removes all thumbnails
+ // referenced by any URL, and also all favicons that aren't used by those
+ // URLs. The favicon IDs will change, so this will update the url rows in the
+ // vector to reference the new IDs.
+ bool ClearAllThumbnailHistory(std::vector<URLRow>* kept_urls);
+
+ // Deletes all information in the history database, except for the supplied
+ // set of URLs in the URL table (these should correspond to the bookmarked
+ // URLs).
+ //
+ // The IDs of the URLs may change.
+ bool ClearAllMainHistory(const std::vector<URLRow>& kept_urls);
+
+ // Returns the BookmarkService, blocking until it is loaded. This may return
+ // NULL during testing.
+ BookmarkService* GetBookmarkService();
+
+ // Data ----------------------------------------------------------------------
+
+ // Delegate. See the class definition above for more information. This will
+ // be NULL before Init is called and after Cleanup, but is guaranteed
+ // non-NULL in between.
+ scoped_ptr<Delegate> delegate_;
+
+ // Directory where database files will be stored.
+ FilePath history_dir_;
+
+ // The history/thumbnail databases. Either MAY BE NULL if the database could
+ // not be opened, all users must first check for NULL and return immediately
+ // if it is. The thumbnail DB may be NULL when the history one isn't, but not
+ // vice-versa.
+ scoped_ptr<HistoryDatabase> db_;
+ scoped_ptr<ThumbnailDatabase> thumbnail_db_;
+
+ // Stores old history in a larger, slower database.
+ scoped_ptr<ArchivedDatabase> archived_db_;
+
+ // Full text database manager, possibly NULL if the database could not be
+ // created.
+ scoped_ptr<TextDatabaseManager> text_database_;
+
+ // Manages expiration between the various databases.
+ ExpireHistoryBackend expirer_;
+
+ // A commit has been scheduled to occur sometime in the future. We can check
+ // non-null-ness to see if there is a commit scheduled in the future, and we
+ // can use the pointer to cancel the scheduled commit. There can be only one
+ // scheduled commit at a time (see ScheduleCommit).
+ scoped_refptr<CommitLaterTask> scheduled_commit_;
+
+  // Maps recent redirect destination pages to the chain of redirects that
+  // brought us there. Pages that did not have redirects, or were not the
+  // final redirect in a chain, will not be in this cache, nor will pages that
+  // redirected "too long" ago (as determined by ExpireOldRedirects above).
+  // It is used to set the titles & favicons of redirect sources to those of
+  // the destination.
+  //
+  // As with AddPage, the last item in the redirect chain will be the
+  // destination of the redirect (i.e., the key into recent_redirects_).
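+  //
+  // Illustrative contents (hypothetical data): after the chain A -> B -> C is
+  // added, recent_redirects_[C] == {A, B, C}; A and B get no entries of their
+  // own.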
+ typedef MRUCache<GURL, history::RedirectList> RedirectCache;
+ RedirectCache recent_redirects_;
+
+ // Timestamp of the last page addition request. We use this to detect when
+ // multiple additions are requested at the same time (within the resolution
+ // of the timer), so we can try to ensure they're unique when they're added
+ // to the database by using the last_recorded_time_ (q.v.). We still can't
+ // enforce or guarantee uniqueness, since the user might set his clock back.
+ base::Time last_requested_time_;
+
+ // Timestamp of the last page addition, as it was recorded in the database.
+ // If two or more requests come in at the same time, we increment that time
+ // by 1 us between them so it's more likely to be unique in the database.
+ // This keeps track of that higher-resolution timestamp.
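+  //
+  // Example (illustrative): three requests arriving within the timer's
+  // resolution at time T are recorded as T, T + 1us, and T + 2us.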
+ base::Time last_recorded_time_;
+
+ // Timestamp of the first entry in our database.
+ base::Time first_recorded_time_;
+
+  // When non-NULL, the task to invoke when this backend is destroyed, and the
+  // message loop on which to invoke it. See SetOnBackendDestroyTask.
+ MessageLoop* backend_destroy_message_loop_;
+ Task* backend_destroy_task_;
+
+ // Tracks page transition types.
+ VisitTracker tracker_;
+
+  // Tracks whether we have already purged obsolete segment data.
+ bool segment_queried_;
+
+ // HistoryDBTasks to run. Be sure to AddRef when adding, and Release when
+ // done.
+ std::list<HistoryDBTaskRequest*> db_task_requests_;
+
+ // Used to determine if a URL is bookmarked. This is owned by the Profile and
+ // may be NULL (during testing).
+ //
+ // Use GetBookmarkService to access this, which makes sure the service is
+ // loaded.
+ BookmarkService* bookmark_service_;
+
+ // Publishes the history to all indexers which are registered to receive
+ // history data from us. Can be NULL if there are no listeners.
+ scoped_ptr<HistoryPublisher> history_publisher_;
+
+ DISALLOW_COPY_AND_ASSIGN(HistoryBackend);
+};
+
+} // namespace history
+
+#endif // CHROME_BROWSER_HISTORY_HISTORY_BACKEND_H_
diff --git a/chrome/browser/history/history_backend_unittest.cc b/chrome/browser/history/history_backend_unittest.cc
new file mode 100644
index 0000000..24cc1cd
--- /dev/null
+++ b/chrome/browser/history/history_backend_unittest.cc
@@ -0,0 +1,606 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/file_path.h"
+#include "base/file_util.h"
+#include "base/path_service.h"
+#include "base/ref_counted.h"
+#include "base/scoped_ptr.h"
+#include "base/utf_string_conversions.h"
+#include "chrome/browser/bookmarks/bookmark_model.h"
+#include "chrome/browser/history/history_backend.h"
+#include "chrome/browser/history/history_notifications.h"
+#include "chrome/browser/history/in_memory_history_backend.h"
+#include "chrome/browser/history/in_memory_database.h"
+#include "chrome/common/notification_service.h"
+#include "chrome/common/thumbnail_score.h"
+#include "chrome/tools/profiles/thumbnail-inl.h"
+#include "gfx/codec/jpeg_codec.h"
+#include "googleurl/src/gurl.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+using base::Time;
+
+// This file only tests functionality where it is most convenient to call the
+// backend directly. Most of the history backend functions are tested by the
+// history unit test. Because of the elaborate callbacks involved, this is no
+// harder than calling it directly for many things.
+
+namespace history {
+
+class HistoryBackendTest;
+
+// This must be a separate object since HistoryBackend manages its lifetime.
+// This just forwards the messages we're interested in to the test object.
+class HistoryBackendTestDelegate : public HistoryBackend::Delegate {
+ public:
+ explicit HistoryBackendTestDelegate(HistoryBackendTest* test) : test_(test) {}
+
+ virtual void NotifyProfileError(int message_id) {}
+ virtual void SetInMemoryBackend(InMemoryHistoryBackend* backend);
+ virtual void BroadcastNotifications(NotificationType type,
+ HistoryDetails* details);
+ virtual void DBLoaded();
+ virtual void StartTopSitesMigration();
+
+ private:
+ // Not owned by us.
+ HistoryBackendTest* test_;
+
+ DISALLOW_COPY_AND_ASSIGN(HistoryBackendTestDelegate);
+};
+
+class HistoryBackendTest : public testing::Test {
+ public:
+ HistoryBackendTest() : bookmark_model_(NULL), loaded_(false) {}
+ virtual ~HistoryBackendTest() {
+ }
+
+ protected:
+ scoped_refptr<HistoryBackend> backend_; // Will be NULL on init failure.
+ scoped_ptr<InMemoryHistoryBackend> mem_backend_;
+
+ void AddRedirectChain(const char* sequence[], int page_id) {
+ history::RedirectList redirects;
+ for (int i = 0; sequence[i] != NULL; ++i)
+ redirects.push_back(GURL(sequence[i]));
+
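+    // Fabricate an arbitrary pointer value to use as the request's id scope,
+    // which identifies the renderer issuing the navigation (test-only value).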
+ int int_scope = 1;
+ void* scope = 0;
+ memcpy(&scope, &int_scope, sizeof(int_scope));
+ scoped_refptr<history::HistoryAddPageArgs> request(
+ new history::HistoryAddPageArgs(
+ redirects.back(), Time::Now(), scope, page_id, GURL(),
+ redirects, PageTransition::LINK, true));
+ backend_->AddPage(request);
+ }
+
+ // Adds CLIENT_REDIRECT page transition.
+ // |url1| is the source URL and |url2| is the destination.
+ // |did_replace| is true if the transition is non-user initiated and the
+ // navigation entry for |url2| has replaced that for |url1|. The possibly
+ // updated transition code of the visit records for |url1| and |url2| is
+ // returned by filling in |*transition1| and |*transition2|, respectively.
+ void AddClientRedirect(const GURL& url1, const GURL& url2, bool did_replace,
+ int* transition1, int* transition2) {
+ void* const dummy_scope = reinterpret_cast<void*>(0x87654321);
+ history::RedirectList redirects;
+ if (url1.is_valid())
+ redirects.push_back(url1);
+ if (url2.is_valid())
+ redirects.push_back(url2);
+ scoped_refptr<HistoryAddPageArgs> request(
+ new HistoryAddPageArgs(url2, base::Time(), dummy_scope, 0, url1,
+ redirects, PageTransition::CLIENT_REDIRECT, did_replace));
+ backend_->AddPage(request);
+
+ *transition1 = getTransition(url1);
+ *transition2 = getTransition(url2);
+ }
+
+ int getTransition(const GURL& url) {
+ if (!url.is_valid())
+ return 0;
+ URLRow row;
+ URLID id = backend_->db()->GetRowForURL(url, &row);
+ VisitVector visits;
+ EXPECT_TRUE(backend_->db()->GetVisitsForURL(id, &visits));
+ return visits[0].transition;
+ }
+
+ BookmarkModel bookmark_model_;
+
+ protected:
+ bool loaded_;
+
+ private:
+ friend class HistoryBackendTestDelegate;
+
+ // testing::Test
+ virtual void SetUp() {
+ if (!file_util::CreateNewTempDirectory(FILE_PATH_LITERAL("BackendTest"),
+ &test_dir_))
+ return;
+ backend_ = new HistoryBackend(test_dir_,
+ new HistoryBackendTestDelegate(this),
+ &bookmark_model_);
+ backend_->Init(false);
+ }
+ virtual void TearDown() {
+ backend_->Closing();
+ backend_ = NULL;
+ mem_backend_.reset();
+ file_util::Delete(test_dir_, true);
+ }
+
+ void SetInMemoryBackend(InMemoryHistoryBackend* backend) {
+ mem_backend_.reset(backend);
+ }
+
+ void BroadcastNotifications(NotificationType type,
+ HistoryDetails* details) {
+ // Send the notifications directly to the in-memory database.
+ Details<HistoryDetails> det(details);
+ mem_backend_->Observe(type, Source<HistoryBackendTest>(NULL), det);
+
+ // The backend passes ownership of the details pointer to us.
+ delete details;
+ }
+
+ MessageLoop message_loop_;
+ FilePath test_dir_;
+};
+
+void HistoryBackendTestDelegate::SetInMemoryBackend(
+ InMemoryHistoryBackend* backend) {
+ test_->SetInMemoryBackend(backend);
+}
+
+void HistoryBackendTestDelegate::BroadcastNotifications(
+ NotificationType type,
+ HistoryDetails* details) {
+ test_->BroadcastNotifications(type, details);
+}
+
+void HistoryBackendTestDelegate::DBLoaded() {
+ test_->loaded_ = true;
+}
+
+void HistoryBackendTestDelegate::StartTopSitesMigration() {
+ test_->backend_->MigrateThumbnailsDatabase();
+}
+
+TEST_F(HistoryBackendTest, Loaded) {
+ ASSERT_TRUE(backend_.get());
+ ASSERT_TRUE(loaded_);
+}
+
+TEST_F(HistoryBackendTest, DeleteAll) {
+ ASSERT_TRUE(backend_.get());
+
+  // Add two favicons, using the characters '1' and '2' for the image data.
+  // Note that we do these in the opposite order. This is so the first one
+  // gets ID 2 auto-assigned by the database, which will change when the other
+  // one is deleted. This way we can test that updating works properly.
+ GURL favicon_url1("http://www.google.com/favicon.ico");
+ GURL favicon_url2("http://news.google.com/favicon.ico");
+ FavIconID favicon2 = backend_->thumbnail_db_->AddFavIcon(favicon_url2);
+ FavIconID favicon1 = backend_->thumbnail_db_->AddFavIcon(favicon_url1);
+
+ std::vector<unsigned char> data;
+ data.push_back('1');
+ EXPECT_TRUE(backend_->thumbnail_db_->SetFavIcon(favicon1,
+ new RefCountedBytes(data), Time::Now()));
+
+ data[0] = '2';
+ EXPECT_TRUE(backend_->thumbnail_db_->SetFavIcon(
+ favicon2, new RefCountedBytes(data), Time::Now()));
+
+ // First visit two URLs.
+ URLRow row1(GURL("http://www.google.com/"));
+ row1.set_visit_count(2);
+ row1.set_typed_count(1);
+ row1.set_last_visit(Time::Now());
+ row1.set_favicon_id(favicon1);
+
+ URLRow row2(GURL("http://news.google.com/"));
+ row2.set_visit_count(1);
+ row2.set_last_visit(Time::Now());
+ row2.set_favicon_id(favicon2);
+
+ std::vector<URLRow> rows;
+ rows.push_back(row2); // Reversed order for the same reason as favicons.
+ rows.push_back(row1);
+ backend_->AddPagesWithDetails(rows);
+
+ URLID row1_id = backend_->db_->GetRowForURL(row1.url(), NULL);
+ URLID row2_id = backend_->db_->GetRowForURL(row2.url(), NULL);
+
+ // Get the two visits for the URLs we just added.
+ VisitVector visits;
+ backend_->db_->GetVisitsForURL(row1_id, &visits);
+ ASSERT_EQ(1U, visits.size());
+ VisitID visit1_id = visits[0].visit_id;
+
+ visits.clear();
+ backend_->db_->GetVisitsForURL(row2_id, &visits);
+ ASSERT_EQ(1U, visits.size());
+ VisitID visit2_id = visits[0].visit_id;
+
+ // The in-memory backend should have been set and it should have gotten the
+ // typed URL.
+ ASSERT_TRUE(mem_backend_.get());
+ URLRow outrow1;
+ EXPECT_TRUE(mem_backend_->db_->GetRowForURL(row1.url(), NULL));
+
+ // Add thumbnails for each page.
+ ThumbnailScore score(0.25, true, true);
+ scoped_ptr<SkBitmap> google_bitmap(
+ gfx::JPEGCodec::Decode(kGoogleThumbnail, sizeof(kGoogleThumbnail)));
+
+ Time time;
+ GURL gurl;
+ backend_->thumbnail_db_->SetPageThumbnail(gurl, row1_id, *google_bitmap,
+ score, time);
+ scoped_ptr<SkBitmap> weewar_bitmap(
+ gfx::JPEGCodec::Decode(kWeewarThumbnail, sizeof(kWeewarThumbnail)));
+ backend_->thumbnail_db_->SetPageThumbnail(gurl, row2_id, *weewar_bitmap,
+ score, time);
+
+ // Star row1.
+ bookmark_model_.AddURL(
+ bookmark_model_.GetBookmarkBarNode(), 0, std::wstring(), row1.url());
+
+ // Set full text index for each one.
+ backend_->text_database_->AddPageData(row1.url(), row1_id, visit1_id,
+ row1.last_visit(),
+ UTF8ToUTF16("Title 1"),
+ UTF8ToUTF16("Body 1"));
+ backend_->text_database_->AddPageData(row2.url(), row2_id, visit2_id,
+ row2.last_visit(),
+ UTF8ToUTF16("Title 2"),
+ UTF8ToUTF16("Body 2"));
+
+ // Now finally clear all history.
+ backend_->DeleteAllHistory();
+
+ // The first URL should be preserved but the time should be cleared.
+ EXPECT_TRUE(backend_->db_->GetRowForURL(row1.url(), &outrow1));
+ EXPECT_EQ(0, outrow1.visit_count());
+ EXPECT_EQ(0, outrow1.typed_count());
+ EXPECT_TRUE(Time() == outrow1.last_visit());
+
+ // The second row should be deleted.
+ URLRow outrow2;
+ EXPECT_FALSE(backend_->db_->GetRowForURL(row2.url(), &outrow2));
+
+ // All visits should be deleted for both URLs.
+ VisitVector all_visits;
+ backend_->db_->GetAllVisitsInRange(Time(), Time(), 0, &all_visits);
+ ASSERT_EQ(0U, all_visits.size());
+
+ // All thumbnails should be deleted.
+ std::vector<unsigned char> out_data;
+ EXPECT_FALSE(backend_->thumbnail_db_->GetPageThumbnail(outrow1.id(),
+ &out_data));
+ EXPECT_FALSE(backend_->thumbnail_db_->GetPageThumbnail(row2_id, &out_data));
+
+  // We should have a favicon for the first URL only. We look them up by
+  // favicon URL since the IDs may have changed.
+ FavIconID out_favicon1 = backend_->thumbnail_db_->
+ GetFavIconIDForFavIconURL(favicon_url1);
+ EXPECT_TRUE(out_favicon1);
+ FavIconID out_favicon2 = backend_->thumbnail_db_->
+ GetFavIconIDForFavIconURL(favicon_url2);
+ EXPECT_FALSE(out_favicon2) << "Favicon not deleted";
+
+ // The remaining URL should still reference the same favicon, even if its
+ // ID has changed.
+ EXPECT_EQ(out_favicon1, outrow1.favicon_id());
+
+ // The first URL should still be bookmarked.
+ EXPECT_TRUE(bookmark_model_.IsBookmarked(row1.url()));
+
+ // The full text database should have no data.
+ std::vector<TextDatabase::Match> text_matches;
+ Time first_time_searched;
+ backend_->text_database_->GetTextMatches(UTF8ToUTF16("Body"),
+ QueryOptions(),
+ &text_matches,
+ &first_time_searched);
+ EXPECT_EQ(0U, text_matches.size());
+}
+
+TEST_F(HistoryBackendTest, URLsNoLongerBookmarked) {
+ GURL favicon_url1("http://www.google.com/favicon.ico");
+ GURL favicon_url2("http://news.google.com/favicon.ico");
+ FavIconID favicon2 = backend_->thumbnail_db_->AddFavIcon(favicon_url2);
+ FavIconID favicon1 = backend_->thumbnail_db_->AddFavIcon(favicon_url1);
+
+ std::vector<unsigned char> data;
+ data.push_back('1');
+ EXPECT_TRUE(backend_->thumbnail_db_->SetFavIcon(
+ favicon1, new RefCountedBytes(data), Time::Now()));
+
+ data[0] = '2';
+ EXPECT_TRUE(backend_->thumbnail_db_->SetFavIcon(
+ favicon2, new RefCountedBytes(data), Time::Now()));
+
+ // First visit two URLs.
+ URLRow row1(GURL("http://www.google.com/"));
+ row1.set_visit_count(2);
+ row1.set_typed_count(1);
+ row1.set_last_visit(Time::Now());
+ row1.set_favicon_id(favicon1);
+
+ URLRow row2(GURL("http://news.google.com/"));
+ row2.set_visit_count(1);
+ row2.set_last_visit(Time::Now());
+ row2.set_favicon_id(favicon2);
+
+ std::vector<URLRow> rows;
+ rows.push_back(row2); // Reversed order for the same reason as favicons.
+ rows.push_back(row1);
+ backend_->AddPagesWithDetails(rows);
+
+ URLID row1_id = backend_->db_->GetRowForURL(row1.url(), NULL);
+ URLID row2_id = backend_->db_->GetRowForURL(row2.url(), NULL);
+
+ // Star the two URLs.
+ bookmark_model_.SetURLStarred(row1.url(), std::wstring(), true);
+ bookmark_model_.SetURLStarred(row2.url(), std::wstring(), true);
+
+ // Delete url 2. Because url 2 is starred this won't delete the URL, only
+ // the visits.
+ backend_->expirer_.DeleteURL(row2.url());
+
+ // Make sure url 2 is still valid, but has no visits.
+ URLRow tmp_url_row;
+ EXPECT_EQ(row2_id, backend_->db_->GetRowForURL(row2.url(), NULL));
+ VisitVector visits;
+ backend_->db_->GetVisitsForURL(row2_id, &visits);
+ EXPECT_EQ(0U, visits.size());
+ // The favicon should still be valid.
+ EXPECT_EQ(favicon2,
+ backend_->thumbnail_db_->GetFavIconIDForFavIconURL(favicon_url2));
+
+ // Unstar row2.
+ bookmark_model_.SetURLStarred(row2.url(), std::wstring(), false);
+ // Tell the backend it was unstarred. We have to explicitly do this as
+ // BookmarkModel isn't wired up to the backend during testing.
+ std::set<GURL> unstarred_urls;
+ unstarred_urls.insert(row2.url());
+ backend_->URLsNoLongerBookmarked(unstarred_urls);
+
+ // The URL should no longer exist.
+ EXPECT_FALSE(backend_->db_->GetRowForURL(row2.url(), &tmp_url_row));
+ // And the favicon should be deleted.
+ EXPECT_EQ(0,
+ backend_->thumbnail_db_->GetFavIconIDForFavIconURL(favicon_url2));
+
+ // Unstar row 1.
+ bookmark_model_.SetURLStarred(row1.url(), std::wstring(), false);
+ // Tell the backend it was unstarred. We have to explicitly do this as
+ // BookmarkModel isn't wired up to the backend during testing.
+ unstarred_urls.clear();
+ unstarred_urls.insert(row1.url());
+ backend_->URLsNoLongerBookmarked(unstarred_urls);
+
+ // The URL should still exist (because there were visits).
+ EXPECT_EQ(row1_id, backend_->db_->GetRowForURL(row1.url(), NULL));
+
+ // There should still be visits.
+ visits.clear();
+ backend_->db_->GetVisitsForURL(row1_id, &visits);
+ EXPECT_EQ(1U, visits.size());
+
+ // The favicon should still be valid.
+ EXPECT_EQ(favicon1,
+ backend_->thumbnail_db_->GetFavIconIDForFavIconURL(favicon_url1));
+}
+
+TEST_F(HistoryBackendTest, GetPageThumbnailAfterRedirects) {
+ ASSERT_TRUE(backend_.get());
+
+ const char* base_url = "http://mail";
+ const char* thumbnail_url = "http://mail.google.com";
+ const char* first_chain[] = {
+ base_url,
+ thumbnail_url,
+ NULL
+ };
+ AddRedirectChain(first_chain, 0);
+
+ // Add a thumbnail for the end of that redirect chain.
+ scoped_ptr<SkBitmap> thumbnail(
+ gfx::JPEGCodec::Decode(kGoogleThumbnail, sizeof(kGoogleThumbnail)));
+ backend_->SetPageThumbnail(GURL(thumbnail_url), *thumbnail,
+ ThumbnailScore(0.25, true, true));
+
+  // Write a second URL chain so that if you were to simply check what
+  // "http://mail" redirects to, you wouldn't see the URL that contains
+  // the thumbnail.
+ const char* second_chain[] = {
+ base_url,
+ "http://mail.google.com/somewhere/else",
+ NULL
+ };
+ AddRedirectChain(second_chain, 1);
+
+ // Now try to get the thumbnail for the base url. It shouldn't be
+ // distracted by the second chain and should return the thumbnail
+  // attached to thumbnail_url.
+ scoped_refptr<RefCountedBytes> data;
+ backend_->GetPageThumbnailDirectly(GURL(base_url), &data);
+
+ EXPECT_TRUE(data.get());
+}
+
+// Tests a handful of assertions for a navigation with a type of
+// KEYWORD_GENERATED.
+TEST_F(HistoryBackendTest, KeywordGenerated) {
+ ASSERT_TRUE(backend_.get());
+
+ GURL url("http://google.com");
+
+ Time visit_time = Time::Now() - base::TimeDelta::FromDays(1);
+ scoped_refptr<HistoryAddPageArgs> request(
+ new HistoryAddPageArgs(url, visit_time, NULL, 0, GURL(),
+ history::RedirectList(),
+ PageTransition::KEYWORD_GENERATED, false));
+ backend_->AddPage(request);
+
+ // A row should have been added for the url.
+ URLRow row;
+ URLID url_id = backend_->db()->GetRowForURL(url, &row);
+ ASSERT_NE(0, url_id);
+
+ // The typed count should be 1.
+ ASSERT_EQ(1, row.typed_count());
+
+ // KEYWORD_GENERATED urls should not be added to the segment db.
+ std::string segment_name = VisitSegmentDatabase::ComputeSegmentName(url);
+ EXPECT_EQ(0, backend_->db()->GetSegmentNamed(segment_name));
+
+ // One visit should be added.
+ VisitVector visits;
+ EXPECT_TRUE(backend_->db()->GetVisitsForURL(url_id, &visits));
+ EXPECT_EQ(1U, visits.size());
+
+ // But no visible visits.
+ visits.clear();
+ backend_->db()->GetVisibleVisitsInRange(base::Time(), base::Time(), 1,
+ &visits);
+ EXPECT_TRUE(visits.empty());
+
+ // Expire the visits.
+ std::set<GURL> restrict_urls;
+ backend_->expire_backend()->ExpireHistoryBetween(restrict_urls,
+ visit_time, Time::Now());
+
+ // The visit should have been nuked.
+ visits.clear();
+ EXPECT_TRUE(backend_->db()->GetVisitsForURL(url_id, &visits));
+ EXPECT_TRUE(visits.empty());
+
+ // As well as the url.
+ ASSERT_EQ(0, backend_->db()->GetRowForURL(url, &row));
+}
+
+TEST_F(HistoryBackendTest, ClientRedirect) {
+ ASSERT_TRUE(backend_.get());
+
+ int transition1;
+ int transition2;
+
+ // Initial transition to page A.
+ GURL url_a("http://google.com/a");
+ AddClientRedirect(GURL(), url_a, false, &transition1, &transition2);
+ EXPECT_TRUE(transition2 & PageTransition::CHAIN_END);
+
+ // User initiated redirect to page B.
+ GURL url_b("http://google.com/b");
+ AddClientRedirect(url_a, url_b, false, &transition1, &transition2);
+ EXPECT_TRUE(transition1 & PageTransition::CHAIN_END);
+ EXPECT_TRUE(transition2 & PageTransition::CHAIN_END);
+
+ // Non-user initiated redirect to page C.
+ GURL url_c("http://google.com/c");
+ AddClientRedirect(url_b, url_c, true, &transition1, &transition2);
+ EXPECT_FALSE(transition1 & PageTransition::CHAIN_END);
+ EXPECT_TRUE(transition2 & PageTransition::CHAIN_END);
+}
+
+TEST_F(HistoryBackendTest, ImportedFaviconsTest) {
+ // Setup test data - two Urls in the history, one with favicon assigned and
+ // one without.
+ GURL favicon_url1("http://www.google.com/favicon.ico");
+ FavIconID favicon1 = backend_->thumbnail_db_->AddFavIcon(favicon_url1);
+ std::vector<unsigned char> data;
+ data.push_back('1');
+ EXPECT_TRUE(backend_->thumbnail_db_->SetFavIcon(favicon1,
+ RefCountedBytes::TakeVector(&data), Time::Now()));
+ URLRow row1(GURL("http://www.google.com/"));
+ row1.set_favicon_id(favicon1);
+ row1.set_visit_count(1);
+ row1.set_last_visit(Time::Now());
+ URLRow row2(GURL("http://news.google.com/"));
+ row2.set_visit_count(1);
+ row2.set_last_visit(Time::Now());
+ std::vector<URLRow> rows;
+ rows.push_back(row1);
+ rows.push_back(row2);
+ backend_->AddPagesWithDetails(rows);
+ URLRow url_row1, url_row2;
+ EXPECT_FALSE(backend_->db_->GetRowForURL(row1.url(), &url_row1) == 0);
+ EXPECT_FALSE(backend_->db_->GetRowForURL(row2.url(), &url_row2) == 0);
+ EXPECT_FALSE(url_row1.favicon_id() == 0);
+ EXPECT_TRUE(url_row2.favicon_id() == 0);
+
+ // Now provide one imported favicon for both URLs already in the registry.
+ // The new favicon should only be used with the URL that doesn't already have
+ // a favicon.
+ std::vector<history::ImportedFavIconUsage> favicons;
+ history::ImportedFavIconUsage favicon;
+ favicon.favicon_url = GURL("http://news.google.com/favicon.ico");
+ favicon.png_data.push_back('2');
+ favicon.urls.insert(row1.url());
+ favicon.urls.insert(row2.url());
+ favicons.push_back(favicon);
+ backend_->SetImportedFavicons(favicons);
+ EXPECT_FALSE(backend_->db_->GetRowForURL(row1.url(), &url_row1) == 0);
+ EXPECT_FALSE(backend_->db_->GetRowForURL(row2.url(), &url_row2) == 0);
+ EXPECT_FALSE(url_row1.favicon_id() == 0);
+ EXPECT_FALSE(url_row2.favicon_id() == 0);
+ EXPECT_FALSE(url_row1.favicon_id() == url_row2.favicon_id());
+
+  // A URL should not be added to history (to store the favicon) if the URL
+  // is not bookmarked.
+ GURL url3("http://mail.google.com");
+ favicons.clear();
+ favicon.favicon_url = GURL("http://mail.google.com/favicon.ico");
+ favicon.png_data.push_back('3');
+ favicon.urls.insert(url3);
+ favicons.push_back(favicon);
+ backend_->SetImportedFavicons(favicons);
+ URLRow url_row3;
+ EXPECT_TRUE(backend_->db_->GetRowForURL(url3, &url_row3) == 0);
+
+ // If the URL is bookmarked, it should get added to history with 0 visits.
+ bookmark_model_.AddURL(bookmark_model_.GetBookmarkBarNode(), 0,
+ std::wstring(), url3);
+ backend_->SetImportedFavicons(favicons);
+ EXPECT_FALSE(backend_->db_->GetRowForURL(url3, &url_row3) == 0);
+ EXPECT_TRUE(url_row3.visit_count() == 0);
+}
+
+TEST_F(HistoryBackendTest, StripUsernamePasswordTest) {
+ ASSERT_TRUE(backend_.get());
+
+ GURL url("http://anyuser:anypass@www.google.com");
+ GURL stripped_url("http://www.google.com");
+
+ // Clear all history.
+ backend_->DeleteAllHistory();
+
+ // Visit the url with username, password.
+ backend_->AddPageVisit(url, base::Time::Now(), 0,
+ PageTransition::GetQualifier(PageTransition::TYPED));
+
+ // Fetch the row information about stripped url from history db.
+ VisitVector visits;
+ URLID row_id = backend_->db_->GetRowForURL(stripped_url, NULL);
+ backend_->db_->GetVisitsForURL(row_id, &visits);
+
+ // Check if stripped url is stored in database.
+ ASSERT_EQ(1U, visits.size());
+}
+
+TEST_F(HistoryBackendTest, DeleteThumbnailsDatabaseTest) {
+ EXPECT_TRUE(backend_->thumbnail_db_->NeedsMigrationToTopSites());
+ backend_->delegate_->StartTopSitesMigration();
+ EXPECT_FALSE(backend_->thumbnail_db_->NeedsMigrationToTopSites());
+}
+
+} // namespace history
diff --git a/chrome/browser/history/history_database.cc b/chrome/browser/history/history_database.cc
new file mode 100644
index 0000000..26bb81a
--- /dev/null
+++ b/chrome/browser/history/history_database.cc
@@ -0,0 +1,337 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/history/history_database.h"
+
+#include <algorithm>
+#include <set>
+#include <string>
+#include "app/sql/transaction.h"
+#include "base/command_line.h"
+#include "base/file_util.h"
+#if defined(OS_MACOSX)
+#include "base/mac_util.h"
+#endif
+#include "base/histogram.h"
+#include "base/rand_util.h"
+#include "base/string_util.h"
+#include "chrome/browser/diagnostics/sqlite_diagnostics.h"
+#include "chrome/common/chrome_switches.h"
+
+namespace history {
+
+namespace {
+
+// Current version number. We write databases at the "current" version number,
+// but any previous version that can read the "compatible" one can make do
+// with our database without *too* many bad effects.
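+// For example, a build whose current version is 16 can still open a database
+// written at version 18, because the database's stored compatible version
+// (16) is not greater than the build's version; see EnsureCurrentVersion().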
+static const int kCurrentVersionNumber = 18;
+static const int kCompatibleVersionNumber = 16;
+static const char kEarlyExpirationThresholdKey[] = "early_expiration_threshold";
+
+void ComputeDatabaseMetrics(const FilePath& history_name,
+ sql::Connection& db) {
+  if (base::RandInt(1, 100) != 50)
+    return;  // Only do this computation on ~1% of runs since it is expensive.
+
+ int64 file_size = 0;
+ if (!file_util::GetFileSize(history_name, &file_size))
+ return;
+ int file_mb = static_cast<int>(file_size / (1024 * 1024));
+ UMA_HISTOGRAM_MEMORY_MB("History.DatabaseFileMB", file_mb);
+
+ sql::Statement url_count(db.GetUniqueStatement("SELECT count(*) FROM urls"));
+ if (!url_count || !url_count.Step())
+ return;
+ UMA_HISTOGRAM_COUNTS("History.URLTableCount", url_count.ColumnInt(0));
+
+ sql::Statement visit_count(db.GetUniqueStatement(
+ "SELECT count(*) FROM visits"));
+ if (!visit_count || !visit_count.Step())
+ return;
+ UMA_HISTOGRAM_COUNTS("History.VisitTableCount", visit_count.ColumnInt(0));
+}
+
+} // namespace
+
+HistoryDatabase::HistoryDatabase()
+ : needs_version_17_migration_(false),
+ needs_version_18_migration_(false) {
+}
+
+HistoryDatabase::~HistoryDatabase() {
+}
+
+sql::InitStatus HistoryDatabase::Init(const FilePath& history_name,
+ const FilePath& bookmarks_path) {
+ // Set the exceptional sqlite error handler.
+ db_.set_error_delegate(GetErrorHandlerForHistoryDb());
+
+  // Set the database page size to something a little larger to give us
+  // better performance (we're typically seek-limited rather than
+  // bandwidth-limited).
+ // This only has an effect before any tables have been created, otherwise
+ // this is a NOP. Must be a power of 2 and a max of 8192.
+ db_.set_page_size(4096);
+
+  // Increase the cache size. The page size, plus a little extra, times this
+  // value gives the maximum amount of memory the cache will use.
+  // 6000 pages * 4KB = 24MB
+ // TODO(brettw) scale this value to the amount of available memory.
+ db_.set_cache_size(6000);
+
+ // Note that we don't set exclusive locking here. That's done by
+ // BeginExclusiveMode below which is called later (we have to be in shared
+ // mode to start out for the in-memory backend to read the data).
+
+ if (!db_.Open(history_name))
+ return sql::INIT_FAILURE;
+
+  // Wrap the rest of init in a transaction. This will prevent the database
+  // from getting corrupted if we crash in the middle of initialization or
+  // migration.
+ sql::Transaction committer(&db_);
+ if (!committer.Begin())
+ return sql::INIT_FAILURE;
+
+#if defined(OS_MACOSX)
+ // Exclude the history file and its journal from backups.
+ mac_util::SetFileBackupExclusion(history_name, true);
+ FilePath::StringType history_name_string(history_name.value());
+ history_name_string += "-journal";
+ FilePath history_journal_name(history_name_string);
+ mac_util::SetFileBackupExclusion(history_journal_name, true);
+#endif
+
+ // Prime the cache.
+ db_.Preload();
+
+ // Create the tables and indices.
+ // NOTE: If you add something here, also add it to
+  //       RecreateAllTablesButURL.
+ if (!meta_table_.Init(&db_, GetCurrentVersion(), kCompatibleVersionNumber))
+ return sql::INIT_FAILURE;
+ if (!CreateURLTable(false) || !InitVisitTable() ||
+ !InitKeywordSearchTermsTable() || !InitDownloadTable() ||
+ !InitSegmentTables())
+ return sql::INIT_FAILURE;
+ CreateMainURLIndex();
+ CreateSupplimentaryURLIndices();
+
+ // Version check.
+ sql::InitStatus version_status = EnsureCurrentVersion(bookmarks_path);
+ if (version_status != sql::INIT_OK)
+ return version_status;
+
+ ComputeDatabaseMetrics(history_name, db_);
+ return committer.Commit() ? sql::INIT_OK : sql::INIT_FAILURE;
+}
+
+void HistoryDatabase::BeginExclusiveMode() {
+ // We can't use set_exclusive_locking() since that only has an effect before
+ // the DB is opened.
+ db_.Execute("PRAGMA locking_mode=EXCLUSIVE");
+}
+
+// static
+int HistoryDatabase::GetCurrentVersion() {
+ // Temporary solution while TopSites is behind a flag. If there is
+ // no flag, we are still using the Thumbnails file, i.e. we are at
+ // version 17.
+ if (CommandLine::ForCurrentProcess()->HasSwitch(switches::kTopSites)) {
+ return kCurrentVersionNumber;
+ } else {
+ return kCurrentVersionNumber - 1;
+ }
+}
+
+void HistoryDatabase::BeginTransaction() {
+ db_.BeginTransaction();
+}
+
+void HistoryDatabase::CommitTransaction() {
+ db_.CommitTransaction();
+}
+
+bool HistoryDatabase::RecreateAllTablesButURL() {
+ if (!DropVisitTable())
+ return false;
+ if (!InitVisitTable())
+ return false;
+
+ if (!DropKeywordSearchTermsTable())
+ return false;
+ if (!InitKeywordSearchTermsTable())
+ return false;
+
+ if (!DropSegmentTables())
+ return false;
+ if (!InitSegmentTables())
+ return false;
+
+  // We also add the supplementary URL indices at this point. These indices
+  // cover parts of the URL table that weren't automatically indexed when the
+  // temporary URL table was created.
+ CreateSupplimentaryURLIndices();
+ return true;
+}
+
+void HistoryDatabase::Vacuum() {
+ DCHECK_EQ(0, db_.transaction_nesting()) <<
+ "Can not have a transaction when vacuuming.";
+ db_.Execute("VACUUM");
+}
+
+bool HistoryDatabase::SetSegmentID(VisitID visit_id, SegmentID segment_id) {
+ sql::Statement s(db_.GetCachedStatement(SQL_FROM_HERE,
+ "UPDATE visits SET segment_id = ? WHERE id = ?"));
+ if (!s) {
+ NOTREACHED() << db_.GetErrorMessage();
+ return false;
+ }
+ s.BindInt64(0, segment_id);
+ s.BindInt64(1, visit_id);
+  bool result = s.Run();
+  // Check the change count after running the statement so that it reflects
+  // this UPDATE; exactly one row should have been modified.
+  DCHECK(db_.GetLastChangeCount() == 1);
+  return result;
+}
+
+SegmentID HistoryDatabase::GetSegmentID(VisitID visit_id) {
+ sql::Statement s(db_.GetCachedStatement(SQL_FROM_HERE,
+ "SELECT segment_id FROM visits WHERE id = ?"));
+ if (!s) {
+ NOTREACHED() << db_.GetErrorMessage();
+ return 0;
+ }
+
+ s.BindInt64(0, visit_id);
+ if (s.Step()) {
+ if (s.ColumnType(0) == sql::COLUMN_TYPE_NULL)
+ return 0;
+ else
+ return s.ColumnInt64(0);
+ }
+ return 0;
+}
+
+base::Time HistoryDatabase::GetEarlyExpirationThreshold() {
+ if (!cached_early_expiration_threshold_.is_null())
+ return cached_early_expiration_threshold_;
+
+ int64 threshold;
+ if (!meta_table_.GetValue(kEarlyExpirationThresholdKey, &threshold)) {
+ // Set to a very early non-zero time, so it's before all history, but not
+ // zero to avoid re-retrieval.
+ threshold = 1L;
+ }
+
+ cached_early_expiration_threshold_ = base::Time::FromInternalValue(threshold);
+ return cached_early_expiration_threshold_;
+}
+
+void HistoryDatabase::UpdateEarlyExpirationThreshold(base::Time threshold) {
+ meta_table_.SetValue(kEarlyExpirationThresholdKey,
+ threshold.ToInternalValue());
+ cached_early_expiration_threshold_ = threshold;
+}
+
+sql::Connection& HistoryDatabase::GetDB() {
+ return db_;
+}
+
+// Migration -------------------------------------------------------------------
+
+sql::InitStatus HistoryDatabase::EnsureCurrentVersion(
+ const FilePath& tmp_bookmarks_path) {
+ // We can't read databases newer than we were designed for.
+ if (meta_table_.GetCompatibleVersionNumber() > kCurrentVersionNumber) {
+ LOG(WARNING) << "History database is too new.";
+ return sql::INIT_TOO_NEW;
+ }
+
+ // NOTICE: If you are changing structures for things shared with the archived
+ // history file like URLs, visits, or downloads, that will need migration as
+ // well. Instead of putting such migration code in this class, it should be
+ // in the corresponding file (url_database.cc, etc.) and called from here and
+ // from the archived_database.cc.
+
+ int cur_version = meta_table_.GetVersionNumber();
+
+ // Put migration code here
+
+ if (cur_version == 15) {
+ if (!MigrateBookmarksToFile(tmp_bookmarks_path) ||
+ !DropStarredIDFromURLs()) {
+ LOG(WARNING) << "Unable to update history database to version 16.";
+ return sql::INIT_FAILURE;
+ }
+ ++cur_version;
+ meta_table_.SetVersionNumber(cur_version);
+ meta_table_.SetCompatibleVersionNumber(
+ std::min(cur_version, kCompatibleVersionNumber));
+ }
+
+ if (cur_version == 16) {
+#if !defined(OS_WIN)
+ // In this version we bring the time format on Mac & Linux in sync with the
+ // Windows version so that profiles can be moved between computers.
+ MigrateTimeEpoch();
+#endif
+ // On all platforms we bump the version number, so on Windows this
+ // migration is a NOP. We keep the compatible version at 16 since things
+ // will basically still work, just history will be in the future if an
+ // old version reads it.
+ ++cur_version;
+ meta_table_.SetVersionNumber(cur_version);
+ }
+
+ if (cur_version == 17)
+ needs_version_18_migration_ = true;
+
+ if (!CommandLine::ForCurrentProcess()->HasSwitch(switches::kTopSites) &&
+ cur_version == 18) {
+ // Set DB version back to pre-top sites.
+ cur_version = 17;
+ meta_table_.SetVersionNumber(cur_version);
+ }
+
+  // When the version is too old, we just try to continue anyway; there should
+  // not be a released product that makes a database too old for us to handle.
+ LOG_IF(WARNING, (cur_version < GetCurrentVersion() &&
+ !needs_version_18_migration_)) <<
+ "History database version " << cur_version << " is too old to handle.";
+
+ return sql::INIT_OK;
+}
+
+#if !defined(OS_WIN)
+void HistoryDatabase::MigrateTimeEpoch() {
+ // Update all the times in the URLs and visits table in the main database.
+ // For visits, clear the indexed flag since we'll delete the FTS databases in
+ // the next step.
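+  // The offset 11644473600000000 is the number of microseconds between the
+  // Windows epoch (1601-01-01) and the Unix epoch (1970-01-01); adding it
+  // rebases times stored relative to the Unix epoch onto the Windows epoch.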
+ db_.Execute(
+ "UPDATE urls "
+ "SET last_visit_time = last_visit_time + 11644473600000000 "
+ "WHERE id IN (SELECT id FROM urls WHERE last_visit_time > 0);");
+ db_.Execute(
+ "UPDATE visits "
+ "SET visit_time = visit_time + 11644473600000000, is_indexed = 0 "
+ "WHERE id IN (SELECT id FROM visits WHERE visit_time > 0);");
+ db_.Execute(
+ "UPDATE segment_usage "
+ "SET time_slot = time_slot + 11644473600000000 "
+ "WHERE id IN (SELECT id FROM segment_usage WHERE time_slot > 0);");
+
+ // Erase all the full text index files. These will take a while to update and
+ // are less important, so we just blow them away. Same with the archived
+ // database.
+ needs_version_17_migration_ = true;
+}
+#endif
+
+void HistoryDatabase::MigrationToTopSitesDone() {
+ // We should be migrating from 17 to 18.
+ DCHECK_EQ(17, meta_table_.GetVersionNumber());
+ meta_table_.SetVersionNumber(18);
+ needs_version_18_migration_ = false;
+}
+
+} // namespace history
diff --git a/chrome/browser/history/history_database.h b/chrome/browser/history/history_database.h
new file mode 100644
index 0000000..3b3414a
--- /dev/null
+++ b/chrome/browser/history/history_database.h
@@ -0,0 +1,188 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_HISTORY_HISTORY_DATABASE_H_
+#define CHROME_BROWSER_HISTORY_HISTORY_DATABASE_H_
+
+#include "app/sql/connection.h"
+#include "app/sql/init_status.h"
+#include "app/sql/meta_table.h"
+#include "build/build_config.h"
+#include "chrome/browser/history/download_database.h"
+#include "chrome/browser/history/history_types.h"
+#include "chrome/browser/history/starred_url_database.h"
+#include "chrome/browser/history/url_database.h"
+#include "chrome/browser/history/visit_database.h"
+#include "chrome/browser/history/visitsegment_database.h"
+
+class FilePath;
+
+namespace history {
+
+// Forward declaration for the temporary migration code in Init().
+class TextDatabaseManager;
+
+// Encapsulates the SQL connection for the history database. This class holds
+// the database connection and has methods the history system (including full
+// text search) uses for writing and retrieving information.
+//
+// We try to keep most logic out of the history database; this should be seen
+// as the storage interface. Logic for manipulating this storage layer should
+// be in HistoryBackend.cc.
+class HistoryDatabase : public DownloadDatabase,
+ // TODO(sky): See if we can nuke StarredURLDatabase and just create on the
+ // stack for migration. Then HistoryDatabase would directly descend from
+ // URLDatabase.
+ public StarredURLDatabase,
+ public VisitDatabase,
+ public VisitSegmentDatabase {
+ public:
+ // A simple class for scoping a history database transaction. This does not
+ // support rollback since the history database doesn't, either.
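+  //
+  // Illustrative usage (a sketch):
+  //   {
+  //     HistoryDatabase::TransactionScoper scope(history_db);
+  //     history_db->SetSegmentID(visit_id, segment_id);
+  //     // ... more operations batched into the same transaction ...
+  //   }  // CommitTransaction() runs when |scope| is destroyed.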
+ class TransactionScoper {
+ public:
+ explicit TransactionScoper(HistoryDatabase* db) : db_(db) {
+ db_->BeginTransaction();
+ }
+ ~TransactionScoper() {
+ db_->CommitTransaction();
+ }
+ private:
+ HistoryDatabase* db_;
+ };
+
+ // Must call Init() to complete construction. Although it can be created on
+ // any thread, it must be destructed on the history thread for proper
+ // database cleanup.
+ HistoryDatabase();
+
+ virtual ~HistoryDatabase();
+
+ // Must call this function to complete initialization. Will return true on
+ // success. On false, no other function should be called. You may want to call
+ // BeginExclusiveMode after this when you are ready.
+ sql::InitStatus Init(const FilePath& history_name,
+ const FilePath& tmp_bookmarks_path);
+
+ // Call to set the mode on the database to exclusive. The default locking mode
+ // is "normal" but we want to run in exclusive mode for slightly better
+ // performance since we know nobody else is using the database. This is
+ // separate from Init() since the in-memory database attaches to slurp the
+ // data out, and this can't happen in exclusive mode.
+ void BeginExclusiveMode();
+
+ // Returns the current version that we will generate history databases with.
+ static int GetCurrentVersion();
+
+  // Transactions on the history database. Use the TransactionScoper object
+  // above for most work instead of these directly. We support nested
+  // transactions and only commit when the outermost transaction is committed.
+  // This means that it is impossible to roll back a specific transaction. We
+  // could roll back the outermost transaction if any inner one is rolled
+  // back, but it turns out we don't really need this type of integrity for
+  // the history database, so we just don't support it.
+ void BeginTransaction();
+ void CommitTransaction();
+ int transaction_nesting() const { // for debugging and assertion purposes
+ return db_.transaction_nesting();
+ }
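+
+  // Illustrative sketch (not part of this change): with nesting, only the
+  // outermost CommitTransaction() actually commits.
+  //
+  //   db->BeginTransaction();   // nesting = 1, opens the SQL transaction.
+  //   db->BeginTransaction();   // nesting = 2, no new SQL transaction.
+  //   db->CommitTransaction();  // nesting = 1, nothing committed yet.
+  //   db->CommitTransaction();  // nesting = 0, the transaction commits.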
+
+  // Drops all tables except the URL and download tables, and recreates them
+  // from scratch. This is done to rapidly clean up stuff when deleting all
+  // history. It is faster and less likely to have problems than deleting all
+  // rows in the tables.
+ //
+ // We don't delete the downloads table, since there may be in progress
+ // downloads. We handle the download history clean up separately in:
+ // DownloadManager::RemoveDownloadsFromHistoryBetween.
+ //
+ // Returns true on success. On failure, the caller should assume that the
+ // database is invalid. There could have been an error recreating a table.
+ // This should be treated the same as an init failure, and the database
+ // should not be used any more.
+ //
+ // This will also recreate the supplementary URL indices, since these
+ // indices won't be created automatically when using the temporary URL
+ // table (what the caller does right before calling this).
+ bool RecreateAllTablesButURL();
+
+ // Vacuums the database. This will cause sqlite to defragment and collect
+ // unused space in the file. It can be VERY SLOW.
+ void Vacuum();
+
+ // Returns true if the history backend should erase the full text search
+ // and archived history files as part of version 16 -> 17 migration. The
+ // time format changed in this revision, and these files would be much slower
+ // to migrate. Since the data is less important, they should be deleted.
+ //
+ // This flag will be valid after Init() is called. It will always be false
+ // when running on Windows.
+ bool needs_version_17_migration() const {
+ return needs_version_17_migration_;
+ }
+
+  // Returns true if the Thumbnails database should be renamed to the
+  // Favicons database. The 17 -> 18 migration is the migration to TopSites:
+  // ThumbnailsDatabase no longer stores the thumbnails, only the favicons,
+  // so its file is renamed from Thumbnails to Favicons.
+ bool needs_version_18_migration() const {
+ return needs_version_18_migration_;
+ }
+
+ // Update the database version after the TopSites migration.
+ void MigrationToTopSitesDone();
+
+ // Visit table functions ----------------------------------------------------
+
+ // Update the segment id of a visit. Return true on success.
+ bool SetSegmentID(VisitID visit_id, SegmentID segment_id);
+
+ // Query the segment ID for the provided visit. Return 0 on failure or if the
+ // visit id wasn't found.
+ SegmentID GetSegmentID(VisitID visit_id);
+
+ // Retrieves/Updates early expiration threshold, which specifies the earliest
+  // known point in history that may possibly contain visits suitable for
+ // early expiration (AUTO_SUBFRAMES).
+ virtual base::Time GetEarlyExpirationThreshold();
+ virtual void UpdateEarlyExpirationThreshold(base::Time threshold);
+
+ private:
+ // Implemented for URLDatabase.
+ virtual sql::Connection& GetDB();
+
+ // Migration -----------------------------------------------------------------
+
+  // Makes sure the version is up-to-date, updating if necessary. If the
+  // database is too old to migrate, the user will be notified. In this case,
+  // or for other errors, a failure status will be returned. sql::INIT_OK
+  // means the database is up-to-date and ready for use.
+ //
+ // This assumes it is called from the init function inside a transaction. It
+ // may commit the transaction and start a new one if migration requires it.
+ sql::InitStatus EnsureCurrentVersion(const FilePath& tmp_bookmarks_path);
+
+#if !defined(OS_WIN)
+ // Converts the time epoch in the database from being 1970-based to being
+  // 1601-based, which corresponds to the change in Time.internal_value_.
+ void MigrateTimeEpoch();
+#endif
+
+ // ---------------------------------------------------------------------------
+
+ sql::Connection db_;
+ sql::MetaTable meta_table_;
+
+ base::Time cached_early_expiration_threshold_;
+
+ // See the getters above.
+ bool needs_version_17_migration_;
+ bool needs_version_18_migration_;
+
+ DISALLOW_COPY_AND_ASSIGN(HistoryDatabase);
+};
+
+} // namespace history
+
+#endif // CHROME_BROWSER_HISTORY_HISTORY_DATABASE_H_
diff --git a/chrome/browser/history/history_indexer.idl b/chrome/browser/history/history_indexer.idl
new file mode 100644
index 0000000..8fdbf36
--- /dev/null
+++ b/chrome/browser/history/history_indexer.idl
@@ -0,0 +1,57 @@
+// Copyright (c) 2008 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+import "oaidl.idl";
+import "ocidl.idl";
+
+[
+ object,
+ uuid(9C1100DD-51D4-4827-AE9F-3B8FAC4AED72),
+ oleautomation,
+ nonextensible,
+ pointer_default(unique)
+]
+interface IChromeHistoryIndexer : IUnknown {
+ // This is the method called by Chrome to send content and thumbnail of the
+ // page to be indexed. The html content and thumbnail for the same url
+ // are sent at different points in time. The thumbnail_format and
+ // thumbnail parameters will be NULL when sending only the content.
+ // |time| - The last time at which user visited the page. The time is in UTC.
+ // |url| - The url of the page being published for indexing.
+ // |html| - The html content of the page being published for indexing.
+  // |title| - The title of the page being published for indexing.
+ // |thumbnail_format| - The format of the thumbnail image. It is currently
+  //                      "image/jpeg", indicating that the thumbnail is in
+  //                      jpeg format.
+ // |thumbnail| - This is an array of bytes that represents the thumbnail in
+ // the format specified by the "thumbnail_format" parameter.
+ HRESULT SendPageData([in] VARIANT time,
+ [in] BSTR url,
+ [in] BSTR html,
+ [in] BSTR title,
+ [in] BSTR thumbnail_format,
+ [in] VARIANT thumbnail);
+
+ // This method is called by Chrome when the users delete their history.
+ // |begin_time| - Represents the start time from which the history needs to be
+ // deleted. It is given in UTC.
+ // |end_time| - Represents the end time until when the history needs to be
+  //              deleted. It is given in UTC.
+ // If both begin_time and end_time are '0', full user history needs to be
+ // deleted.
+ HRESULT DeleteUserHistoryBetween([in] VARIANT begin_time,
+ [in] VARIANT end_time);
+};
+
+
+// This dummy library statement enforces the creation of a history_indexer.tlb.
+// This is necessary since MSVC assumes a .idl always creates a .tlb. Otherwise,
+// this .idl is always recompiled, giving many engineers a headache.
+[
+ uuid(A5C5B8BE-E7E5-4cb9-A13B-B063361E7B6D),
+ helpstring("Dummy library")
+]
+library history_indexerLib
+{
+};
diff --git a/chrome/browser/history/history_marshaling.h b/chrome/browser/history/history_marshaling.h
new file mode 100644
index 0000000..39b8983
--- /dev/null
+++ b/chrome/browser/history/history_marshaling.h
@@ -0,0 +1,140 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Data structures for communication between the history service on the main
+// thread and the backend on the history thread.
+
+#ifndef CHROME_BROWSER_HISTORY_HISTORY_MARSHALING_H__
+#define CHROME_BROWSER_HISTORY_HISTORY_MARSHALING_H__
+
+#include "base/scoped_vector.h"
+#include "chrome/browser/cancelable_request.h"
+#include "chrome/browser/favicon_service.h"
+#include "chrome/browser/history/history.h"
+#include "chrome/browser/history/page_usage_data.h"
+
+namespace history {
+
+// Navigation -----------------------------------------------------------------
+
+// Marshalling structure for AddPage.
+class HistoryAddPageArgs
+ : public base::RefCountedThreadSafe<HistoryAddPageArgs> {
+ public:
+ HistoryAddPageArgs(const GURL& arg_url,
+ base::Time arg_time,
+ const void* arg_id_scope,
+ int32 arg_page_id,
+ const GURL& arg_referrer,
+ const history::RedirectList& arg_redirects,
+ PageTransition::Type arg_transition,
+ bool arg_did_replace_entry)
+ : url(arg_url),
+ time(arg_time),
+ id_scope(arg_id_scope),
+ page_id(arg_page_id),
+ referrer(arg_referrer),
+ redirects(arg_redirects),
+ transition(arg_transition),
+ did_replace_entry(arg_did_replace_entry) {
+ }
+
+ GURL url;
+ base::Time time;
+
+ const void* id_scope;
+ int32 page_id;
+
+ GURL referrer;
+ history::RedirectList redirects;
+ PageTransition::Type transition;
+ bool did_replace_entry;
+
+ private:
+ friend class base::RefCountedThreadSafe<HistoryAddPageArgs>;
+
+ ~HistoryAddPageArgs() {}
+
+ DISALLOW_COPY_AND_ASSIGN(HistoryAddPageArgs);
+};
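+
+// Illustrative sketch (not part of this change): constructing the args for a
+// simple link navigation with no redirects. |url|, |referrer|, and
+// |tab_contents| are hypothetical values owned by the caller.
+//
+//   scoped_refptr<HistoryAddPageArgs> args(new HistoryAddPageArgs(
+//       url, base::Time::Now(), tab_contents, 0 /* page_id */, referrer,
+//       history::RedirectList(), PageTransition::LINK,
+//       false /* did_replace_entry */));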
+
+// Querying -------------------------------------------------------------------
+
+typedef CancelableRequest1<HistoryService::QueryURLCallback,
+ Tuple2<URLRow, VisitVector> >
+ QueryURLRequest;
+
+typedef CancelableRequest1<HistoryService::QueryHistoryCallback,
+ QueryResults>
+ QueryHistoryRequest;
+
+typedef CancelableRequest1<HistoryService::QueryRedirectsCallback,
+ history::RedirectList>
+ QueryRedirectsRequest;
+
+typedef CancelableRequest<HistoryService::GetVisitCountToHostCallback>
+ GetVisitCountToHostRequest;
+
+typedef CancelableRequest1<HistoryService::QueryTopURLsAndRedirectsCallback,
+ Tuple2<std::vector<GURL>,
+ history::RedirectMap> >
+ QueryTopURLsAndRedirectsRequest;
+
+typedef CancelableRequest1<HistoryService::QueryMostVisitedURLsCallback,
+ history::MostVisitedURLList>
+ QueryMostVisitedURLsRequest;
+
+// Thumbnails -----------------------------------------------------------------
+
+typedef CancelableRequest<HistoryService::ThumbnailDataCallback>
+ GetPageThumbnailRequest;
+
+// Favicons -------------------------------------------------------------------
+
+typedef CancelableRequest<FaviconService::FaviconDataCallback>
+ GetFavIconRequest;
+
+// Downloads ------------------------------------------------------------------
+
+typedef CancelableRequest1<HistoryService::DownloadQueryCallback,
+ std::vector<DownloadCreateInfo> >
+ DownloadQueryRequest;
+
+typedef CancelableRequest<HistoryService::DownloadCreateCallback>
+ DownloadCreateRequest;
+
+typedef CancelableRequest1<HistoryService::DownloadSearchCallback,
+ std::vector<int64> >
+ DownloadSearchRequest;
+
+// Deletion --------------------------------------------------------------------
+
+typedef CancelableRequest<HistoryService::ExpireHistoryCallback>
+ ExpireHistoryRequest;
+
+// Segment usage --------------------------------------------------------------
+
+typedef CancelableRequest1<HistoryService::SegmentQueryCallback,
+ ScopedVector<PageUsageData> >
+ QuerySegmentUsageRequest;
+
+// Keyword search terms -------------------------------------------------------
+
+typedef
+ CancelableRequest1<HistoryService::GetMostRecentKeywordSearchTermsCallback,
+ std::vector<KeywordSearchTermVisit> >
+ GetMostRecentKeywordSearchTermsRequest;
+
+// Generic operations ---------------------------------------------------------
+
+// The argument here is an input value, which is the task to run on the
+// background thread. The callback is used to execute the portion of the task
+// that executes on the main thread.
+typedef CancelableRequest1<HistoryService::HistoryDBTaskCallback,
+ scoped_refptr<HistoryDBTask> >
+ HistoryDBTaskRequest;
+
+} // namespace history
+
+#endif // CHROME_BROWSER_HISTORY_HISTORY_MARSHALING_H__
diff --git a/chrome/browser/history/history_notifications.h b/chrome/browser/history/history_notifications.h
new file mode 100644
index 0000000..80fc9d5
--- /dev/null
+++ b/chrome/browser/history/history_notifications.h
@@ -0,0 +1,74 @@
+// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Structs that hold data used in broadcasting notifications.
+
+#ifndef CHROME_BROWSER_HISTORY_HISTORY_NOTIFICATIONS_H__
+#define CHROME_BROWSER_HISTORY_HISTORY_NOTIFICATIONS_H__
+
+#include <set>
+#include <vector>
+
+#include "googleurl/src/gurl.h"
+#include "chrome/browser/history/history_types.h"
+
+namespace history {
+
+// Base class for history notifications. This needs only a virtual destructor
+// so that the history service's broadcaster can delete it when the request
+// is complete.
+struct HistoryDetails {
+ public:
+ virtual ~HistoryDetails() {}
+};
+
+// Details for HISTORY_URL_VISITED.
+struct URLVisitedDetails : public HistoryDetails {
+ PageTransition::Type transition;
+ URLRow row;
+
+ // A list of redirects leading up to the URL represented by this struct. If
+ // we have the redirect chain A -> B -> C and this struct represents visiting
+ // C, then redirects[0]=B and redirects[1]=A. If there are no redirects,
+ // this will be an empty vector.
+ history::RedirectList redirects;
+};
+
+// Details for NOTIFY_HISTORY_TYPED_URLS_MODIFIED.
+struct URLsModifiedDetails : public HistoryDetails {
+ // Lists the information for each of the URLs affected.
+ std::vector<URLRow> changed_urls;
+};
+
+// Details for NOTIFY_HISTORY_URLS_DELETED.
+struct URLsDeletedDetails : public HistoryDetails {
+ // Set when all history was deleted. False means just a subset was deleted.
+ bool all_history;
+
+ // The list of unique URLs affected. This is valid only when a subset of
+ // history is deleted. When all of it is deleted, this will be empty, since
+ // we do not bother to list all URLs.
+ std::set<GURL> urls;
+};
+
+// Details for NOTIFY_URLS_STARRED.
+struct URLsStarredDetails : public HistoryDetails {
+ explicit URLsStarredDetails(bool being_starred) : starred(being_starred) {}
+
+ // The new starred state of the list of URLs. True when they are being
+ // starred, false when they are being unstarred.
+ bool starred;
+
+ // The list of URLs that are changing.
+ std::set<GURL> changed_urls;
+};
+
+// Details for NOTIFY_FAVICON_CHANGED.
+struct FavIconChangeDetails : public HistoryDetails {
+ std::set<GURL> urls;
+};
+
+} // namespace history
+
+#endif // CHROME_BROWSER_HISTORY_HISTORY_NOTIFICATIONS_H__
diff --git a/chrome/browser/history/history_publisher.cc b/chrome/browser/history/history_publisher.cc
new file mode 100644
index 0000000..0392632
--- /dev/null
+++ b/chrome/browser/history/history_publisher.cc
@@ -0,0 +1,46 @@
+// Copyright (c) 2008-2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/history/history_publisher.h"
+
+#include "base/utf_string_conversions.h"
+
+namespace history {
+
+const char* const HistoryPublisher::kThumbnailImageFormat = "image/jpeg";
+
+void HistoryPublisher::PublishPageThumbnail(
+ const std::vector<unsigned char>& thumbnail, const GURL& url,
+ const base::Time& time) const {
+ PageData page_data = {
+ time,
+ url,
+ NULL,
+ NULL,
+ kThumbnailImageFormat,
+ &thumbnail,
+ };
+
+ PublishDataToIndexers(page_data);
+}
+
+void HistoryPublisher::PublishPageContent(const base::Time& time,
+ const GURL& url,
+ const string16& title,
+ const string16& contents) const {
+ std::wstring wide_title = UTF16ToWide(title);
+ std::wstring wide_contents = UTF16ToWide(contents);
+ PageData page_data = {
+ time,
+ url,
+ wide_contents.c_str(),
+ wide_title.c_str(),
+ NULL,
+ NULL,
+ };
+
+ PublishDataToIndexers(page_data);
+}
+
+} // namespace history
diff --git a/chrome/browser/history/history_publisher.h b/chrome/browser/history/history_publisher.h
new file mode 100644
index 0000000..5fafc3e
--- /dev/null
+++ b/chrome/browser/history/history_publisher.h
@@ -0,0 +1,84 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_HISTORY_HISTORY_PUBLISHER_H_
+#define CHROME_BROWSER_HISTORY_HISTORY_PUBLISHER_H_
+
+#include <vector>
+#include <string>
+
+#include "base/basictypes.h"
+#include "base/string16.h"
+
+#if defined(OS_WIN)
+#include "base/scoped_comptr_win.h"
+#include "history_indexer.h"
+#endif
+
+class GURL;
+
+namespace base {
+class Time;
+}
+
+namespace history {
+
+class HistoryPublisher {
+ public:
+ HistoryPublisher();
+ ~HistoryPublisher();
+
+ // Must call this function to complete initialization. Returns true if we
+ // need to publish data to any indexers registered with us. Returns false if
+ // there are none registered. On false, no other function should be called.
+ bool Init();
+
+ void PublishPageThumbnail(const std::vector<unsigned char>& thumbnail,
+ const GURL& url, const base::Time& time) const;
+ void PublishPageContent(const base::Time& time, const GURL& url,
+ const string16& title,
+ const string16& contents) const;
+ void DeleteUserHistoryBetween(const base::Time& begin_time,
+ const base::Time& end_time) const;
+
+ private:
+ struct PageData {
+ const base::Time& time;
+ const GURL& url;
+ const wchar_t* html;
+ const wchar_t* title;
+ const char* thumbnail_format;
+ const std::vector<unsigned char>* thumbnail;
+ };
+
+ void PublishDataToIndexers(const PageData& page_data) const;
+
+#if defined(OS_WIN)
+ // Initializes the indexer_list_ with the list of indexers that registered
+ // with us to index history. Returns true if there are any registered.
+ bool ReadRegisteredIndexersFromRegistry();
+
+ // Converts time represented by the Time class object to variant time in UTC.
+  // Returns 0 if the time object is null (base::Time::is_null()).
+ static double TimeToUTCVariantTime(const base::Time& time);
+
+ typedef std::vector< ScopedComPtr<IChromeHistoryIndexer> > IndexerList;
+
+ // The list of indexers registered to receive history data from us.
+ IndexerList indexers_;
+
+  // The registry key, looked up under both HKCU and HKLM, where indexers
+  // need to register their CLSID.
+ static const wchar_t* const kRegKeyRegisteredIndexersInfo;
+#endif
+
+ // The format of the thumbnail we pass to indexers.
+ static const char* const kThumbnailImageFormat;
+
+ DISALLOW_COPY_AND_ASSIGN(HistoryPublisher);
+};
+
+} // namespace history
+
+#endif // CHROME_BROWSER_HISTORY_HISTORY_PUBLISHER_H_
diff --git a/chrome/browser/history/history_publisher_none.cc b/chrome/browser/history/history_publisher_none.cc
new file mode 100644
index 0000000..2a164bf
--- /dev/null
+++ b/chrome/browser/history/history_publisher_none.cc
@@ -0,0 +1,34 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// A stub implementation of HistoryPublisher used to provide needed symbols.
+// For now there is no equivalent of this functionality on systems other than
+// Windows.
+
+#include "chrome/browser/history/history_publisher.h"
+
+#include "base/time.h"
+
+namespace history {
+
+HistoryPublisher::HistoryPublisher() {
+}
+
+HistoryPublisher::~HistoryPublisher() {
+}
+
+bool HistoryPublisher::Init() {
+ return false;
+}
+
+void HistoryPublisher::PublishDataToIndexers(const PageData& page_data)
+ const {
+}
+
+void HistoryPublisher::DeleteUserHistoryBetween(const base::Time& begin_time,
+ const base::Time& end_time)
+ const {
+}
+
+} // namespace history
diff --git a/chrome/browser/history/history_publisher_win.cc b/chrome/browser/history/history_publisher_win.cc
new file mode 100644
index 0000000..cbde619
--- /dev/null
+++ b/chrome/browser/history/history_publisher_win.cc
@@ -0,0 +1,139 @@
+// Copyright (c) 2008-2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/history/history_publisher.h"
+
+#include <atlsafe.h>
+#include <objbase.h>
+#include <oleauto.h>
+#include <wtypes.h>
+
+#include "base/registry.h"
+#include "base/scoped_bstr_win.h"
+#include "base/scoped_comptr_win.h"
+#include "base/scoped_variant_win.h"
+#include "base/string_util.h"
+#include "base/time.h"
+#include "googleurl/src/gurl.h"
+
+namespace {
+
+// Instantiates an IChromeHistoryIndexer COM object. Takes a COM class id
+// in |name| and returns the object in |indexer|. Returns false if the
+// operation fails.
+bool CoCreateIndexerFromName(const wchar_t* name,
+ IChromeHistoryIndexer** indexer) {
+ CLSID clsid;
+ HRESULT hr = CLSIDFromString(const_cast<wchar_t*>(name), &clsid);
+ if (FAILED(hr))
+ return false;
+ hr = CoCreateInstance(clsid, NULL, CLSCTX_INPROC,
+ __uuidof(IChromeHistoryIndexer),
+ reinterpret_cast<void**>(indexer));
+ if (FAILED(hr))
+ return false;
+ return true;
+}
+
+// Instantiates the registered indexers from the registry |root| + |path| key
+// and adds them to the |indexers| list.
+void AddRegisteredIndexers(HKEY root, const wchar_t* path,
+ std::vector< ScopedComPtr<IChromeHistoryIndexer> >* indexers) {
+ IChromeHistoryIndexer* indexer;
+ RegistryKeyIterator r_iter(root, path);
+ while (r_iter.Valid()) {
+ if (CoCreateIndexerFromName(r_iter.Name(), &indexer)) {
+ indexers->push_back(ScopedComPtr<IChromeHistoryIndexer>(indexer));
+ indexer->Release();
+ }
+ ++r_iter;
+ }
+}
+
+} // namespace
+
+namespace history {
+
+const wchar_t* const HistoryPublisher::kRegKeyRegisteredIndexersInfo =
+ L"Software\\Google\\Google Chrome\\IndexerPlugins";
+
+// static
+double HistoryPublisher::TimeToUTCVariantTime(const base::Time& time) {
+ double var_time = 0;
+ if (!time.is_null()) {
+ base::Time::Exploded exploded;
+ time.UTCExplode(&exploded);
+
+ // Create the system time struct representing our exploded time.
+ SYSTEMTIME system_time;
+ system_time.wYear = exploded.year;
+ system_time.wMonth = exploded.month;
+ system_time.wDayOfWeek = exploded.day_of_week;
+ system_time.wDay = exploded.day_of_month;
+ system_time.wHour = exploded.hour;
+ system_time.wMinute = exploded.minute;
+ system_time.wSecond = exploded.second;
+ system_time.wMilliseconds = exploded.millisecond;
+ SystemTimeToVariantTime(&system_time, &var_time);
+ }
+
+ return var_time;
+}
+
+HistoryPublisher::HistoryPublisher() {
+ CoInitialize(NULL);
+}
+
+HistoryPublisher::~HistoryPublisher() {
+ CoUninitialize();
+}
+
+bool HistoryPublisher::Init() {
+ return ReadRegisteredIndexersFromRegistry();
+}
+
+// Peruses the registry for indexers to instantiate and stores them in
+// |indexers_|. Returns true if we found at least one indexer object. We look
+// in both HKCU and HKLM.
+bool HistoryPublisher::ReadRegisteredIndexersFromRegistry() {
+ AddRegisteredIndexers(HKEY_CURRENT_USER,
+ kRegKeyRegisteredIndexersInfo, &indexers_);
+ AddRegisteredIndexers(HKEY_LOCAL_MACHINE,
+ kRegKeyRegisteredIndexersInfo, &indexers_);
+  return !indexers_.empty();
+}
+
+void HistoryPublisher::PublishDataToIndexers(const PageData& page_data)
+ const {
+ double var_time = TimeToUTCVariantTime(page_data.time);
+
+ CComSafeArray<unsigned char> thumbnail_arr;
+ if (page_data.thumbnail) {
+ for (size_t i = 0; i < page_data.thumbnail->size(); ++i)
+ thumbnail_arr.Add((*page_data.thumbnail)[i]);
+ }
+
+ // Send data to registered indexers.
+ ScopedVariant time(var_time, VT_DATE);
+ ScopedBstr url(ASCIIToWide(page_data.url.spec()).c_str());
+ ScopedBstr html(page_data.html);
+ ScopedBstr title(page_data.title);
+ ScopedBstr format(ASCIIToWide(page_data.thumbnail_format).c_str());
+ ScopedVariant psa(thumbnail_arr.m_psa);
+ for (size_t i = 0; i < indexers_.size(); ++i) {
+ indexers_[i]->SendPageData(time, url, html, title, format, psa);
+ }
+}
+
+void HistoryPublisher::DeleteUserHistoryBetween(const base::Time& begin_time,
+ const base::Time& end_time)
+ const {
+ ScopedVariant var_begin_time(TimeToUTCVariantTime(begin_time), VT_DATE);
+ ScopedVariant var_end_time(TimeToUTCVariantTime(end_time), VT_DATE);
+ for (size_t i = 0; i < indexers_.size(); ++i) {
+ indexers_[i]->DeleteUserHistoryBetween(var_begin_time, var_end_time);
+ }
+}
+
+} // namespace history
diff --git a/chrome/browser/history/history_querying_unittest.cc b/chrome/browser/history/history_querying_unittest.cc
new file mode 100644
index 0000000..7512786
--- /dev/null
+++ b/chrome/browser/history/history_querying_unittest.cc
@@ -0,0 +1,350 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/basictypes.h"
+#include "base/callback.h"
+#include "base/file_path.h"
+#include "base/file_util.h"
+#include "base/path_service.h"
+#include "base/utf_string_conversions.h"
+#include "chrome/browser/history/history.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+using base::Time;
+using base::TimeDelta;
+
+// Tests the history service for querying functionality.
+
+namespace history {
+
+namespace {
+
+struct TestEntry {
+ const char* url;
+ const char* title;
+ const int days_ago;
+ const char* body;
+ Time time; // Filled by SetUp.
+} test_entries[] = {
+ // This one is visited super long ago so it will be in a different database
+ // from the next appearance of it at the end.
+ {"http://example.com/", "Other", 180, "Other"},
+
+ // These are deliberately added out of chronological order. The history
+ // service should sort them by visit time when returning query results.
+ // The correct index sort order is 4 2 3 1 0.
+ {"http://www.google.com/1", "Title 1", 10,
+ "PAGEONE FOO some body text"},
+ {"http://www.google.com/3", "Title 3", 8,
+ "PAGETHREE BAR some hello world for you"},
+ {"http://www.google.com/2", "Title 2", 9,
+ "PAGETWO FOO some more blah blah blah"},
+
+ // A more recent visit of the first one.
+ {"http://example.com/", "Other", 6, "Other"},
+};
+
+// Returns true if the nth result in the given results set matches. It will
+// return false on a non-match or if there aren't enough results.
+bool NthResultIs(const QueryResults& results,
+ int n, // Result index to check.
+ int test_entry_index) { // Index of test_entries to compare.
+ if (static_cast<int>(results.size()) <= n)
+ return false;
+
+ const URLResult& result = results[n];
+
+ // Check the visit time.
+ if (result.visit_time() != test_entries[test_entry_index].time)
+ return false;
+
+ // Now check the URL & title.
+ return result.url() == GURL(test_entries[test_entry_index].url) &&
+ result.title() == UTF8ToUTF16(test_entries[test_entry_index].title);
+}
+
+} // namespace
+
+class HistoryQueryTest : public testing::Test {
+ public:
+ HistoryQueryTest() {
+ }
+
+ // Acts like a synchronous call to history's QueryHistory.
+ void QueryHistory(const std::string& text_query,
+ const QueryOptions& options,
+ QueryResults* results) {
+ history_->QueryHistory(UTF8ToUTF16(text_query), options, &consumer_,
+ NewCallback(this, &HistoryQueryTest::QueryHistoryComplete));
+ MessageLoop::current()->Run(); // Will go until ...Complete calls Quit.
+ results->Swap(&last_query_results_);
+ }
+
+ protected:
+ scoped_refptr<HistoryService> history_;
+
+ private:
+ virtual void SetUp() {
+ FilePath temp_dir;
+ PathService::Get(base::DIR_TEMP, &temp_dir);
+ history_dir_ = temp_dir.AppendASCII("HistoryTest");
+ file_util::Delete(history_dir_, true);
+ file_util::CreateDirectory(history_dir_);
+
+ history_ = new HistoryService;
+ if (!history_->Init(history_dir_, NULL)) {
+ history_ = NULL; // Tests should notice this NULL ptr & fail.
+ return;
+ }
+
+ // Fill the test data.
+ Time now = Time::Now().LocalMidnight();
+ for (size_t i = 0; i < arraysize(test_entries); i++) {
+ test_entries[i].time =
+ now - (test_entries[i].days_ago * TimeDelta::FromDays(1));
+
+ // We need the ID scope and page ID so that the visit tracker can find it.
+ const void* id_scope = reinterpret_cast<void*>(1);
+ int32 page_id = i;
+ GURL url(test_entries[i].url);
+
+ history_->AddPage(url, test_entries[i].time, id_scope, page_id, GURL(),
+ PageTransition::LINK, history::RedirectList(),
+ false);
+ history_->SetPageTitle(url, UTF8ToUTF16(test_entries[i].title));
+ history_->SetPageContents(url, UTF8ToUTF16(test_entries[i].body));
+ }
+ }
+
+ virtual void TearDown() {
+ if (history_.get()) {
+ history_->SetOnBackendDestroyTask(new MessageLoop::QuitTask);
+ history_->Cleanup();
+ history_ = NULL;
+ MessageLoop::current()->Run(); // Wait for the other thread.
+ }
+ file_util::Delete(history_dir_, true);
+ }
+
+ void QueryHistoryComplete(HistoryService::Handle, QueryResults* results) {
+ results->Swap(&last_query_results_);
+ MessageLoop::current()->Quit(); // Will return out to QueryHistory.
+ }
+
+ MessageLoop message_loop_;
+
+ FilePath history_dir_;
+
+ CancelableRequestConsumer consumer_;
+
+ // The QueryHistoryComplete callback will put the results here so QueryHistory
+ // can return them.
+ QueryResults last_query_results_;
+
+ DISALLOW_COPY_AND_ASSIGN(HistoryQueryTest);
+};
+
+TEST_F(HistoryQueryTest, Basic) {
+ ASSERT_TRUE(history_.get());
+
+ QueryOptions options;
+ QueryResults results;
+
+ // Test duplicate collapsing.
+ QueryHistory(std::string(), options, &results);
+ EXPECT_EQ(4U, results.size());
+ EXPECT_TRUE(NthResultIs(results, 0, 4));
+ EXPECT_TRUE(NthResultIs(results, 1, 2));
+ EXPECT_TRUE(NthResultIs(results, 2, 3));
+ EXPECT_TRUE(NthResultIs(results, 3, 1));
+
+ // Next query a time range. The beginning should be inclusive, the ending
+ // should be exclusive.
+ options.begin_time = test_entries[3].time;
+ options.end_time = test_entries[2].time;
+ QueryHistory(std::string(), options, &results);
+ EXPECT_EQ(1U, results.size());
+ EXPECT_TRUE(NthResultIs(results, 0, 3));
+}
+
+// Tests max_count feature for basic (non-Full Text Search) queries.
+TEST_F(HistoryQueryTest, BasicCount) {
+ ASSERT_TRUE(history_.get());
+
+ QueryOptions options;
+ QueryResults results;
+
+ // Query all time but with a limit on the number of entries. We should
+ // get the N most recent entries.
+ options.max_count = 2;
+ QueryHistory(std::string(), options, &results);
+ EXPECT_EQ(2U, results.size());
+ EXPECT_TRUE(NthResultIs(results, 0, 4));
+ EXPECT_TRUE(NthResultIs(results, 1, 2));
+}
+
+TEST_F(HistoryQueryTest, ReachedBeginning) {
+ ASSERT_TRUE(history_.get());
+
+ QueryOptions options;
+ QueryResults results;
+
+ QueryHistory(std::string(), options, &results);
+ EXPECT_TRUE(results.reached_beginning());
+
+ options.begin_time = test_entries[1].time;
+ QueryHistory(std::string(), options, &results);
+ EXPECT_FALSE(results.reached_beginning());
+
+ options.begin_time = test_entries[0].time + TimeDelta::FromMicroseconds(1);
+ QueryHistory(std::string(), options, &results);
+ EXPECT_FALSE(results.reached_beginning());
+
+ options.begin_time = test_entries[0].time;
+ QueryHistory(std::string(), options, &results);
+ EXPECT_TRUE(results.reached_beginning());
+
+ options.begin_time = test_entries[0].time - TimeDelta::FromMicroseconds(1);
+ QueryHistory(std::string(), options, &results);
+ EXPECT_TRUE(results.reached_beginning());
+}
+
+// This does most of the same tests above, but searches for a FTS string that
+// will match the pages in question. This will trigger a different code path.
+TEST_F(HistoryQueryTest, FTS) {
+ ASSERT_TRUE(history_.get());
+
+ QueryOptions options;
+ QueryResults results;
+
+  // Query all of them to make sure they are there and in order.
+ QueryHistory("some", options, &results);
+ EXPECT_EQ(3U, results.size());
+ EXPECT_TRUE(NthResultIs(results, 0, 2));
+ EXPECT_TRUE(NthResultIs(results, 1, 3));
+ EXPECT_TRUE(NthResultIs(results, 2, 1));
+
+ // Do a query that should only match one of them.
+ QueryHistory("PAGETWO", options, &results);
+ EXPECT_EQ(1U, results.size());
+ EXPECT_TRUE(NthResultIs(results, 0, 3));
+
+ // Next query a time range. The beginning should be inclusive, the ending
+ // should be exclusive.
+ options.begin_time = test_entries[1].time;
+ options.end_time = test_entries[3].time;
+ QueryHistory("some", options, &results);
+ EXPECT_EQ(1U, results.size());
+ EXPECT_TRUE(NthResultIs(results, 0, 1));
+}
+
+// Searches titles.
+TEST_F(HistoryQueryTest, FTSTitle) {
+ ASSERT_TRUE(history_.get());
+
+ QueryOptions options;
+ QueryResults results;
+
+  // Query for the word "title". All three "Title N" pages should match,
+  // most recent first.
+ QueryHistory("title", options, &results);
+ EXPECT_EQ(3U, results.size());
+ EXPECT_TRUE(NthResultIs(results, 0, 2));
+ EXPECT_TRUE(NthResultIs(results, 1, 3));
+ EXPECT_TRUE(NthResultIs(results, 2, 1));
+}
+
+// Tests prefix searching for Full Text Search queries.
+TEST_F(HistoryQueryTest, FTSPrefix) {
+ ASSERT_TRUE(history_.get());
+
+ QueryOptions options;
+ QueryResults results;
+
+ // Query with a prefix search. Should return matches for "PAGETWO" and
+ // "PAGETHREE".
+ QueryHistory("PAGET", options, &results);
+ EXPECT_EQ(2U, results.size());
+ EXPECT_TRUE(NthResultIs(results, 0, 2));
+ EXPECT_TRUE(NthResultIs(results, 1, 3));
+}
+
+// Tests max_count feature for Full Text Search queries.
+TEST_F(HistoryQueryTest, FTSCount) {
+ ASSERT_TRUE(history_.get());
+
+ QueryOptions options;
+ QueryResults results;
+
+ // Query all time but with a limit on the number of entries. We should
+ // get the N most recent entries.
+ options.max_count = 2;
+ QueryHistory("some", options, &results);
+ EXPECT_EQ(2U, results.size());
+ EXPECT_TRUE(NthResultIs(results, 0, 2));
+ EXPECT_TRUE(NthResultIs(results, 1, 3));
+
+ // Now query a subset of the pages and limit by N items. "FOO" should match
+ // the 2nd & 3rd pages, but we should only get the 3rd one because of the one
+ // page max restriction.
+ options.max_count = 1;
+ QueryHistory("FOO", options, &results);
+ EXPECT_EQ(1U, results.size());
+ EXPECT_TRUE(NthResultIs(results, 0, 3));
+}
+
+// Tests that FTS queries can find URLs when they exist only in the archived
+// database. This also tests that imported URLs can be found, since we use
+// AddPagesWithDetails just like the importer.
+TEST_F(HistoryQueryTest, FTSArchived) {
+ ASSERT_TRUE(history_.get());
+
+ std::vector<URLRow> urls_to_add;
+
+ URLRow row1(GURL("http://foo.bar/"));
+ row1.set_title(UTF8ToUTF16("archived title"));
+ row1.set_last_visit(Time::Now() - TimeDelta::FromDays(365));
+ urls_to_add.push_back(row1);
+
+ URLRow row2(GURL("http://foo.bar/"));
+ row2.set_title(UTF8ToUTF16("nonarchived title"));
+ row2.set_last_visit(Time::Now());
+ urls_to_add.push_back(row2);
+
+ history_->AddPagesWithDetails(urls_to_add);
+
+ QueryOptions options;
+ QueryResults results;
+
+ // Query all time. The title we get should be the one in the full text
+ // database and not the most current title (since otherwise highlighting in
+ // the title might be wrong).
+ QueryHistory("archived", options, &results);
+ ASSERT_EQ(1U, results.size());
+ EXPECT_TRUE(row1.url() == results[0].url());
+ EXPECT_TRUE(row1.title() == results[0].title());
+}
+
+/* TODO(brettw) re-enable this. It is commented out because the current history
+ code prohibits adding more than one indexed page with the same URL. When we
+ have tiered history, there could be a dupe in the archived history which
+   won't get picked up by the deleter and it can happen again. When this is the
+ case, we should fix this test to duplicate that situation.
+
+// Tests duplicate collapsing and not in Full Text Search situations.
+TEST_F(HistoryQueryTest, FTSDupes) {
+ ASSERT_TRUE(history_.get());
+
+ QueryOptions options;
+ QueryResults results;
+
+ QueryHistory("Other", options, &results);
+ EXPECT_EQ(1, results.urls().size());
+ EXPECT_TRUE(NthResultIs(results, 0, 4));
+}
+*/
+
+} // namespace history
diff --git a/chrome/browser/history/history_types.cc b/chrome/browser/history/history_types.cc
new file mode 100644
index 0000000..50395aa
--- /dev/null
+++ b/chrome/browser/history/history_types.cc
@@ -0,0 +1,240 @@
+// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/history/history_types.h"
+
+#include <limits>
+
+#include "base/logging.h"
+#include "base/stl_util-inl.h"
+
+using base::Time;
+
+namespace history {
+
+// URLRow ----------------------------------------------------------------------
+
+void URLRow::Swap(URLRow* other) {
+ std::swap(id_, other->id_);
+ url_.Swap(&other->url_);
+ title_.swap(other->title_);
+ std::swap(visit_count_, other->visit_count_);
+ std::swap(typed_count_, other->typed_count_);
+ std::swap(last_visit_, other->last_visit_);
+ std::swap(hidden_, other->hidden_);
+ std::swap(favicon_id_, other->favicon_id_);
+}
+
+void URLRow::Initialize() {
+ id_ = 0;
+  visit_count_ = 0;
+  typed_count_ = 0;
+ last_visit_ = Time();
+ hidden_ = false;
+ favicon_id_ = 0;
+}
+
+// VisitRow --------------------------------------------------------------------
+
+VisitRow::VisitRow()
+ : visit_id(0),
+ url_id(0),
+ referring_visit(0),
+ transition(PageTransition::LINK),
+ segment_id(0),
+ is_indexed(false) {
+}
+
+VisitRow::VisitRow(URLID arg_url_id,
+ Time arg_visit_time,
+ VisitID arg_referring_visit,
+ PageTransition::Type arg_transition,
+ SegmentID arg_segment_id)
+ : visit_id(0),
+ url_id(arg_url_id),
+ visit_time(arg_visit_time),
+ referring_visit(arg_referring_visit),
+ transition(arg_transition),
+ segment_id(arg_segment_id),
+ is_indexed(false) {
+}
+
+// StarredEntry ----------------------------------------------------------------
+
+StarredEntry::StarredEntry()
+ : id(0),
+ parent_group_id(0),
+ group_id(0),
+ visual_order(0),
+ type(URL),
+ url_id(0) {
+}
+
+void StarredEntry::Swap(StarredEntry* other) {
+ std::swap(id, other->id);
+ title.swap(other->title);
+ std::swap(date_added, other->date_added);
+ std::swap(parent_group_id, other->parent_group_id);
+ std::swap(group_id, other->group_id);
+ std::swap(visual_order, other->visual_order);
+ std::swap(type, other->type);
+ url.Swap(&other->url);
+ std::swap(url_id, other->url_id);
+ std::swap(date_group_modified, other->date_group_modified);
+}
+
+// URLResult -------------------------------------------------------------------
+
+void URLResult::Swap(URLResult* other) {
+ URLRow::Swap(other);
+ std::swap(visit_time_, other->visit_time_);
+ snippet_.Swap(&other->snippet_);
+ title_match_positions_.swap(other->title_match_positions_);
+}
+
+// QueryResults ----------------------------------------------------------------
+
+QueryResults::QueryResults() : reached_beginning_(false) {
+}
+
+QueryResults::~QueryResults() {
+ // Free all the URL objects.
+ STLDeleteContainerPointers(results_.begin(), results_.end());
+}
+
+const size_t* QueryResults::MatchesForURL(const GURL& url,
+ size_t* num_matches) const {
+ URLToResultIndices::const_iterator found = url_to_results_.find(url);
+ if (found == url_to_results_.end()) {
+ if (num_matches)
+ *num_matches = 0;
+ return NULL;
+ }
+
+  // All entries in the map should have at least one index; otherwise the
+  // entry shouldn't be in the map.
+ DCHECK(found->second->size() > 0);
+ if (num_matches)
+ *num_matches = found->second->size();
+ return &found->second->front();
+}
+
+void QueryResults::Swap(QueryResults* other) {
+ std::swap(first_time_searched_, other->first_time_searched_);
+ std::swap(reached_beginning_, other->reached_beginning_);
+ results_.swap(other->results_);
+ url_to_results_.swap(other->url_to_results_);
+}
+
+void QueryResults::AppendURLBySwapping(URLResult* result) {
+ URLResult* new_result = new URLResult;
+ new_result->Swap(result);
+
+ results_.push_back(new_result);
+ AddURLUsageAtIndex(new_result->url(), results_.size() - 1);
+}
+
+void QueryResults::AppendResultsBySwapping(QueryResults* other,
+ bool remove_dupes) {
+ if (remove_dupes) {
+ // Delete all entries in the other array that are already in this one.
+ for (size_t i = 0; i < results_.size(); i++)
+ other->DeleteURL(results_[i]->url());
+ }
+
+ if (first_time_searched_ > other->first_time_searched_)
+ std::swap(first_time_searched_, other->first_time_searched_);
+
+ if (reached_beginning_ != other->reached_beginning_)
+ std::swap(reached_beginning_, other->reached_beginning_);
+
+ for (size_t i = 0; i < other->results_.size(); i++) {
+ // Just transfer pointer ownership.
+ results_.push_back(other->results_[i]);
+ AddURLUsageAtIndex(results_.back()->url(), results_.size() - 1);
+ }
+
+ // We just took ownership of all the results in the input vector.
+ other->results_.clear();
+ other->url_to_results_.clear();
+}
+
+void QueryResults::DeleteURL(const GURL& url) {
+ // Delete all instances of this URL. We re-query each time since each
+ // mutation will cause the indices to change.
+ while (const size_t* match_indices = MatchesForURL(url, NULL))
+ DeleteRange(*match_indices, *match_indices);
+}
+
+void QueryResults::DeleteRange(size_t begin, size_t end) {
+ DCHECK(begin <= end && begin < size() && end < size());
+
+ // First delete the pointers in the given range and store all the URLs that
+ // were modified. We will delete references to these later.
+ std::set<GURL> urls_modified;
+ for (size_t i = begin; i <= end; i++) {
+ urls_modified.insert(results_[i]->url());
+ delete results_[i];
+ results_[i] = NULL;
+ }
+
+ // Now just delete that range in the vector en masse (the STL ending is
+ // exclusive, while ours is inclusive, hence the +1).
+ results_.erase(results_.begin() + begin, results_.begin() + end + 1);
+
+  // Delete the indices referencing the deleted entries.
+ for (std::set<GURL>::const_iterator url = urls_modified.begin();
+ url != urls_modified.end(); ++url) {
+ URLToResultIndices::iterator found = url_to_results_.find(*url);
+ if (found == url_to_results_.end()) {
+ NOTREACHED();
+ continue;
+ }
+
+ // Need a signed loop type since we do -- which may take us to -1.
+ for (int match = 0; match < static_cast<int>(found->second->size());
+ match++) {
+ if (found->second[match] >= begin && found->second[match] <= end) {
+        // Remove this reference from the list.
+ found->second->erase(found->second->begin() + match);
+ match--;
+ }
+ }
+
+    // Clear out any empty list if we just made one.
+ if (found->second->empty())
+ url_to_results_.erase(found);
+ }
+
+ // Shift all other indices over to account for the removed ones.
+ AdjustResultMap(end + 1, std::numeric_limits<size_t>::max(),
+ -static_cast<ptrdiff_t>(end - begin + 1));
+}
+
+void QueryResults::AddURLUsageAtIndex(const GURL& url, size_t index) {
+ URLToResultIndices::iterator found = url_to_results_.find(url);
+ if (found != url_to_results_.end()) {
+ // The URL is already in the list, so we can just append the new index.
+ found->second->push_back(index);
+ return;
+ }
+
+ // Need to add a new entry for this URL.
+ StackVector<size_t, 4> new_list;
+ new_list->push_back(index);
+ url_to_results_[url] = new_list;
+}
+
+void QueryResults::AdjustResultMap(size_t begin, size_t end, ptrdiff_t delta) {
+ for (URLToResultIndices::iterator i = url_to_results_.begin();
+ i != url_to_results_.end(); ++i) {
+ for (size_t match = 0; match < i->second->size(); match++) {
+ size_t match_index = i->second[match];
+ if (match_index >= begin && match_index <= end)
+ i->second[match] += delta;
+ }
+ }
+}
+
+} // namespace history
diff --git a/chrome/browser/history/history_types.h b/chrome/browser/history/history_types.h
new file mode 100644
index 0000000..f7bc7fb
--- /dev/null
+++ b/chrome/browser/history/history_types.h
@@ -0,0 +1,532 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_HISTORY_HISTORY_TYPES_H_
+#define CHROME_BROWSER_HISTORY_HISTORY_TYPES_H_
+
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "base/basictypes.h"
+#include "base/stack_container.h"
+#include "base/string16.h"
+#include "base/time.h"
+#include "chrome/browser/history/snippet.h"
+#include "chrome/common/page_transition_types.h"
+#include "chrome/common/ref_counted_util.h"
+#include "googleurl/src/gurl.h"
+
+namespace history {
+
+// Forward declaration for friend statements.
+class HistoryBackend;
+class URLDatabase;
+
+// Structure to hold redirect lists for URLs. For a redirect chain
+// A -> B -> C, an entry in the map would look like "A => {B -> C}".
+typedef std::map<GURL, scoped_refptr<RefCountedVector<GURL> > > RedirectMap;
+
+// Container for a list of URLs.
+typedef std::vector<GURL> RedirectList;
+
+typedef int64 StarID; // Unique identifier for star entries.
+typedef int64 UIStarID; // Identifier for star entries that come from the UI.
+typedef int64 DownloadID; // Identifier for a download.
+typedef int64 FavIconID; // For FavIcons.
+typedef int64 SegmentID; // URL segments for the most visited view.
+
+// URLRow ---------------------------------------------------------------------
+
+typedef int64 URLID;
+
+// Holds all information globally associated with one URL (one row in the
+// URL table).
+//
+// This keeps track of dirty bits, which are currently unused:
+//
+// TODO(brettw) the dirty bits are broken in a number of respects. First, the
+// database will want to update them on a const object, so they need to be
+// mutable.
+//
+// Second, there is a problem copying. If you make a copy of this structure
+// (as we allow since we put this into vectors in various places) then the
+// dirty bits will not be in sync for these copies.
+class URLRow {
+ public:
+ URLRow() {
+ Initialize();
+ }
+
+ explicit URLRow(const GURL& url) : url_(url) {
+ // Initialize will not set the URL, so our initialization above will stay.
+ Initialize();
+ }
+
+ // We need to be able to set the id of a URLRow that's being passed through
+ // an IPC message. This constructor should probably not be used otherwise.
+ URLRow(const GURL& url, URLID id) : url_(url) {
+ // Initialize will not set the URL, so our initialization above will stay.
+ Initialize();
+ // Initialize will zero the id_, so set it here.
+ id_ = id;
+ }
+
+ virtual ~URLRow() {}
+
+ URLID id() const { return id_; }
+ const GURL& url() const { return url_; }
+
+ const string16& title() const {
+ return title_;
+ }
+ void set_title(const string16& title) {
+ // The title is frequently set to the same thing, so we don't bother
+ // updating unless the string has changed.
+ if (title != title_) {
+ title_ = title;
+ }
+ }
+
+ int visit_count() const {
+ return visit_count_;
+ }
+ void set_visit_count(int visit_count) {
+ visit_count_ = visit_count;
+ }
+
+ // Number of times the URL was typed in the Omnibox.
+ int typed_count() const {
+ return typed_count_;
+ }
+ void set_typed_count(int typed_count) {
+ typed_count_ = typed_count;
+ }
+
+ base::Time last_visit() const {
+ return last_visit_;
+ }
+ void set_last_visit(base::Time last_visit) {
+ last_visit_ = last_visit;
+ }
+
+ // If this is set, we won't autocomplete this URL.
+ bool hidden() const {
+ return hidden_;
+ }
+ void set_hidden(bool hidden) {
+ hidden_ = hidden;
+ }
+
+ // ID of the favicon. A value of 0 means the favicon isn't known yet.
+ FavIconID favicon_id() const { return favicon_id_; }
+ void set_favicon_id(FavIconID favicon_id) {
+ favicon_id_ = favicon_id;
+ }
+
+ // Swaps the contents of this URLRow with another, which allows it to be
+ // destructively copied without memory allocations.
+ // (Virtual because it's overridden by URLResult.)
+ virtual void Swap(URLRow* other);
+
+ private:
+ // This class writes directly into this structure and clears our dirty bits
+ // when reading out of the DB.
+ friend class URLDatabase;
+ friend class HistoryBackend;
+
+ // Initializes all values that need initialization to their defaults.
+ // This excludes objects which autoinitialize such as strings.
+ void Initialize();
+
+ // The row ID of this URL. Immutable except for the database which sets it
+ // when it pulls them out.
+ URLID id_;
+
+ // The URL of this row. Immutable except for the database which sets it
+ // when it pulls them out. If clients want to change it, they must use
+ // the constructor to make a new one.
+ GURL url_;
+
+ string16 title_;
+
+ // Total number of times this URL has been visited.
+ int visit_count_;
+
+ // Number of times this URL has been manually entered in the URL bar.
+ int typed_count_;
+
+ // The date of the last visit of this URL, which saves us from having to
+  // look up in the visit table for things like autocomplete and expiration.
+ base::Time last_visit_;
+
+  // Indicates this entry should not be shown in typical UI or queries; this
+  // is usually the case for subframes.
+ bool hidden_;
+
+ // The ID of the favicon for this url.
+ FavIconID favicon_id_;
+
+  // We support the implicit copy constructor and operator=.
+};
+
+// VisitRow -------------------------------------------------------------------
+
+typedef int64 VisitID;
+
+// Holds all information associated with a specific visit. A visit holds time
+// and referrer information for one time a URL is visited.
+class VisitRow {
+ public:
+ VisitRow();
+ VisitRow(URLID arg_url_id,
+ base::Time arg_visit_time,
+ VisitID arg_referring_visit,
+ PageTransition::Type arg_transition,
+ SegmentID arg_segment_id);
+
+  // ID of this row (visit ID, used as a referrer for other visits).
+ VisitID visit_id;
+
+  // Row ID into the URL table of the URL that this visit is for.
+ URLID url_id;
+
+ base::Time visit_time;
+
+ // Indicates another visit that was the referring page for this one.
+ // 0 indicates no referrer.
+ VisitID referring_visit;
+
+ // A combination of bits from PageTransition.
+ PageTransition::Type transition;
+
+ // The segment id (see visitsegment_database.*).
+ // If 0, the segment id is null in the table.
+ SegmentID segment_id;
+
+ // True when this visit has indexed data for it. We try to keep this in sync
+ // with the full text index: when we add or remove things from there, we will
+  // update the visit table as well. However, that file could get deleted or
+  // fall out of sync in various ways, so this flag may not always be accurate.
+ bool is_indexed;
+
+ // Compares two visits based on dates, for sorting.
+  bool operator<(const VisitRow& other) const {
+ return visit_time < other.visit_time;
+ }
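+
+  // Illustrative note (not part of this change): this operator lets a
+  // VisitVector be put in chronological order with the standard sort:
+  //
+  //   std::sort(visits.begin(), visits.end());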
+
+  // We allow the implicit copy constructor and operator=.
+};
+
+// We pass around vectors of visits a lot.
+typedef std::vector<VisitRow> VisitVector;
+
+// Favicons -------------------------------------------------------------------
+
+// Used by the importer to set favicons for imported bookmarks.
+struct ImportedFavIconUsage {
+ // The URL of the favicon.
+ GURL favicon_url;
+
+ // The raw png-encoded data.
+ std::vector<unsigned char> png_data;
+
+ // The list of URLs using this favicon.
+ std::set<GURL> urls;
+};
+
+// PageVisit ------------------------------------------------------------------
+
+// Represents a simplified version of a visit for external users. Normally,
+// views are only interested in the time, and not the other information
+// associated with a VisitRow.
+struct PageVisit {
+ URLID page_id;
+ base::Time visit_time;
+};
+
+// StarredEntry ---------------------------------------------------------------
+
+// StarredEntry represents either a starred page, or a star grouping (where
+// a star grouping consists of child starred entries). Use the type to
+// determine the type of a particular entry.
+//
+// The database internally uses the id field to uniquely identify a starred
+// entry. On the other hand, the UI, which is anything routed through
+// HistoryService and HistoryBackend (including BookmarkBarView), uses the
+// url field to uniquely identify starred entries of type URL and the group_id
+// field to uniquely identify starred entries of type USER_GROUP. For example,
+// HistoryService::UpdateStarredEntry identifies the entry by url (if the
+// type is URL) or group_id (if the type is not URL).
+struct StarredEntry {
+ enum Type {
+ // Type represents a starred URL (StarredEntry).
+ URL,
+
+ // The bookmark bar grouping.
+ BOOKMARK_BAR,
+
+ // User created group.
+ USER_GROUP,
+
+ // The "other bookmarks" folder that holds uncategorized bookmarks.
+ OTHER
+ };
+
+ StarredEntry();
+
+ void Swap(StarredEntry* other);
+
+ // Unique identifier of this entry.
+ StarID id;
+
+ // Title.
+ string16 title;
+
+ // When this was added.
+ base::Time date_added;
+
+ // Group ID of the star group this entry is in. If 0, this entry is not
+ // in a star group.
+ UIStarID parent_group_id;
+
+ // Unique identifier for groups. This is assigned by the UI.
+ //
+ // WARNING: this is NOT the same as id, id is assigned by the database,
+ // this is assigned by the UI. See note about StarredEntry for more info.
+ UIStarID group_id;
+
+ // Visual order within the parent. Only valid if group_id is not 0.
+ int visual_order;
+
+ // Type of this entry (see enum).
+ Type type;
+
+ // If type == URL, this is the URL of the page that was starred.
+ GURL url;
+
+ // If type == URL, this is the ID of the URL of the primary page that was
+ // starred.
+ history::URLID url_id;
+
+ // Time the entry was last modified. This is only used for groups and
+ // indicates the last time a URL was added as a child to the group.
+ base::Time date_group_modified;
+};
+
+// URLResult -------------------------------------------------------------------
+
+class URLResult : public URLRow {
+ public:
+ URLResult() {}
+ URLResult(const GURL& url, base::Time visit_time)
+ : URLRow(url),
+ visit_time_(visit_time) {
+ }
+  // Constructor that creates a URLResult from the specified URL and the title
+  // match positions in |title_matches|.
+ URLResult(const GURL& url, const Snippet::MatchPositions& title_matches)
+ : URLRow(url) {
+ title_match_positions_ = title_matches;
+ }
+
+ base::Time visit_time() const { return visit_time_; }
+ void set_visit_time(base::Time visit_time) { visit_time_ = visit_time; }
+
+ const Snippet& snippet() const { return snippet_; }
+
+ // If this is a title match, title_match_positions contains an entry for
+ // every word in the title that matched one of the query parameters. Each
+ // entry contains the start and end of the match.
+ const Snippet::MatchPositions& title_match_positions() const {
+ return title_match_positions_;
+ }
+
+ virtual void Swap(URLResult* other);
+
+ private:
+ friend class HistoryBackend;
+
+ // The time that this result corresponds to.
+ base::Time visit_time_;
+
+ // These values are typically set by HistoryBackend.
+ Snippet snippet_;
+ Snippet::MatchPositions title_match_positions_;
+
+ // We support the implicit copy constructor and operator=.
+};
+
+// QueryResults ----------------------------------------------------------------
+
+// Encapsulates the results of a history query. It supports an ordered list of
+// URLResult objects, plus an efficient way of looking up the indices at which
+// a given URL appears in those results.
+class QueryResults {
+ public:
+ typedef std::vector<URLResult*> URLResultVector;
+
+ QueryResults();
+ ~QueryResults();
+
+  // Indicates the earliest time that the query includes results for (queries
+  // are clipped at the beginning, so they always extend to the end of the
+  // time range queried).
+ //
+  // If the number of results was clipped as a result of the max count, this
+  // will be the time of the first result returned. If there were fewer results
+  // than we were allowed to return, this represents the first date considered
+  // in the query (this will be before the first result if part of the queried
+  // time range had no results).
+ //
+ // TODO(brettw): bug 1203054: This field is not currently set properly! Do
+ // not use until the bug is fixed.
+ base::Time first_time_searched() const { return first_time_searched_; }
+ void set_first_time_searched(base::Time t) { first_time_searched_ = t; }
+ // Note: If you need end_time_searched, it can be added.
+
+ void set_reached_beginning(bool reached) { reached_beginning_ = reached; }
+ bool reached_beginning() { return reached_beginning_; }
+
+ size_t size() const { return results_.size(); }
+ bool empty() const { return results_.empty(); }
+
+ URLResult& operator[](size_t i) { return *results_[i]; }
+ const URLResult& operator[](size_t i) const { return *results_[i]; }
+
+ URLResultVector::const_iterator begin() const { return results_.begin(); }
+ URLResultVector::const_iterator end() const { return results_.end(); }
+ URLResultVector::const_reverse_iterator rbegin() const {
+ return results_.rbegin();
+ }
+ URLResultVector::const_reverse_iterator rend() const {
+ return results_.rend();
+ }
+
+ // Returns a pointer to the beginning of an array of all matching indices
+ // for entries with the given URL. The array will be |*num_matches| long.
+ // |num_matches| can be NULL if the caller is not interested in the number of
+ // results (commonly it will only be interested in the first one and can test
+ // the pointer for NULL).
+ //
+ // When there is no match, it will return NULL and, if |num_matches| is
+ // non-NULL, set |*num_matches| to 0.
+ const size_t* MatchesForURL(const GURL& url, size_t* num_matches) const;
+
+ // Swaps the current result with another. This allows ownership to be
+ // efficiently transferred without copying.
+ void Swap(QueryResults* other);
+
+ // Adds the given result to the map, using swap() on the members to avoid
+ // copying (there are a lot of strings and vectors). This means the parameter
+ // object will be cleared after this call.
+ void AppendURLBySwapping(URLResult* result);
+
+ // Appends the results from |other| onto this set. |other| will be left
+ // empty because pointer ownership is transferred rather than copied. When
+ // |remove_dupes| is set, each URL that already appears in this set is
+ // removed from |other| before appending.
+ void AppendResultsBySwapping(QueryResults* other, bool remove_dupes);
+
+ // Removes all instances of the given URL from the result set.
+ void DeleteURL(const GURL& url);
+
+ // Deletes the given inclusive range [begin, end] of items in the result
+ // set; for example, DeleteRange(0, 0) deletes only the first item.
+ void DeleteRange(size_t begin, size_t end);
+
+ private:
+ // Maps the given URL to a list of indices into results_ which identify each
+ // time an entry with that URL appears. Normally, each URL will have one or
+ // very few indices after it, so we optimize this to use statically allocated
+ // memory when possible.
+ typedef std::map<GURL, StackVector<size_t, 4> > URLToResultIndices;
+
+ // Inserts an entry into the |url_to_results_| map saying that the given URL
+ // is at the given index in the results_.
+ void AddURLUsageAtIndex(const GURL& url, size_t index);
+
+ // Adds |delta| to each index in url_to_results_ in the range [begin,end]
+ // (this is inclusive). This is used when inserting or deleting.
+ void AdjustResultMap(size_t begin, size_t end, ptrdiff_t delta);
+
+ base::Time first_time_searched_;
+
+ // Whether the query reaches the beginning of the database.
+ bool reached_beginning_;
+
+ // The ordered list of results. The pointers inside this are owned by this
+ // QueryResults object.
+ URLResultVector results_;
+
+ // Maps URLs to entries in results_.
+ URLToResultIndices url_to_results_;
+
+ DISALLOW_COPY_AND_ASSIGN(QueryResults);
+};
+
+// QueryOptions ----------------------------------------------------------------
+
+struct QueryOptions {
+ QueryOptions() : max_count(0) {}
+
+ // The time range to search for matches in.
+ //
+ // This will match only the most recent visit of a URL. For text search
+ // queries, if the URL was visited in the given time period, but has also
+ // been visited more recently than that, it will not be returned. When the
+ // text query is empty, this will return the most recent visit within the
+ // time range.
+ //
+ // As a special case, if both times are is_null(), then the entire database
+ // will be searched. However, if you set one, you must set the other.
+ //
+ // The beginning is inclusive and the ending is exclusive.
+ base::Time begin_time;
+ base::Time end_time;
+
+ // Sets the query time to the last |days_ago| days to the present time.
+ void SetRecentDayRange(int days_ago) {
+ end_time = base::Time::Now();
+ begin_time = end_time - base::TimeDelta::FromDays(days_ago);
+ }
+
+ // The maximum number of results to return. The results will be sorted with
+ // the most recent first, so older results may not be returned if there is not
+ // enough room. When 0, this will return everything (the default).
+ int max_count;
+};
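+
+// An illustrative sketch of building options for "everything from the last
+// week, newest 20 matches":
+//
+//   QueryOptions options;
+//   options.SetRecentDayRange(7);
+//   options.max_count = 20;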
+
+// KeywordSearchTermVisit -----------------------------------------------------
+
+// KeywordSearchTermVisit is returned from GetMostRecentKeywordSearchTerms. It
+// gives the time and search term of the keyword visit.
+struct KeywordSearchTermVisit {
+ // The time of the visit.
+ base::Time time;
+
+ // The search term that was used.
+ string16 term;
+};
+
+// MostVisitedURL --------------------------------------------------------------
+
+// Holds the per-URL information of the most visited query.
+struct MostVisitedURL {
+ GURL url;
+ GURL favicon_url;
+ string16 title;
+
+ RedirectList redirects;
+
+ bool operator==(const MostVisitedURL& other) const {
+ return url == other.url;
+ }
+};
+
+typedef std::vector<MostVisitedURL> MostVisitedURLList;
+
+} // namespace history
+
+#endif // CHROME_BROWSER_HISTORY_HISTORY_TYPES_H_
diff --git a/chrome/browser/history/history_types_unittest.cc b/chrome/browser/history/history_types_unittest.cc
new file mode 100644
index 0000000..5e14de5
--- /dev/null
+++ b/chrome/browser/history/history_types_unittest.cc
@@ -0,0 +1,171 @@
+// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/history/history_types.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+using base::Time;
+
+namespace history {
+
+namespace {
+
+// Validates the consistency of the given history result. We just make sure
+// that the URL rows match the indices structure. The unit tests themselves
+// test the index structure to verify things are in the right order, so we
+// don't need to check ordering here.
+void CheckHistoryResultConsistency(const QueryResults& result) {
+ for (size_t i = 0; i < result.size(); i++) {
+ size_t match_count;
+ const size_t* matches = result.MatchesForURL(result[i].url(), &match_count);
+
+ bool found = false;
+ for (size_t match = 0; match < match_count; match++) {
+ if (matches[match] == i) {
+ found = true;
+ break;
+ }
+ }
+
+ EXPECT_TRUE(found) << "The URL had no index referring to it.";
+ }
+}
+
+static const char kURL1[] = "http://www.google.com/";
+static const char kURL2[] = "http://news.google.com/";
+static const char kURL3[] = "http://images.google.com/";
+
+// Adds kURL1 twice and kURL2 once.
+void AddSimpleData(QueryResults* results) {
+ GURL url1(kURL1);
+ GURL url2(kURL2);
+ URLResult result1(url1, Time::Now());
+ URLResult result2(url1, Time::Now());
+ URLResult result3(url2, Time::Now());
+
+ // The URLResults are invalid after being inserted.
+ results->AppendURLBySwapping(&result1);
+ results->AppendURLBySwapping(&result2);
+ results->AppendURLBySwapping(&result3);
+ CheckHistoryResultConsistency(*results);
+}
+
+// Adds kURL2 once and kURL3 once.
+void AddAlternateData(QueryResults* results) {
+ GURL url2(kURL2);
+ GURL url3(kURL3);
+ URLResult result1(url2, Time::Now());
+ URLResult result2(url3, Time::Now());
+
+ // The URLResults are invalid after being inserted.
+ results->AppendURLBySwapping(&result1);
+ results->AppendURLBySwapping(&result2);
+ CheckHistoryResultConsistency(*results);
+}
+
+} // namespace
+
+// Tests insertion and deletion by range.
+TEST(HistoryQueryResult, DeleteRange) {
+ GURL url1(kURL1);
+ GURL url2(kURL2);
+ QueryResults results;
+ AddSimpleData(&results);
+
+ // Make sure the first URL is in there twice. The indices can be in either
+ // order.
+ size_t match_count;
+ const size_t* matches = results.MatchesForURL(url1, &match_count);
+ ASSERT_EQ(2U, match_count);
+ EXPECT_TRUE((matches[0] == 0 && matches[1] == 1) ||
+ (matches[0] == 1 && matches[1] == 0));
+
+ // Check the second one.
+ matches = results.MatchesForURL(url2, &match_count);
+ ASSERT_EQ(1U, match_count);
+ EXPECT_TRUE(matches[0] == 2);
+
+ // Delete the first instance of the first URL.
+ results.DeleteRange(0, 0);
+ CheckHistoryResultConsistency(results);
+
+ // Check the two URLs.
+ matches = results.MatchesForURL(url1, &match_count);
+ ASSERT_EQ(1U, match_count);
+ EXPECT_TRUE(matches[0] == 0);
+ matches = results.MatchesForURL(url2, &match_count);
+ ASSERT_EQ(1U, match_count);
+ EXPECT_TRUE(matches[0] == 1);
+
+ // Now delete everything and make sure it's deleted.
+ results.DeleteRange(0, 1);
+ EXPECT_EQ(0U, results.size());
+ EXPECT_FALSE(results.MatchesForURL(url1, NULL));
+ EXPECT_FALSE(results.MatchesForURL(url2, NULL));
+}
+
+// Tests insertion and deletion by URL.
+TEST(HistoryQueryResult, ResultDeleteURL) {
+ GURL url1(kURL1);
+ GURL url2(kURL2);
+ QueryResults results;
+ AddSimpleData(&results);
+
+ // Delete the first URL.
+ results.DeleteURL(url1);
+ CheckHistoryResultConsistency(results);
+ EXPECT_EQ(1U, results.size());
+
+ // The first one should be gone, and the second one should be at [0].
+ size_t match_count;
+ EXPECT_FALSE(results.MatchesForURL(url1, NULL));
+ const size_t* matches = results.MatchesForURL(url2, &match_count);
+ ASSERT_EQ(1U, match_count);
+ EXPECT_TRUE(matches[0] == 0);
+
+ // Delete the second URL, there should be nothing left.
+ results.DeleteURL(url2);
+ EXPECT_EQ(0U, results.size());
+ EXPECT_FALSE(results.MatchesForURL(url2, NULL));
+}
+
+TEST(HistoryQueryResult, AppendResults) {
+ GURL url1(kURL1);
+ GURL url2(kURL2);
+ GURL url3(kURL3);
+
+ // This is the base.
+ QueryResults results;
+ AddSimpleData(&results);
+
+ // Now create the appendee.
+ QueryResults appendee;
+ AddAlternateData(&appendee);
+
+ results.AppendResultsBySwapping(&appendee, true);
+ CheckHistoryResultConsistency(results);
+
+ // There should be 4 results; the first entry of the appendee (kURL2) should
+ // have been dropped because it was already present and we asked to remove
+ // dupes.
+ ASSERT_EQ(4U, results.size());
+
+ // The first URL should be unchanged in the first two spots.
+ size_t match_count;
+ const size_t* matches = results.MatchesForURL(url1, &match_count);
+ ASSERT_EQ(2U, match_count);
+ EXPECT_TRUE((matches[0] == 0 && matches[1] == 1) ||
+ (matches[0] == 1 && matches[1] == 0));
+
+ // The second URL should be there once after that.
+ matches = results.MatchesForURL(url2, &match_count);
+ ASSERT_EQ(1U, match_count);
+ EXPECT_TRUE(matches[0] == 2);
+
+ // The third one should be after that.
+ matches = results.MatchesForURL(url3, &match_count);
+ ASSERT_EQ(1U, match_count);
+ EXPECT_TRUE(matches[0] == 3);
+}
+
+} // namespace history
diff --git a/chrome/browser/history/history_unittest.cc b/chrome/browser/history/history_unittest.cc
new file mode 100644
index 0000000..c8db05a
--- /dev/null
+++ b/chrome/browser/history/history_unittest.cc
@@ -0,0 +1,959 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// History unit tests come in two flavors:
+//
+// 1. The more complicated style is that the unit test creates a full history
+// service. This spawns a background thread for the history backend, and
+// all communication is asynchronous. This is useful for testing more
+// complicated things or end-to-end behavior.
+//
+// 2. The simpler style is to create a history backend on this thread and
+// access it directly without a HistoryService object. This is much simpler
+// because communication is synchronous. Generally, sets should go through
+// the history backend (since there is a lot of logic) but gets can come
+// directly from the HistoryDatabase. This is because the backend generally
+// has no logic in the getter except threading stuff, which we don't want
+// to run.
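+//
+// As an illustrative sketch (not an actual test in this file), the second
+// style typically looks like:
+//
+//   TEST_F(HistoryTest, Example) {
+//     CreateBackendAndDatabase();
+//     // ... call backend_ / db_ synchronously and assert on the results ...
+//   }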
+
+#include <time.h>
+#include <algorithm>
+
+#include "app/sql/connection.h"
+#include "app/sql/statement.h"
+#include "base/basictypes.h"
+#include "base/callback.h"
+#include "base/file_path.h"
+#include "base/file_util.h"
+#include "base/message_loop.h"
+#include "base/path_service.h"
+#include "base/scoped_vector.h"
+#include "base/string_util.h"
+#include "base/task.h"
+#include "chrome/browser/browser_process.h"
+#include "chrome/browser/download/download_item.h"
+#include "chrome/browser/history/history.h"
+#include "chrome/browser/history/history_backend.h"
+#include "chrome/browser/history/history_database.h"
+#include "chrome/browser/history/history_notifications.h"
+#include "chrome/browser/history/in_memory_database.h"
+#include "chrome/browser/history/in_memory_history_backend.h"
+#include "chrome/browser/history/page_usage_data.h"
+#include "chrome/common/chrome_paths.h"
+#include "chrome/common/notification_service.h"
+#include "chrome/common/thumbnail_score.h"
+#include "chrome/tools/profiles/thumbnail-inl.h"
+#include "gfx/codec/jpeg_codec.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "third_party/skia/include/core/SkBitmap.h"
+
+using base::Time;
+using base::TimeDelta;
+
+namespace history {
+class HistoryTest;
+}
+
+// Specialize RunnableMethodTraits for HistoryTest so we can create callbacks.
+// None of these callbacks can outlast the test, so there is no need to retain
+// the HistoryTest object.
+DISABLE_RUNNABLE_METHOD_REFCOUNT(history::HistoryTest);
+
+namespace history {
+
+namespace {
+
+// Compares the two data values. Used for comparing thumbnail data.
+bool DataEqual(const unsigned char* reference, size_t reference_len,
+ const std::vector<unsigned char>& data) {
+ if (reference_len != data.size())
+ return false;
+ for (size_t i = 0; i < reference_len; i++) {
+ if (data[i] != reference[i])
+ return false;
+ }
+ return true;
+}
+
+// The tracker uses RenderProcessHost pointers for scoping but never
+// dereferences them. We use ints because it's easier. This function converts
+// between the two.
+static void* MakeFakeHost(int id) {
+ void* host = 0;
+ memcpy(&host, &id, sizeof(id));
+ return host;
+}
+
+} // namespace
+
+// Delegate class for when we create a backend without a HistoryService.
+class BackendDelegate : public HistoryBackend::Delegate {
+ public:
+ explicit BackendDelegate(HistoryTest* history_test)
+ : history_test_(history_test) {
+ }
+
+ virtual void NotifyProfileError(int message_id);
+ virtual void SetInMemoryBackend(InMemoryHistoryBackend* backend);
+ virtual void BroadcastNotifications(NotificationType type,
+ HistoryDetails* details);
+ virtual void DBLoaded() {}
+ virtual void StartTopSitesMigration() {}
+ private:
+ HistoryTest* history_test_;
+};
+
+// This must be outside the anonymous namespace for the friend statement in
+// HistoryBackend to work.
+class HistoryTest : public testing::Test {
+ public:
+ HistoryTest()
+ : history_service_(NULL),
+ got_thumbnail_callback_(false),
+ redirect_query_success_(false),
+ query_url_success_(false),
+ db_(NULL) {
+ }
+ ~HistoryTest() {
+ }
+
+ // Thumbnail callback: we save the data and exit the message loop so the
+ // unit test can read the data.
+ void OnThumbnailDataAvailable(
+ HistoryService::Handle request_handle,
+ scoped_refptr<RefCountedBytes> jpeg_data) {
+ got_thumbnail_callback_ = true;
+ if (jpeg_data.get()) {
+ std::copy(jpeg_data->data.begin(), jpeg_data->data.end(),
+ std::back_inserter(thumbnail_data_));
+ }
+ MessageLoop::current()->Quit();
+ }
+
+ // Creates the HistoryBackend and HistoryDatabase on the current thread,
+ // assigning the values to backend_ and db_.
+ void CreateBackendAndDatabase() {
+ backend_ =
+ new HistoryBackend(history_dir_, new BackendDelegate(this), NULL);
+ backend_->Init(false);
+ db_ = backend_->db_.get();
+ DCHECK(in_mem_backend_.get()) << "Mem backend should have been set by "
+ "HistoryBackend::Init";
+ }
+
+ void OnSegmentUsageAvailable(CancelableRequestProvider::Handle handle,
+ std::vector<PageUsageData*>* data) {
+ page_usage_data_->swap(*data);
+ MessageLoop::current()->Quit();
+ }
+
+ void OnDeleteURLsDone(CancelableRequestProvider::Handle handle) {
+ MessageLoop::current()->Quit();
+ }
+
+ void OnMostVisitedURLsAvailable(CancelableRequestProvider::Handle handle,
+ MostVisitedURLList url_list) {
+ most_visited_urls_.swap(url_list);
+ MessageLoop::current()->Quit();
+ }
+
+ protected:
+ friend class BackendDelegate;
+
+ // testing::Test
+ virtual void SetUp() {
+ FilePath temp_dir;
+ PathService::Get(base::DIR_TEMP, &temp_dir);
+ history_dir_ = temp_dir.AppendASCII("HistoryTest");
+ file_util::Delete(history_dir_, true);
+ file_util::CreateDirectory(history_dir_);
+ }
+
+ void DeleteBackend() {
+ if (backend_) {
+ backend_->Closing();
+ backend_ = NULL;
+ }
+ }
+
+ virtual void TearDown() {
+ DeleteBackend();
+
+ if (history_service_)
+ CleanupHistoryService();
+
+ // Try to clean up the database file.
+ file_util::Delete(history_dir_, true);
+
+ // Make sure we don't have any event pending that could disrupt the next
+ // test.
+ MessageLoop::current()->PostTask(FROM_HERE, new MessageLoop::QuitTask);
+ MessageLoop::current()->Run();
+ }
+
+ void CleanupHistoryService() {
+ DCHECK(history_service_.get());
+
+ history_service_->NotifyRenderProcessHostDestruction(0);
+ history_service_->SetOnBackendDestroyTask(new MessageLoop::QuitTask);
+ history_service_->Cleanup();
+ history_service_ = NULL;
+
+ // Wait for the backend class to terminate before deleting the files and
+ // moving to the next test. Note: if this never terminates, somebody is
+ // probably leaking a reference to the history backend, so it never calls
+ // our destroy task.
+ MessageLoop::current()->Run();
+ }
+
+ int64 AddDownload(int32 state, const Time& time) {
+ DownloadCreateInfo download(FilePath(FILE_PATH_LITERAL("foo-path")),
+ GURL("foo-url"), time, 0, 512, state, 0);
+ return db_->CreateDownload(download);
+ }
+
+ // Fills the query_url_row_ and query_url_visits_ structures with the
+ // information about the given URL and returns true. If the URL was not
+ // found, this will return false and those structures will not be changed.
+ bool QueryURL(HistoryService* history, const GURL& url) {
+ history->QueryURL(url, true, &consumer_,
+ NewCallback(this, &HistoryTest::SaveURLAndQuit));
+ MessageLoop::current()->Run(); // Will be exited in SaveURLAndQuit.
+ return query_url_success_;
+ }
+
+ // Callback for HistoryService::QueryURL.
+ void SaveURLAndQuit(HistoryService::Handle handle,
+ bool success,
+ const URLRow* url_row,
+ VisitVector* visit_vector) {
+ query_url_success_ = success;
+ if (query_url_success_) {
+ query_url_row_ = *url_row;
+ query_url_visits_.swap(*visit_vector);
+ } else {
+ query_url_row_ = URLRow();
+ query_url_visits_.clear();
+ }
+ MessageLoop::current()->Quit();
+ }
+
+ // Fills in saved_redirects_ with the redirect information for the given URL,
+ // returning true on success. False means the URL was not found.
+ bool QueryRedirectsFrom(HistoryService* history, const GURL& url) {
+ history->QueryRedirectsFrom(url, &consumer_,
+ NewCallback(this, &HistoryTest::OnRedirectQueryComplete));
+ MessageLoop::current()->Run(); // Will be exited in *QueryComplete.
+ return redirect_query_success_;
+ }
+
+ // Callback for QueryRedirects.
+ void OnRedirectQueryComplete(HistoryService::Handle handle,
+ GURL url,
+ bool success,
+ history::RedirectList* redirects) {
+ redirect_query_success_ = success;
+ if (redirect_query_success_)
+ saved_redirects_.swap(*redirects);
+ else
+ saved_redirects_.clear();
+ MessageLoop::current()->Quit();
+ }
+
+ MessageLoopForUI message_loop_;
+
+ // PageUsageData vector to test segments.
+ ScopedVector<PageUsageData> page_usage_data_;
+
+ MostVisitedURLList most_visited_urls_;
+
+ // When non-NULL, this will be deleted on tear down, and we will block until
+ // the backend thread has completed. This allows tests of the history
+ // service to use this feature, while other tests can ignore it.
+ scoped_refptr<HistoryService> history_service_;
+
+ // Directory containing the history database files.
+ FilePath history_dir_;
+
+ // Set by the thumbnail callback when we get data. Be sure to clear this
+ // before issuing a thumbnail request.
+ bool got_thumbnail_callback_;
+ std::vector<unsigned char> thumbnail_data_;
+
+ // Set by the redirect callback when we get data. You should be sure to
+ // clear this before issuing a redirect request.
+ history::RedirectList saved_redirects_;
+ bool redirect_query_success_;
+
+ // For history requests.
+ CancelableRequestConsumer consumer_;
+
+ // For saving URL info after a call to QueryURL.
+ bool query_url_success_;
+ URLRow query_url_row_;
+ VisitVector query_url_visits_;
+
+ // Created via CreateBackendAndDatabase.
+ scoped_refptr<HistoryBackend> backend_;
+ scoped_ptr<InMemoryHistoryBackend> in_mem_backend_;
+ HistoryDatabase* db_; // Cached reference to the backend's database.
+};
+
+void BackendDelegate::NotifyProfileError(int message_id) {
+}
+
+void BackendDelegate::SetInMemoryBackend(InMemoryHistoryBackend* backend) {
+ // Save the in-memory backend to the history test object. This happens
+ // synchronously, so we don't have to do anything fancy.
+ history_test_->in_mem_backend_.reset(backend);
+}
+
+void BackendDelegate::BroadcastNotifications(NotificationType type,
+ HistoryDetails* details) {
+ // Currently, just send the notifications directly to the in-memory database.
+ // We may want to do something fancier in the future.
+ Details<HistoryDetails> det(details);
+ history_test_->in_mem_backend_->Observe(type,
+ Source<HistoryTest>(NULL), det);
+
+ // The backend passes ownership of the details pointer to us.
+ delete details;
+}
+
+TEST_F(HistoryTest, ClearBrowsingData_Downloads) {
+ CreateBackendAndDatabase();
+
+ Time now = Time::Now();
+ TimeDelta one_day = TimeDelta::FromDays(1);
+ Time month_ago = now - TimeDelta::FromDays(30);
+
+ // Initially there should be nothing in the downloads database.
+ std::vector<DownloadCreateInfo> downloads;
+ db_->QueryDownloads(&downloads);
+ EXPECT_EQ(0U, downloads.size());
+
+ // Keep track of these as we need to update them later during the test.
+ DownloadID in_progress, removing;
+
+ // Create one with a 0 time.
+ EXPECT_NE(0, AddDownload(DownloadItem::COMPLETE, Time()));
+ // Create one for now and +/- 1 day.
+ EXPECT_NE(0, AddDownload(DownloadItem::COMPLETE, now - one_day));
+ EXPECT_NE(0, AddDownload(DownloadItem::COMPLETE, now));
+ EXPECT_NE(0, AddDownload(DownloadItem::COMPLETE, now + one_day));
+ // Try the other three states.
+ EXPECT_NE(0, AddDownload(DownloadItem::COMPLETE, month_ago));
+ EXPECT_NE(0, in_progress = AddDownload(DownloadItem::IN_PROGRESS, month_ago));
+ EXPECT_NE(0, AddDownload(DownloadItem::CANCELLED, month_ago));
+ EXPECT_NE(0, removing = AddDownload(DownloadItem::REMOVING, month_ago));
+
+ // Test to see if inserts worked.
+ db_->QueryDownloads(&downloads);
+ EXPECT_EQ(8U, downloads.size());
+
+ // Try removing from current timestamp. This should delete the one in the
+ // future and one very recent one.
+ db_->RemoveDownloadsBetween(now, Time());
+ db_->QueryDownloads(&downloads);
+ EXPECT_EQ(6U, downloads.size());
+
+ // Try removing from two months ago. This should not delete items that are
+ // 'in progress' or in 'removing' state.
+ db_->RemoveDownloadsBetween(now - TimeDelta::FromDays(60), Time());
+ db_->QueryDownloads(&downloads);
+ EXPECT_EQ(3U, downloads.size());
+
+ // Download manager converts to TimeT, which is lossy, so we do the same
+ // for comparison.
+ Time month_ago_lossy = Time::FromTimeT(month_ago.ToTimeT());
+
+ // Make sure the right values remain.
+ EXPECT_EQ(DownloadItem::COMPLETE, downloads[0].state);
+ EXPECT_EQ(0, downloads[0].start_time.ToInternalValue());
+ EXPECT_EQ(DownloadItem::IN_PROGRESS, downloads[1].state);
+ EXPECT_EQ(month_ago_lossy.ToInternalValue(),
+ downloads[1].start_time.ToInternalValue());
+ EXPECT_EQ(DownloadItem::REMOVING, downloads[2].state);
+ EXPECT_EQ(month_ago_lossy.ToInternalValue(),
+ downloads[2].start_time.ToInternalValue());
+
+ // Change state so we can delete the downloads.
+ EXPECT_TRUE(db_->UpdateDownload(512, DownloadItem::COMPLETE, in_progress));
+ EXPECT_TRUE(db_->UpdateDownload(512, DownloadItem::CANCELLED, removing));
+
+ // Try removing from Time=0. This should delete all.
+ db_->RemoveDownloadsBetween(Time(), Time());
+ db_->QueryDownloads(&downloads);
+ EXPECT_EQ(0U, downloads.size());
+
+ // Check removal of downloads stuck in IN_PROGRESS state.
+ EXPECT_NE(0, AddDownload(DownloadItem::COMPLETE, month_ago));
+ EXPECT_NE(0, AddDownload(DownloadItem::IN_PROGRESS, month_ago));
+ db_->QueryDownloads(&downloads);
+ EXPECT_EQ(2U, downloads.size());
+ db_->RemoveDownloadsBetween(Time(), Time());
+ db_->QueryDownloads(&downloads);
+ // The IN_PROGRESS download should remain; it is indicated as "Canceled".
+ EXPECT_EQ(1U, downloads.size());
+ db_->CleanUpInProgressEntries();
+ db_->QueryDownloads(&downloads);
+ EXPECT_EQ(1U, downloads.size());
+ db_->RemoveDownloadsBetween(Time(), Time());
+ db_->QueryDownloads(&downloads);
+ EXPECT_EQ(0U, downloads.size());
+}
+
+TEST_F(HistoryTest, AddPage) {
+ scoped_refptr<HistoryService> history(new HistoryService);
+ history_service_ = history;
+ ASSERT_TRUE(history->Init(history_dir_, NULL));
+
+ // Add the page once from a child frame.
+ const GURL test_url("http://www.google.com/");
+ history->AddPage(test_url, NULL, 0, GURL(),
+ PageTransition::MANUAL_SUBFRAME,
+ history::RedirectList(), false);
+ EXPECT_TRUE(QueryURL(history, test_url));
+ EXPECT_EQ(1, query_url_row_.visit_count());
+ EXPECT_EQ(0, query_url_row_.typed_count());
+ EXPECT_TRUE(query_url_row_.hidden()); // Hidden because of child frame.
+
+ // Add the page once from the main frame (should unhide it).
+ history->AddPage(test_url, NULL, 0, GURL(), PageTransition::LINK,
+ history::RedirectList(), false);
+ EXPECT_TRUE(QueryURL(history, test_url));
+ EXPECT_EQ(2, query_url_row_.visit_count()); // Added twice.
+ EXPECT_EQ(0, query_url_row_.typed_count()); // Never typed.
+ EXPECT_FALSE(query_url_row_.hidden()); // Because loaded in main frame.
+}
+
+TEST_F(HistoryTest, AddPageSameTimes) {
+ scoped_refptr<HistoryService> history(new HistoryService);
+ history_service_ = history;
+ ASSERT_TRUE(history->Init(history_dir_, NULL));
+
+ Time now = Time::Now();
+ const GURL test_urls[] = {
+ GURL("http://timer.first.page/"),
+ GURL("http://timer.second.page/"),
+ GURL("http://timer.third.page/"),
+ };
+
+ // Make sure that two pages added at the same time with no intervening
+ // additions have different timestamps.
+ history->AddPage(test_urls[0], now, NULL, 0, GURL(),
+ PageTransition::LINK,
+ history::RedirectList(), false);
+ EXPECT_TRUE(QueryURL(history, test_urls[0]));
+ EXPECT_EQ(1, query_url_row_.visit_count());
+ EXPECT_TRUE(now == query_url_row_.last_visit()); // gtest doesn't like Time
+
+ history->AddPage(test_urls[1], now, NULL, 0, GURL(),
+ PageTransition::LINK,
+ history::RedirectList(), false);
+ EXPECT_TRUE(QueryURL(history, test_urls[1]));
+ EXPECT_EQ(1, query_url_row_.visit_count());
+ EXPECT_TRUE(now + TimeDelta::FromMicroseconds(1) ==
+ query_url_row_.last_visit());
+
+ // Make sure the next page, at a different time, is also correct.
+ history->AddPage(test_urls[2], now + TimeDelta::FromMinutes(1),
+ NULL, 0, GURL(),
+ PageTransition::LINK,
+ history::RedirectList(), false);
+ EXPECT_TRUE(QueryURL(history, test_urls[2]));
+ EXPECT_EQ(1, query_url_row_.visit_count());
+ EXPECT_TRUE(now + TimeDelta::FromMinutes(1) ==
+ query_url_row_.last_visit());
+}
+
+TEST_F(HistoryTest, AddRedirect) {
+ scoped_refptr<HistoryService> history(new HistoryService);
+ history_service_ = history;
+ ASSERT_TRUE(history->Init(history_dir_, NULL));
+
+ const char* first_sequence[] = {
+ "http://first.page/",
+ "http://second.page/"};
+ int first_count = arraysize(first_sequence);
+ history::RedirectList first_redirects;
+ for (int i = 0; i < first_count; i++)
+ first_redirects.push_back(GURL(first_sequence[i]));
+
+ // Add the sequence of pages as a server redirect chain with no referrer.
+ // Note that we need to have a non-NULL page ID scope.
+ history->AddPage(first_redirects.back(), MakeFakeHost(1), 0, GURL(),
+ PageTransition::LINK, first_redirects, true);
+
+ // The first page should be added once with a link visit type (because we
+ // set LINK when we added the original URL) and a referrer of nowhere (0).
+ EXPECT_TRUE(QueryURL(history, first_redirects[0]));
+ EXPECT_EQ(1, query_url_row_.visit_count());
+ ASSERT_EQ(1U, query_url_visits_.size());
+ int64 first_visit = query_url_visits_[0].visit_id;
+ EXPECT_EQ(PageTransition::LINK |
+ PageTransition::CHAIN_START, query_url_visits_[0].transition);
+ EXPECT_EQ(0, query_url_visits_[0].referring_visit); // No referrer.
+
+ // The second page should be a server redirect type with a referrer of the
+ // first page.
+ EXPECT_TRUE(QueryURL(history, first_redirects[1]));
+ EXPECT_EQ(1, query_url_row_.visit_count());
+ ASSERT_EQ(1U, query_url_visits_.size());
+ int64 second_visit = query_url_visits_[0].visit_id;
+ EXPECT_EQ(PageTransition::SERVER_REDIRECT |
+ PageTransition::CHAIN_END, query_url_visits_[0].transition);
+ EXPECT_EQ(first_visit, query_url_visits_[0].referring_visit);
+
+ // Check that the redirect finding function successfully reports it.
+ saved_redirects_.clear();
+ QueryRedirectsFrom(history, first_redirects[0]);
+ ASSERT_EQ(1U, saved_redirects_.size());
+ EXPECT_EQ(first_redirects[1], saved_redirects_[0]);
+
+ // Now add a client redirect from that second visit to a third. Client
+ // redirects are tracked by the RenderView prior to updating history,
+ // so we pass in a CLIENT_REDIRECT qualifier to mock that behavior.
+ history::RedirectList second_redirects;
+ second_redirects.push_back(first_redirects[1]);
+ second_redirects.push_back(GURL("http://last.page/"));
+ history->AddPage(second_redirects[1], MakeFakeHost(1), 1,
+ second_redirects[0],
+ static_cast<PageTransition::Type>(PageTransition::LINK |
+ PageTransition::CLIENT_REDIRECT),
+ second_redirects, true);
+
+ // The source of the client redirect should NOT have an additional visit
+ // added, because it was a client redirect (normally a new visit would be
+ // added). We should only have the 1 visit left over from the first sequence.
+ EXPECT_TRUE(QueryURL(history, second_redirects[0]));
+ EXPECT_EQ(1, query_url_row_.visit_count());
+
+ // The final page should be set as a client redirect from the previous visit.
+ EXPECT_TRUE(QueryURL(history, second_redirects[1]));
+ EXPECT_EQ(1, query_url_row_.visit_count());
+ ASSERT_EQ(1U, query_url_visits_.size());
+ EXPECT_EQ(PageTransition::CLIENT_REDIRECT |
+ PageTransition::CHAIN_END, query_url_visits_[0].transition);
+ EXPECT_EQ(second_visit, query_url_visits_[0].referring_visit);
+}
+
+TEST_F(HistoryTest, Typed) {
+ scoped_refptr<HistoryService> history(new HistoryService);
+ history_service_ = history;
+ ASSERT_TRUE(history->Init(history_dir_, NULL));
+
+ // Add the page once as typed.
+ const GURL test_url("http://www.google.com/");
+ history->AddPage(test_url, NULL, 0, GURL(), PageTransition::TYPED,
+ history::RedirectList(), false);
+ EXPECT_TRUE(QueryURL(history, test_url));
+
+ // We should have the same typed & visit count.
+ EXPECT_EQ(1, query_url_row_.visit_count());
+ EXPECT_EQ(1, query_url_row_.typed_count());
+
+ // Add the page again not typed.
+ history->AddPage(test_url, NULL, 0, GURL(), PageTransition::LINK,
+ history::RedirectList(), false);
+ EXPECT_TRUE(QueryURL(history, test_url));
+
+ // The second time should not have updated the typed count.
+ EXPECT_EQ(2, query_url_row_.visit_count());
+ EXPECT_EQ(1, query_url_row_.typed_count());
+
+ // Add the page again as a generated URL.
+ history->AddPage(test_url, NULL, 0, GURL(),
+ PageTransition::GENERATED, history::RedirectList(),
+ false);
+ EXPECT_TRUE(QueryURL(history, test_url));
+
+ // This should have worked like a link click.
+ EXPECT_EQ(3, query_url_row_.visit_count());
+ EXPECT_EQ(1, query_url_row_.typed_count());
+
+ // Add the page again as a reload.
+ history->AddPage(test_url, NULL, 0, GURL(),
+ PageTransition::RELOAD, history::RedirectList(),
+ false);
+ EXPECT_TRUE(QueryURL(history, test_url));
+
+ // This should not have incremented any visit counts.
+ EXPECT_EQ(3, query_url_row_.visit_count());
+ EXPECT_EQ(1, query_url_row_.typed_count());
+}
+
+TEST_F(HistoryTest, SetTitle) {
+ scoped_refptr<HistoryService> history(new HistoryService);
+ history_service_ = history;
+ ASSERT_TRUE(history->Init(history_dir_, NULL));
+
+ // Add a URL.
+ const GURL existing_url("http://www.google.com/");
+ history->AddPage(existing_url);
+
+ // Set some title.
+ const string16 existing_title = UTF8ToUTF16("Google");
+ history->SetPageTitle(existing_url, existing_title);
+
+ // Make sure the title got set.
+ EXPECT_TRUE(QueryURL(history, existing_url));
+ EXPECT_EQ(existing_title, query_url_row_.title());
+
+ // Set a title on a nonexistent page.
+ const GURL nonexistent_url("http://news.google.com/");
+ const string16 nonexistent_title = UTF8ToUTF16("Google News");
+ history->SetPageTitle(nonexistent_url, nonexistent_title);
+
+ // Make sure nothing got written.
+ EXPECT_FALSE(QueryURL(history, nonexistent_url));
+ EXPECT_EQ(string16(), query_url_row_.title());
+
+ // TODO(brettw) this should also test redirects, which get the title of the
+ // destination page.
+}
+
+TEST_F(HistoryTest, Segments) {
+ scoped_refptr<HistoryService> history(new HistoryService);
+ history_service_ = history;
+
+ ASSERT_TRUE(history->Init(history_dir_, NULL));
+
+ static const void* scope = static_cast<void*>(this);
+
+ // Add a URL.
+ const GURL existing_url("http://www.google.com/");
+ history->AddPage(existing_url, scope, 0, GURL(),
+ PageTransition::TYPED, history::RedirectList(),
+ false);
+
+ // Make sure a segment was created.
+ history->QuerySegmentUsageSince(
+ &consumer_, Time::Now() - TimeDelta::FromDays(1), 10,
+ NewCallback(static_cast<HistoryTest*>(this),
+ &HistoryTest::OnSegmentUsageAvailable));
+
+ // Wait for processing.
+ MessageLoop::current()->Run();
+
+ ASSERT_EQ(1U, page_usage_data_->size());
+ EXPECT_TRUE(page_usage_data_[0]->GetURL() == existing_url);
+ EXPECT_DOUBLE_EQ(3.0, page_usage_data_[0]->GetScore());
+
+ // Add a URL which doesn't create a segment.
+ const GURL link_url("http://yahoo.com/");
+ history->AddPage(link_url, scope, 0, GURL(),
+ PageTransition::LINK, history::RedirectList(),
+ false);
+
+ // Query again.
+ history->QuerySegmentUsageSince(
+ &consumer_, Time::Now() - TimeDelta::FromDays(1), 10,
+ NewCallback(static_cast<HistoryTest*>(this),
+ &HistoryTest::OnSegmentUsageAvailable));
+
+ // Wait for processing.
+ MessageLoop::current()->Run();
+
+ // Make sure we still have one segment.
+ ASSERT_EQ(1U, page_usage_data_->size());
+ EXPECT_TRUE(page_usage_data_[0]->GetURL() == existing_url);
+
+ // Add a page linked from existing_url.
+ history->AddPage(GURL("http://www.google.com/foo"), scope, 3, existing_url,
+ PageTransition::LINK, history::RedirectList(),
+ false);
+
+ // Query again.
+ history->QuerySegmentUsageSince(
+ &consumer_, Time::Now() - TimeDelta::FromDays(1), 10,
+ NewCallback(static_cast<HistoryTest*>(this),
+ &HistoryTest::OnSegmentUsageAvailable));
+
+ // Wait for processing.
+ MessageLoop::current()->Run();
+
+ // Make sure we still have one segment.
+ ASSERT_EQ(1U, page_usage_data_->size());
+ EXPECT_TRUE(page_usage_data_[0]->GetURL() == existing_url);
+
+ // However, the score should have increased.
+ EXPECT_GT(page_usage_data_[0]->GetScore(), 5.0);
+}
+
+// This just tests history system -> thumbnail database integration, the actual
+// thumbnail tests are in its own file.
+TEST_F(HistoryTest, Thumbnails) {
+ scoped_refptr<HistoryService> history(new HistoryService);
+ history_service_ = history;
+ ASSERT_TRUE(history->Init(history_dir_, NULL));
+
+ scoped_ptr<SkBitmap> thumbnail(
+ gfx::JPEGCodec::Decode(kGoogleThumbnail, sizeof(kGoogleThumbnail)));
+ static const double boringness = 0.25;
+
+ const GURL url("http://www.google.com/thumbnail_test/");
+ history->AddPage(url); // Must be visited before adding a thumbnail.
+ history->SetPageThumbnail(url, *thumbnail,
+ ThumbnailScore(boringness, true, true));
+
+ // Make sure we get the correct thumbnail data.
+ EXPECT_TRUE(history->GetPageThumbnail(url, &consumer_,
+ NewCallback(static_cast<HistoryTest*>(this),
+ &HistoryTest::OnThumbnailDataAvailable)));
+ thumbnail_data_.clear();
+ MessageLoop::current()->Run();
+ // Make sure we got a valid JPEG back. This isn't equivalent to the
+ // thumbnail being correct, but it's the best we can do when roundtripping
+ // through JPEG compression without a similarity measure.
+ EXPECT_TRUE(thumbnail_data_.size());
+ scoped_ptr<SkBitmap> decoded_thumbnail(
+ gfx::JPEGCodec::Decode(&thumbnail_data_[0], thumbnail_data_.size()));
+ EXPECT_TRUE(decoded_thumbnail.get());
+
+ // Request a nonexistent thumbnail and make sure we get
+ // a callback and no data.
+ EXPECT_TRUE(history->GetPageThumbnail(GURL("http://asdfasdf.com/"),
+ &consumer_,
+ NewCallback(static_cast<HistoryTest*>(this),
+ &HistoryTest::OnThumbnailDataAvailable)));
+ thumbnail_data_.clear();
+ MessageLoop::current()->Run();
+ EXPECT_EQ(0U, thumbnail_data_.size());
+
+ // Request the thumbnail and cancel the request.
+ got_thumbnail_callback_ = false;
+ thumbnail_data_.clear();
+ HistoryService::Handle handle = history->GetPageThumbnail(url, &consumer_,
+ NewCallback(static_cast<HistoryTest*>(this),
+ &HistoryTest::OnThumbnailDataAvailable));
+ EXPECT_TRUE(handle);
+
+ history->CancelRequest(handle);
+
+ // We create a task with a timeout so we can make sure we don't get any
+ // data in that time.
+ class QuitMessageLoop : public Task {
+ public:
+ virtual void Run() {
+ MessageLoop::current()->Quit();
+ }
+ };
+ MessageLoop::current()->PostDelayedTask(FROM_HERE, new QuitMessageLoop, 2000);
+ MessageLoop::current()->Run();
+ EXPECT_FALSE(got_thumbnail_callback_);
+}
+
+TEST_F(HistoryTest, MostVisitedURLs) {
+ scoped_refptr<HistoryService> history(new HistoryService);
+ history_service_ = history;
+ ASSERT_TRUE(history->Init(history_dir_, NULL));
+
+ const GURL url0("http://www.google.com/url0/");
+ const GURL url1("http://www.google.com/url1/");
+ const GURL url2("http://www.google.com/url2/");
+ const GURL url3("http://www.google.com/url3/");
+ const GURL url4("http://www.google.com/url4/");
+
+ static const void* scope = static_cast<void*>(this);
+
+ // Add two pages.
+ history->AddPage(url0, scope, 0, GURL(),
+ PageTransition::TYPED, history::RedirectList(),
+ false);
+ history->AddPage(url1, scope, 0, GURL(),
+ PageTransition::TYPED, history::RedirectList(),
+ false);
+ history->QueryMostVisitedURLs(20, 90, &consumer_,
+ NewCallback(static_cast<HistoryTest*>(this),
+ &HistoryTest::OnMostVisitedURLsAvailable));
+ MessageLoop::current()->Run();
+
+ EXPECT_EQ(2U, most_visited_urls_.size());
+ EXPECT_EQ(url0, most_visited_urls_[0].url);
+ EXPECT_EQ(url1, most_visited_urls_[1].url);
+
+ // Add another page.
+ history->AddPage(url2, scope, 0, GURL(),
+ PageTransition::TYPED, history::RedirectList(),
+ false);
+ history->QueryMostVisitedURLs(20, 90, &consumer_,
+ NewCallback(static_cast<HistoryTest*>(this),
+ &HistoryTest::OnMostVisitedURLsAvailable));
+ MessageLoop::current()->Run();
+
+ EXPECT_EQ(3U, most_visited_urls_.size());
+ EXPECT_EQ(url0, most_visited_urls_[0].url);
+ EXPECT_EQ(url1, most_visited_urls_[1].url);
+ EXPECT_EQ(url2, most_visited_urls_[2].url);
+
+ // Revisit url2, making it the top URL.
+ history->AddPage(url2, scope, 0, GURL(),
+ PageTransition::TYPED, history::RedirectList(),
+ false);
+ history->QueryMostVisitedURLs(20, 90, &consumer_,
+ NewCallback(static_cast<HistoryTest*>(this),
+ &HistoryTest::OnMostVisitedURLsAvailable));
+ MessageLoop::current()->Run();
+
+ EXPECT_EQ(3U, most_visited_urls_.size());
+ EXPECT_EQ(url2, most_visited_urls_[0].url);
+ EXPECT_EQ(url0, most_visited_urls_[1].url);
+ EXPECT_EQ(url1, most_visited_urls_[2].url);
+
+ // Revisit url1, making it the top URL.
+ history->AddPage(url1, scope, 0, GURL(),
+ PageTransition::TYPED, history::RedirectList(),
+ false);
+ history->QueryMostVisitedURLs(20, 90, &consumer_,
+ NewCallback(static_cast<HistoryTest*>(this),
+ &HistoryTest::OnMostVisitedURLsAvailable));
+ MessageLoop::current()->Run();
+
+ EXPECT_EQ(3U, most_visited_urls_.size());
+ EXPECT_EQ(url1, most_visited_urls_[0].url);
+ EXPECT_EQ(url2, most_visited_urls_[1].url);
+ EXPECT_EQ(url0, most_visited_urls_[2].url);
+
+ // Set up a redirect chain.
+ history::RedirectList redirects;
+ redirects.push_back(url3);
+ redirects.push_back(url4);
+
+ // Visit url4 using redirects.
+ history->AddPage(url4, scope, 0, GURL(),
+ PageTransition::TYPED, redirects,
+ false);
+ history->QueryMostVisitedURLs(20, 90, &consumer_,
+ NewCallback(static_cast<HistoryTest*>(this),
+ &HistoryTest::OnMostVisitedURLsAvailable));
+ MessageLoop::current()->Run();
+
+ EXPECT_EQ(4U, most_visited_urls_.size());
+ EXPECT_EQ(url1, most_visited_urls_[0].url);
+ EXPECT_EQ(url2, most_visited_urls_[1].url);
+ EXPECT_EQ(url0, most_visited_urls_[2].url);
+ EXPECT_EQ(url3, most_visited_urls_[3].url);
+ EXPECT_EQ(2U, most_visited_urls_[3].redirects.size());
+}
+
+// The version of the history database should be current in the "typical
+// history" example file or it will be imported on startup, throwing off timing
+// measurements.
+//
+// See test/data/profiles/typical_history/README.txt for instructions on
+// how to update the version.
+TEST(HistoryProfileTest, TypicalProfileVersion) {
+ FilePath file;
+ ASSERT_TRUE(PathService::Get(chrome::DIR_TEST_DATA, &file));
+ file = file.AppendASCII("profiles");
+ file = file.AppendASCII("typical_history");
+ file = file.AppendASCII("Default");
+ file = file.AppendASCII("History");
+
+ int cur_version = HistoryDatabase::GetCurrentVersion();
+
+ sql::Connection db;
+ ASSERT_TRUE(db.Open(file));
+
+ {
+ sql::Statement s(db.GetUniqueStatement(
+ "SELECT value FROM meta WHERE key = 'version'"));
+ EXPECT_TRUE(s.Step());
+ int file_version = s.ColumnInt(0);
+ EXPECT_EQ(cur_version, file_version);
+ }
+}
+
+namespace {
+
+// Use this dummy value to scope the page IDs we give history.
+static const void* kAddArgsScope = reinterpret_cast<void*>(0x12345678);
+
+// Creates a new HistoryAddPageArgs object for sending to the history database
+// with reasonable defaults and the given URL. The returned object will NOT
+// be add-ref'ed; that is the responsibility of the caller.
+HistoryAddPageArgs* MakeAddArgs(const GURL& url) {
+ return new HistoryAddPageArgs(url,
+ Time::Now(),
+ kAddArgsScope,
+ 0,
+ GURL(),
+ history::RedirectList(),
+ PageTransition::TYPED, false);
+}
+
+// Convenience version of the above to convert a char string.
+HistoryAddPageArgs* MakeAddArgs(const char* url) {
+ return MakeAddArgs(GURL(url));
+}
+
+// A HistoryDBTask implementation. Each time RunOnDBThread is invoked,
+// invoke_count is incremented. When it has been invoked kWantInvokeCount
+// times, RunOnDBThread returns true, which should stop it from being invoked
+// again. When DoneRunOnMainThread is invoked, done_invoked is set to true.
+class HistoryDBTaskImpl : public HistoryDBTask {
+ public:
+ static const int kWantInvokeCount;
+
+ HistoryDBTaskImpl() : invoke_count(0), done_invoked(false) {}
+
+ virtual bool RunOnDBThread(HistoryBackend* backend, HistoryDatabase* db) {
+ return (++invoke_count == kWantInvokeCount);
+ }
+
+ virtual void DoneRunOnMainThread() {
+ done_invoked = true;
+ MessageLoop::current()->Quit();
+ }
+
+ int invoke_count;
+ bool done_invoked;
+
+ private:
+ virtual ~HistoryDBTaskImpl() {}
+
+ DISALLOW_COPY_AND_ASSIGN(HistoryDBTaskImpl);
+};
+
+// static
+const int HistoryDBTaskImpl::kWantInvokeCount = 2;
+
+} // namespace
+
+TEST_F(HistoryTest, HistoryDBTask) {
+ CancelableRequestConsumerT<int, 0> request_consumer;
+ HistoryService* history = new HistoryService();
+ ASSERT_TRUE(history->Init(history_dir_, NULL));
+ scoped_refptr<HistoryDBTaskImpl> task(new HistoryDBTaskImpl());
+ history_service_ = history;
+ history->ScheduleDBTask(task.get(), &request_consumer);
+ // Run the message loop. When HistoryDBTaskImpl::DoneRunOnMainThread runs,
+ // it will stop the message loop. If the test hangs here, it means
+ // DoneRunOnMainThread isn't being invoked correctly.
+ MessageLoop::current()->Run();
+ CleanupHistoryService();
+ // WARNING: history has now been deleted.
+ history = NULL;
+ ASSERT_EQ(HistoryDBTaskImpl::kWantInvokeCount, task->invoke_count);
+ ASSERT_TRUE(task->done_invoked);
+}
+
+TEST_F(HistoryTest, HistoryDBTaskCanceled) {
+ CancelableRequestConsumerT<int, 0> request_consumer;
+ HistoryService* history = new HistoryService();
+ ASSERT_TRUE(history->Init(history_dir_, NULL));
+ scoped_refptr<HistoryDBTaskImpl> task(new HistoryDBTaskImpl());
+ history_service_ = history;
+ history->ScheduleDBTask(task.get(), &request_consumer);
+ request_consumer.CancelAllRequests();
+ CleanupHistoryService();
+ // WARNING: history has now been deleted.
+ history = NULL;
+ ASSERT_FALSE(task->done_invoked);
+}
+
+} // namespace history
diff --git a/chrome/browser/history/in_memory_database.cc b/chrome/browser/history/in_memory_database.cc
new file mode 100644
index 0000000..a6a9a6f
--- /dev/null
+++ b/chrome/browser/history/in_memory_database.cc
@@ -0,0 +1,107 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/history/in_memory_database.h"
+
+#include "base/file_path.h"
+#include "base/histogram.h"
+#include "base/logging.h"
+#include "base/time.h"
+#include "base/utf_string_conversions.h"
+#include "build/build_config.h"
+
+namespace history {
+
+InMemoryDatabase::InMemoryDatabase() : URLDatabase() {
+}
+
+InMemoryDatabase::~InMemoryDatabase() {
+}
+
+bool InMemoryDatabase::InitDB() {
+ // Set the database page size to 4K for better performance.
+ db_.set_page_size(4096);
+
+ if (!db_.OpenInMemory()) {
+ NOTREACHED() << "Cannot open databse " << GetDB().GetErrorMessage();
+ return false;
+ }
+
+ // No reason to leave data behind in memory when rows are removed.
+ db_.Execute("PRAGMA auto_vacuum=1");
+
+ // Ensure this is really an in-memory-only cache.
+ db_.Execute("PRAGMA temp_store=MEMORY");
+
+ // Create the URL table, but leave it empty for now.
+ if (!CreateURLTable(false)) {
+ NOTREACHED() << "Unable to create table";
+ db_.Close();
+ return false;
+ }
+
+ return true;
+}
+
+bool InMemoryDatabase::InitFromScratch() {
+ if (!InitDB())
+ return false;
+
+ // InitDB doesn't create the index, so in the disk-loading case it can be
+ // added afterwards.
+ CreateMainURLIndex();
+ return true;
+}
+
+bool InMemoryDatabase::InitFromDisk(const FilePath& history_name) {
+ if (!InitDB())
+ return false;
+
+ // Attach to the history database on disk. (We can't ATTACH in the middle of
+ // a transaction.)
+ sql::Statement attach(GetDB().GetUniqueStatement("ATTACH ? AS history"));
+ if (!attach) {
+ NOTREACHED() << "Unable to attach to history database.";
+ return false;
+ }
+#if defined(OS_POSIX)
+ attach.BindString(0, history_name.value());
+#else
+ attach.BindString(0, WideToUTF8(history_name.value()));
+#endif
+ if (!attach.Run()) {
+ NOTREACHED() << GetDB().GetErrorMessage();
+ return false;
+ }
+
+ // Copy URL data to memory.
+ base::TimeTicks begin_load = base::TimeTicks::Now();
+ if (!db_.Execute(
+ "INSERT INTO urls SELECT * FROM history.urls WHERE typed_count > 0")) {
+ // Unable to get data from the history database. This is OK; the file may
+ // just not exist yet.
+ }
+ base::TimeTicks end_load = base::TimeTicks::Now();
+ UMA_HISTOGRAM_MEDIUM_TIMES("History.InMemoryDBPopulate",
+ end_load - begin_load);
+ UMA_HISTOGRAM_COUNTS("History.InMemoryDBItemCount", db_.GetLastChangeCount());
+
+ // Detach from the history database on disk.
+ if (!db_.Execute("DETACH history")) {
+ NOTREACHED() << "Unable to detach from history database.";
+ return false;
+ }
+
+ // Index the table; this is faster than creating the index first and then
+ // inserting into it.
+ CreateMainURLIndex();
+
+ return true;
+}
+
+sql::Connection& InMemoryDatabase::GetDB() {
+ return db_;
+}
+
+} // namespace history
diff --git a/chrome/browser/history/in_memory_database.h b/chrome/browser/history/in_memory_database.h
new file mode 100644
index 0000000..62460dd
--- /dev/null
+++ b/chrome/browser/history/in_memory_database.h
@@ -0,0 +1,51 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_DATABASE_H_
+#define CHROME_BROWSER_HISTORY_IN_MEMORY_DATABASE_H_
+
+#include <string>
+
+#include "app/sql/connection.h"
+#include "base/basictypes.h"
+#include "chrome/browser/history/url_database.h"
+
+class FilePath;
+
+namespace history {
+
+// Class used for a fast in-memory cache of typed URLs. Used for inline
+// autocomplete since it is fast enough to be called synchronously as the user
+// is typing.
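+//
+// Typical use, as an illustrative sketch (|history_file| is an assumed
+// FilePath pointing at the on-disk history database):
+//
+//   scoped_ptr<InMemoryDatabase> db(new InMemoryDatabase);
+//   bool loaded = db->InitFromDisk(history_file);  // False on failure.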
+class InMemoryDatabase : public URLDatabase {
+ public:
+ InMemoryDatabase();
+ virtual ~InMemoryDatabase();
+
+ // Creates an empty in-memory database.
+ bool InitFromScratch();
+
+ // Initializes the database by directly slurping the data from the given
+ // file. Conceptually, the InMemoryHistoryBackend should do the populating
+ // after this object does some common initialization, but that would be
+ // much slower.
+ bool InitFromDisk(const FilePath& history_name);
+
+ protected:
+ // Implemented for URLDatabase.
+ virtual sql::Connection& GetDB();
+
+ private:
+ // Initializes the database connection; this is the shared code between
+ // InitFromScratch() and InitFromDisk() above. Returns true on success.
+ bool InitDB();
+
+ sql::Connection db_;
+
+ DISALLOW_COPY_AND_ASSIGN(InMemoryDatabase);
+};
+
+} // namespace history
+
+#endif // CHROME_BROWSER_HISTORY_IN_MEMORY_DATABASE_H_
diff --git a/chrome/browser/history/in_memory_history_backend.cc b/chrome/browser/history/in_memory_history_backend.cc
new file mode 100644
index 0000000..9f3e7be
--- /dev/null
+++ b/chrome/browser/history/in_memory_history_backend.cc
@@ -0,0 +1,135 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/history/in_memory_history_backend.h"
+
+#include "base/command_line.h"
+#include "chrome/browser/browser_process.h"
+#include "chrome/browser/history/history_notifications.h"
+#include "chrome/browser/history/in_memory_database.h"
+#include "chrome/browser/history/in_memory_url_index.h"
+#include "chrome/browser/profile.h"
+#include "chrome/common/chrome_switches.h"
+#include "chrome/common/notification_service.h"
+
+namespace history {
+
+// If a page becomes starred, we use this id in place of the real starred id.
+// See note in OnURLsStarred.
+static const StarID kBogusStarredID = 0x0FFFFFFF;
+
+InMemoryHistoryBackend::InMemoryHistoryBackend()
+ : profile_(NULL) {
+}
+
+InMemoryHistoryBackend::~InMemoryHistoryBackend() {
+}
+
+bool InMemoryHistoryBackend::Init(const FilePath& history_filename) {
+ db_.reset(new InMemoryDatabase);
+ bool success = db_->InitFromDisk(history_filename);
+
+ if (CommandLine::ForCurrentProcess()->HasSwitch(
+ switches::kEnableInMemoryURLIndex)) {
+ index_.reset(new InMemoryURLIndex);
+ // TODO(rohitrao): Load index.
+ }
+
+ return success;
+}
+
+void InMemoryHistoryBackend::AttachToHistoryService(Profile* profile) {
+ if (!db_.get()) {
+ NOTREACHED();
+ return;
+ }
+
+ profile_ = profile;
+
+ // TODO(evanm): this is currently necessitated by generate_profile, which
+ // runs without a browser process. generate_profile should really create
+ // a browser process, at which point this check can then be nuked.
+ if (!g_browser_process)
+ return;
+
+ // Register for the notifications we care about.
+ // We only want notifications for the associated profile.
+ Source<Profile> source(profile_);
+ registrar_.Add(this, NotificationType::HISTORY_URL_VISITED, source);
+ registrar_.Add(this, NotificationType::HISTORY_TYPED_URLS_MODIFIED, source);
+ registrar_.Add(this, NotificationType::HISTORY_URLS_DELETED, source);
+}
+
+void InMemoryHistoryBackend::Observe(NotificationType type,
+ const NotificationSource& source,
+ const NotificationDetails& details) {
+ switch (type.value) {
+ case NotificationType::HISTORY_URL_VISITED: {
+ Details<history::URLVisitedDetails> visited_details(details);
+ if (visited_details->row.typed_count() > 0) {
+ URLsModifiedDetails modified_details;
+ modified_details.changed_urls.push_back(visited_details->row);
+ OnTypedURLsModified(modified_details);
+ }
+ break;
+ }
+ case NotificationType::HISTORY_TYPED_URLS_MODIFIED:
+ OnTypedURLsModified(
+ *Details<history::URLsModifiedDetails>(details).ptr());
+ break;
+ case NotificationType::HISTORY_URLS_DELETED:
+ OnURLsDeleted(*Details<history::URLsDeletedDetails>(details).ptr());
+ break;
+ default:
+ // For simplicity, the unit tests send us all notifications, even when
+ // we haven't registered for them, so don't assert here.
+ break;
+ }
+}
+
+void InMemoryHistoryBackend::OnTypedURLsModified(
+ const URLsModifiedDetails& details) {
+ DCHECK(db_.get());
+
+ // Add or update the URLs.
+ //
+ // TODO(brettw) currently the rows in the in-memory database don't match the
+ // IDs in the main database. This sucks. Instead of Add and Remove, we should
+ // have Sync(), which would take the ID if it's given and add it.
+ std::vector<history::URLRow>::const_iterator i;
+ for (i = details.changed_urls.begin();
+ i != details.changed_urls.end(); i++) {
+ URLID id = db_->GetRowForURL(i->url(), NULL);
+ if (id)
+ db_->UpdateURLRow(id, *i);
+ else
+ db_->AddURL(*i);
+ }
+}
+
+void InMemoryHistoryBackend::OnURLsDeleted(const URLsDeletedDetails& details) {
+ DCHECK(db_.get());
+
+ if (details.all_history) {
+ // When all history is deleted, the individual URLs won't be listed. Just
+ // create a new database to quickly clear everything out.
+ db_.reset(new InMemoryDatabase);
+ if (!db_->InitFromScratch())
+ db_.reset();
+ return;
+ }
+
+ // Delete all matching URLs in our database.
+ for (std::set<GURL>::const_iterator i = details.urls.begin();
+ i != details.urls.end(); ++i) {
+ URLID id = db_->GetRowForURL(*i, NULL);
+ if (id) {
+ // We typically won't have most of them since we only have a subset of
+ // history, so ignore errors.
+ db_->DeleteURLRow(id);
+ }
+ }
+}
+
+} // namespace history
diff --git a/chrome/browser/history/in_memory_history_backend.h b/chrome/browser/history/in_memory_history_backend.h
new file mode 100644
index 0000000..30026b5
--- /dev/null
+++ b/chrome/browser/history/in_memory_history_backend.h
@@ -0,0 +1,89 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Contains the history backend wrapper around the in-memory URL database. This
+// object maintains an in-memory cache of the subset of history required to do
+// in-line autocomplete.
+//
+// It is created on the history thread and passed to the main thread where
+// operations can be completed synchronously. It listens for notifications
+// from the "regular" history backend and keeps itself in sync.
+
+#ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_HISTORY_BACKEND_H_
+#define CHROME_BROWSER_HISTORY_IN_MEMORY_HISTORY_BACKEND_H_
+
+#include <string>
+
+#include "base/basictypes.h"
+#include "base/gtest_prod_util.h"
+#include "base/scoped_ptr.h"
+#include "chrome/common/notification_registrar.h"
+
+class FilePath;
+class HistoryDatabase;
+class Profile;
+
+namespace history {
+
+class InMemoryDatabase;
+class InMemoryURLIndex;
+struct URLsDeletedDetails;
+struct URLsModifiedDetails;
+
+class InMemoryHistoryBackend : public NotificationObserver {
+ public:
+ InMemoryHistoryBackend();
+ ~InMemoryHistoryBackend();
+
+ // Initializes with data from the given history database.
+ bool Init(const FilePath& history_filename);
+
+ // Does initialization work when this object is attached to the history
+ // system on the main thread. The argument is the profile under which the
+ // attached history service runs.
+ void AttachToHistoryService(Profile* profile);
+
+ // Returns the underlying database associated with this backend. The current
+ // autocomplete code was written for this, but this accessor should probably
+ // be removed so that callers deal directly with this object rather than
+ // the DB.
+ InMemoryDatabase* db() const {
+ return db_.get();
+ }
+
+ // Returns the in-memory index owned by this backend. This index is only
+ // loaded when the --enable-in-memory-url-index flag is used.
+ InMemoryURLIndex* index() const {
+ return index_.get();
+ }
+
+ // Notification callback.
+ virtual void Observe(NotificationType type,
+ const NotificationSource& source,
+ const NotificationDetails& details);
+
+ private:
+ FRIEND_TEST_ALL_PREFIXES(HistoryBackendTest, DeleteAll);
+
+ // Handler for NOTIFY_HISTORY_TYPED_URLS_MODIFIED.
+ void OnTypedURLsModified(const URLsModifiedDetails& details);
+
+ // Handler for NOTIFY_HISTORY_URLS_DELETED.
+ void OnURLsDeleted(const URLsDeletedDetails& details);
+
+ NotificationRegistrar registrar_;
+
+ scoped_ptr<InMemoryDatabase> db_;
+
+ scoped_ptr<InMemoryURLIndex> index_;
+
+  // The profile to which this object is attached. May be NULL before
+ // initialization.
+ Profile* profile_;
+
+ DISALLOW_COPY_AND_ASSIGN(InMemoryHistoryBackend);
+};
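+
+// Illustrative usage sketch (not part of this header; |history_dir| and
+// |profile| are assumed to be supplied by the caller):
+//
+//   scoped_ptr<InMemoryHistoryBackend> mem_backend(
+//       new InMemoryHistoryBackend);
+//   // On the history thread:
+//   if (!mem_backend->Init(history_dir.Append(FILE_PATH_LITERAL("History"))))
+//     mem_backend.reset();  // Run without the in-memory cache.
+//   // Later, on the main thread:
+//   if (mem_backend.get())
+//     mem_backend->AttachToHistoryService(profile);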
+
+} // namespace history
+
+#endif // CHROME_BROWSER_HISTORY_IN_MEMORY_HISTORY_BACKEND_H_
diff --git a/chrome/browser/history/in_memory_url_index.cc b/chrome/browser/history/in_memory_url_index.cc
new file mode 100644
index 0000000..83c401f
--- /dev/null
+++ b/chrome/browser/history/in_memory_url_index.cc
@@ -0,0 +1,13 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/history/in_memory_url_index.h"
+
+namespace history {
+
+InMemoryURLIndex::InMemoryURLIndex() {}
+
+InMemoryURLIndex::~InMemoryURLIndex() {}
+
+} // namespace history
diff --git a/chrome/browser/history/in_memory_url_index.h b/chrome/browser/history/in_memory_url_index.h
new file mode 100644
index 0000000..7b57a4a
--- /dev/null
+++ b/chrome/browser/history/in_memory_url_index.h
@@ -0,0 +1,23 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_
+#define CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_
+
+namespace history {
+
+// The URL history source.
+// Holds portions of the URL database in memory in an indexed form. Used to
+// quickly look up matching URLs for a given query string. Used by
+// the HistoryURLProvider for inline autocomplete and to provide URL
+// matches to the omnibox.
+class InMemoryURLIndex {
+ public:
+ InMemoryURLIndex();
+ ~InMemoryURLIndex();
+};
+
+} // namespace history
+
+#endif // CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_
diff --git a/chrome/browser/history/in_memory_url_index_unittest.cc b/chrome/browser/history/in_memory_url_index_unittest.cc
new file mode 100644
index 0000000..f5932ef
--- /dev/null
+++ b/chrome/browser/history/in_memory_url_index_unittest.cc
@@ -0,0 +1,22 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/history/in_memory_url_index.h"
+
+#include "base/scoped_ptr.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace history {
+
+class InMemoryURLIndexTest : public testing::Test {
+ protected:
+ scoped_ptr<InMemoryURLIndex> url_index_;
+};
+
+TEST_F(InMemoryURLIndexTest, Construction) {
+ url_index_.reset(new InMemoryURLIndex);
+ EXPECT_TRUE(url_index_.get());
+}
+
+} // namespace history
diff --git a/chrome/browser/history/multipart_uitest.cc b/chrome/browser/history/multipart_uitest.cc
new file mode 100644
index 0000000..a8fcf4c
--- /dev/null
+++ b/chrome/browser/history/multipart_uitest.cc
@@ -0,0 +1,61 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/test/ui/ui_test.h"
+
+#include "app/sql/connection.h"
+#include "app/sql/statement.h"
+#include "chrome/test/automation/tab_proxy.h"
+#include "chrome/test/automation/browser_proxy.h"
+#include "net/url_request/url_request_unittest.h"
+
+namespace {
+
+class MultipartResponseUITest : public UITest {
+};
+
+#if defined(NDEBUG)
+// http://code.google.com/p/chromium/issues/detail?id=37746
+// This test runs only in release builds because it fails in debug test runs.
+TEST_F(MultipartResponseUITest, SingleVisit) {
+ // Make sure that visiting a multipart/x-mixed-replace site only
+ // creates one entry in the visits table.
+ const wchar_t kDocRoot[] = L"chrome/test/data";
+ scoped_refptr<HTTPTestServer> server =
+ HTTPTestServer::CreateServer(kDocRoot, NULL);
+ ASSERT_TRUE(NULL != server.get());
+
+ scoped_refptr<BrowserProxy> browser_proxy(automation()->GetBrowserWindow(0));
+ ASSERT_TRUE(browser_proxy.get());
+ scoped_refptr<TabProxy> tab_proxy(browser_proxy->GetActiveTab());
+ ASSERT_TRUE(tab_proxy.get());
+ NavigateToURL(server->TestServerPage("multipart"));
+ std::wstring title;
+ EXPECT_TRUE(tab_proxy->GetTabTitle(&title));
+ EXPECT_EQ(L"page 9", title);
+ CloseBrowserAndServer();
+
+  // The browser has shut down now. Check the contents of the history
+ // table. We should have only one visit for the URL even though it
+ // had 10 parts.
+ sql::Connection db;
+ FilePath history =
+ user_data_dir().AppendASCII("Default").AppendASCII("History");
+ ASSERT_TRUE(file_util::PathExists(history));
+ ASSERT_TRUE(db.Open(history));
+ std::string query(
+ "SELECT COUNT(1) FROM visits, urls WHERE visits.url = urls.id"
+ " AND urls.url LIKE 'http://localhost:%/multipart'");
+ {
+ sql::Statement statement(db.GetUniqueStatement(query.c_str()));
+ EXPECT_TRUE(statement);
+ EXPECT_TRUE(statement.Step());
+ EXPECT_EQ(1, statement.ColumnInt(0));
+ }
+ db.Close();
+}
+#endif
+
+} // namespace
diff --git a/chrome/browser/history/page_usage_data.cc b/chrome/browser/history/page_usage_data.cc
new file mode 100644
index 0000000..f202538
--- /dev/null
+++ b/chrome/browser/history/page_usage_data.cc
@@ -0,0 +1,35 @@
+// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/history/page_usage_data.h"
+
+#include <algorithm>
+
+#include "third_party/skia/include/core/SkBitmap.h"
+
+PageUsageData::~PageUsageData() {
+ delete thumbnail_;
+ delete favicon_;
+}
+
+void PageUsageData::SetThumbnail(SkBitmap* img) {
+ if (thumbnail_ && thumbnail_ != img)
+ delete thumbnail_;
+
+ thumbnail_ = img;
+ thumbnail_set_ = true;
+}
+
+void PageUsageData::SetFavIcon(SkBitmap* img) {
+ if (favicon_ && favicon_ != img)
+ delete favicon_;
+ favicon_ = img;
+ favicon_set_ = true;
+}
+
+// static
+bool PageUsageData::Predicate(const PageUsageData* lhs,
+ const PageUsageData* rhs) {
+ return lhs->GetScore() > rhs->GetScore();
+}
diff --git a/chrome/browser/history/page_usage_data.h b/chrome/browser/history/page_usage_data.h
new file mode 100644
index 0000000..66a63e2
--- /dev/null
+++ b/chrome/browser/history/page_usage_data.h
@@ -0,0 +1,134 @@
+// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_HISTORY_PAGE_USAGE_DATA_H__
+#define CHROME_BROWSER_HISTORY_PAGE_USAGE_DATA_H__
+
+#include "base/string16.h"
+#include "chrome/browser/history/history_types.h"
+#include "googleurl/src/gurl.h"
+
+class SkBitmap;
+
+/////////////////////////////////////////////////////////////////////////////
+//
+// PageUsageData
+//
+// A per-domain usage data structure used to compute and manage the most
+// visited pages.
+//
+// See History::QueryPageUsageSince()
+//
+/////////////////////////////////////////////////////////////////////////////
+class PageUsageData {
+ public:
+ explicit PageUsageData(history::URLID id)
+ : id_(id),
+ thumbnail_(NULL),
+ thumbnail_set_(false),
+ thumbnail_pending_(false),
+ favicon_(NULL),
+ favicon_set_(false),
+ favicon_pending_(false),
+ score_(0.0) {
+ }
+
+ virtual ~PageUsageData();
+
+  // Returns the URL ID.
+ history::URLID GetID() const {
+ return id_;
+ }
+
+ void SetURL(const GURL& url) {
+ url_ = url;
+ }
+
+ const GURL& GetURL() const {
+ return url_;
+ }
+
+ void SetTitle(const string16& s) {
+ title_ = s;
+ }
+
+ const string16& GetTitle() const {
+ return title_;
+ }
+
+ void SetScore(double v) {
+ score_ = v;
+ }
+
+ double GetScore() const {
+ return score_;
+ }
+
+ void SetThumbnailMissing() {
+ thumbnail_set_ = true;
+ }
+
+ void SetThumbnail(SkBitmap* img);
+
+ bool HasThumbnail() const {
+ return thumbnail_set_;
+ }
+
+ const SkBitmap* GetThumbnail() const {
+ return thumbnail_;
+ }
+
+ bool thumbnail_pending() const {
+ return thumbnail_pending_;
+ }
+
+ void set_thumbnail_pending(bool pending) {
+ thumbnail_pending_ = pending;
+ }
+
+ void SetFavIconMissing() {
+ favicon_set_ = true;
+ }
+
+ void SetFavIcon(SkBitmap* img);
+
+ bool HasFavIcon() const {
+ return favicon_set_;
+ }
+
+ bool favicon_pending() const {
+ return favicon_pending_;
+ }
+
+ void set_favicon_pending(bool pending) {
+ favicon_pending_ = pending;
+ }
+
+ const SkBitmap* GetFavIcon() const {
+ return favicon_;
+ }
+
+  // Sort predicate to sort instances by score (high to low).
+  static bool Predicate(const PageUsageData* lhs,
+                        const PageUsageData* rhs);
+
+ private:
+ history::URLID id_;
+ GURL url_;
+ string16 title_;
+
+ SkBitmap* thumbnail_;
+ bool thumbnail_set_;
+ // Whether we have an outstanding request for the thumbnail.
+ bool thumbnail_pending_;
+
+ SkBitmap* favicon_;
+ bool favicon_set_;
+ // Whether we have an outstanding request for the favicon.
+ bool favicon_pending_;
+
+ double score_;
+};
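+
+// Illustrative sketch (assumes |results| is a std::vector<PageUsageData*>
+// whose elements the caller owns), ranking pages by descending score:
+//
+//   // |results| filled in by a history query.
+//   std::sort(results.begin(), results.end(), PageUsageData::Predicate);
+//   // results.front() now holds the highest-scored page.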
+
+#endif // CHROME_BROWSER_HISTORY_PAGE_USAGE_DATA_H__
diff --git a/chrome/browser/history/query_parser.cc b/chrome/browser/history/query_parser.cc
new file mode 100644
index 0000000..e1afb86
--- /dev/null
+++ b/chrome/browser/history/query_parser.cc
@@ -0,0 +1,386 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/history/query_parser.h"
+
+#include <algorithm>
+
+#include "app/l10n_util.h"
+#include "base/i18n/word_iterator.h"
+#include "base/logging.h"
+#include "base/scoped_vector.h"
+#include "base/string_util.h"
+#include "base/utf_string_conversions.h"
+#include "unicode/uscript.h"
+
+namespace {
+
+// Returns true if |mp1.first| is less than |mp2.first|. This is used to
+// sort match positions.
+bool CompareMatchPosition(const Snippet::MatchPosition& mp1,
+                          const Snippet::MatchPosition& mp2) {
+ return mp1.first < mp2.first;
+}
+
+// Returns true if |mp2| intersects |mp1|. This is intended for use by
+// CoalesceMatchesFrom and isn't meant as a general intersection comparison
+// function.
+bool SnippetIntersects(const Snippet::MatchPosition& mp1,
+ const Snippet::MatchPosition& mp2) {
+ return mp2.first >= mp1.first && mp2.first <= mp1.second;
+}
+
+// Coalesces match positions in |matches| after index that intersect the match
+// position at |index|.
+void CoalesceMatchesFrom(size_t index,
+ Snippet::MatchPositions* matches) {
+ Snippet::MatchPosition& mp = (*matches)[index];
+ for (Snippet::MatchPositions::iterator i = matches->begin() + index + 1;
+ i != matches->end(); ) {
+ if (SnippetIntersects(mp, *i)) {
+ mp.second = i->second;
+ i = matches->erase(i);
+ } else {
+ return;
+ }
+ }
+}
+
+// Sorts the match positions in |matches| by their first index, then coalesces
+// any match positions that intersect each other.
+void CoalesceAndSortMatchPositions(Snippet::MatchPositions* matches) {
+ std::sort(matches->begin(), matches->end(), &CompareMatchPosition);
+ // WARNING: we don't use iterator here as CoalesceMatchesFrom may remove
+ // from matches.
+ for (size_t i = 0; i < matches->size(); ++i)
+ CoalesceMatchesFrom(i, matches);
+}
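+
+// For example (illustrative): the positions {[10,15], [0,5], [4,8]} sort to
+// {[0,5], [4,8], [10,15]} and then coalesce to {[0,8], [10,15]}.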
+
+} // namespace
+
+// Inheritance structure:
+// Queries are represented as trees of QueryNodes.
+// QueryNodes are either a collection of subnodes (a QueryNodeList)
+// or a single word (a QueryNodeWord).
+
+// A QueryNodeWord is a single word in the query.
+class QueryNodeWord : public QueryNode {
+ public:
+ explicit QueryNodeWord(const string16& word)
+ : word_(word), literal_(false) {}
+ virtual ~QueryNodeWord() {}
+ virtual int AppendToSQLiteQuery(string16* query) const;
+ virtual bool IsWord() const { return true; }
+
+ const string16& word() const { return word_; }
+ void set_literal(bool literal) { literal_ = literal; }
+
+ virtual bool HasMatchIn(const std::vector<QueryWord>& words,
+ Snippet::MatchPositions* match_positions) const;
+
+ virtual bool Matches(const string16& word, bool exact) const;
+ virtual void AppendWords(std::vector<string16>* words) const;
+
+ private:
+ string16 word_;
+ bool literal_;
+};
+
+bool QueryNodeWord::HasMatchIn(const std::vector<QueryWord>& words,
+ Snippet::MatchPositions* match_positions) const {
+ for (size_t i = 0; i < words.size(); ++i) {
+ if (Matches(words[i].word, false)) {
+ size_t match_start = words[i].position;
+      match_positions->push_back(
+          Snippet::MatchPosition(match_start, match_start + word_.size()));
+ return true;
+ }
+ }
+ return false;
+}
+
+bool QueryNodeWord::Matches(const string16& word, bool exact) const {
+ if (exact || !QueryParser::IsWordLongEnoughForPrefixSearch(word_))
+ return word == word_;
+ return word.size() >= word_.size() &&
+ (word_.compare(0, word_.size(), word, 0, word_.size()) == 0);
+}
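+
+// For example (illustrative): a QueryNodeWord for "foo" matches "foo" exactly,
+// matches "foobar" when |exact| is false, and never matches the shorter "fo".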
+
+void QueryNodeWord::AppendWords(std::vector<string16>* words) const {
+ words->push_back(word_);
+}
+
+int QueryNodeWord::AppendToSQLiteQuery(string16* query) const {
+ query->append(word_);
+
+ // Use prefix search if we're not literal and long enough.
+ if (!literal_ && QueryParser::IsWordLongEnoughForPrefixSearch(word_))
+ *query += L'*';
+ return 1;
+}
+
+// A QueryNodeList has a collection of child QueryNodes
+// which it cleans up after.
+class QueryNodeList : public QueryNode {
+ public:
+ virtual ~QueryNodeList();
+
+ virtual int AppendToSQLiteQuery(string16* query) const {
+ return AppendChildrenToString(query);
+ }
+ virtual bool IsWord() const { return false; }
+
+ void AddChild(QueryNode* node) { children_.push_back(node); }
+
+ typedef std::vector<QueryNode*> QueryNodeVector;
+ QueryNodeVector* children() { return &children_; }
+
+ // Remove empty subnodes left over from other parsing.
+ void RemoveEmptySubnodes();
+
+ // QueryNodeList is never used with Matches or HasMatchIn.
+ virtual bool Matches(const string16& word, bool exact) const {
+ NOTREACHED();
+ return false;
+ }
+ virtual bool HasMatchIn(const std::vector<QueryWord>& words,
+ Snippet::MatchPositions* match_positions) const {
+ NOTREACHED();
+ return false;
+ }
+ virtual void AppendWords(std::vector<string16>* words) const;
+
+ protected:
+ int AppendChildrenToString(string16* query) const;
+
+ QueryNodeVector children_;
+};
+
+QueryNodeList::~QueryNodeList() {
+ for (QueryNodeVector::iterator node = children_.begin();
+ node != children_.end(); ++node)
+ delete *node;
+}
+
+void QueryNodeList::RemoveEmptySubnodes() {
+ for (size_t i = 0; i < children_.size(); ++i) {
+ if (children_[i]->IsWord())
+ continue;
+
+ QueryNodeList* list_node = static_cast<QueryNodeList*>(children_[i]);
+ list_node->RemoveEmptySubnodes();
+ if (list_node->children()->empty()) {
+ children_.erase(children_.begin() + i);
+ --i;
+ delete list_node;
+ }
+ }
+}
+
+void QueryNodeList::AppendWords(std::vector<string16>* words) const {
+ for (size_t i = 0; i < children_.size(); ++i)
+ children_[i]->AppendWords(words);
+}
+
+int QueryNodeList::AppendChildrenToString(string16* query) const {
+ int num_words = 0;
+ for (QueryNodeVector::const_iterator node = children_.begin();
+ node != children_.end(); ++node) {
+ if (node != children_.begin())
+ query->push_back(L' ');
+ num_words += (*node)->AppendToSQLiteQuery(query);
+ }
+ return num_words;
+}
+
+// A QueryNodePhrase is a phrase query ("quoted").
+class QueryNodePhrase : public QueryNodeList {
+ public:
+ virtual int AppendToSQLiteQuery(string16* query) const {
+ query->push_back(L'"');
+ int num_words = AppendChildrenToString(query);
+ query->push_back(L'"');
+ return num_words;
+ }
+
+ virtual bool Matches(const string16& word, bool exact) const;
+ virtual bool HasMatchIn(const std::vector<QueryWord>& words,
+ Snippet::MatchPositions* match_positions) const;
+};
+
+bool QueryNodePhrase::Matches(const string16& word, bool exact) const {
+ NOTREACHED();
+ return false;
+}
+
+bool QueryNodePhrase::HasMatchIn(
+ const std::vector<QueryWord>& words,
+ Snippet::MatchPositions* match_positions) const {
+ if (words.size() < children_.size())
+ return false;
+
+ for (size_t i = 0, max = words.size() - children_.size() + 1; i < max; ++i) {
+ bool matched_all = true;
+ for (size_t j = 0; j < children_.size(); ++j) {
+ if (!children_[j]->Matches(words[i + j].word, true)) {
+ matched_all = false;
+ break;
+ }
+ }
+ if (matched_all) {
+ const QueryWord& last_word = words[i + children_.size() - 1];
+ match_positions->push_back(
+ Snippet::MatchPosition(words[i].position,
+ last_word.position + last_word.word.length()));
+ return true;
+ }
+ }
+ return false;
+}
+
+QueryParser::QueryParser() {
+}
+
+// static
+bool QueryParser::IsWordLongEnoughForPrefixSearch(const string16& word) {
+  DCHECK(!word.empty());
+ size_t minimum_length = 3;
+ // We intentionally exclude Hangul Jamos (both Conjoining and compatibility)
+ // because they 'behave like' Latin letters. Moreover, we should
+ // normalize the former before reaching here.
+ if (0xAC00 <= word[0] && word[0] <= 0xD7A3)
+ minimum_length = 2;
+ return word.size() >= minimum_length;
+}
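+
+// For example (illustrative): "ab" is too short for a prefix search while
+// "abc" is long enough; a two-syllable Hangul word is also long enough since
+// the minimum drops to 2 for Hangul.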
+
+// Returns true if the character is considered a quote.
+static bool IsQueryQuote(wchar_t ch) {
+ return ch == '"' ||
+ ch == 0xab || // left pointing double angle bracket
+ ch == 0xbb || // right pointing double angle bracket
+ ch == 0x201c || // left double quotation mark
+ ch == 0x201d || // right double quotation mark
+ ch == 0x201e; // double low-9 quotation mark
+}
+
+int QueryParser::ParseQuery(const string16& query,
+ string16* sqlite_query) {
+ QueryNodeList root;
+ if (!ParseQueryImpl(query, &root))
+ return 0;
+ return root.AppendToSQLiteQuery(sqlite_query);
+}
+
+void QueryParser::ParseQuery(const string16& query,
+ std::vector<QueryNode*>* nodes) {
+ QueryNodeList root;
+ if (ParseQueryImpl(l10n_util::ToLower(query), &root))
+ nodes->swap(*root.children());
+}
+
+void QueryParser::ExtractQueryWords(const string16& query,
+ std::vector<string16>* words) {
+ QueryNodeList root;
+ if (!ParseQueryImpl(query, &root))
+ return;
+ root.AppendWords(words);
+}
+
+bool QueryParser::DoesQueryMatch(const string16& text,
+ const std::vector<QueryNode*>& query_nodes,
+ Snippet::MatchPositions* match_positions) {
+ if (query_nodes.empty())
+ return false;
+
+ std::vector<QueryWord> query_words;
+ string16 lower_text = l10n_util::ToLower(text);
+ ExtractQueryWords(lower_text, &query_words);
+
+ if (query_words.empty())
+ return false;
+
+ Snippet::MatchPositions matches;
+ for (size_t i = 0; i < query_nodes.size(); ++i) {
+ if (!query_nodes[i]->HasMatchIn(query_words, &matches))
+ return false;
+ }
+ if (lower_text.length() != text.length()) {
+ // The lower case string differs from the original string. The matches are
+ // meaningless.
+ // TODO(sky): we need a better way to align the positions so that we don't
+ // completely punt here.
+ match_positions->clear();
+ } else {
+    CoalesceAndSortMatchPositions(&matches);
+ match_positions->swap(matches);
+ }
+ return true;
+}
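+
+// For example (illustrative): with nodes parsed from the query |foo blah|,
+// the text "blahx foobar" matches; "blah" contributes the position [0, 4)
+// and "foo" contributes [6, 9), which are then sorted and coalesced.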
+
+bool QueryParser::ParseQueryImpl(const string16& query,
+ QueryNodeList* root) {
+ WordIterator iter(&query, WordIterator::BREAK_WORD);
+ // TODO(evanm): support a locale here
+ if (!iter.Init())
+ return false;
+
+ // To handle nesting, we maintain a stack of QueryNodeLists.
+ // The last element (back) of the stack contains the current, deepest node.
+ std::vector<QueryNodeList*> query_stack;
+ query_stack.push_back(root);
+
+ bool in_quotes = false; // whether we're currently in a quoted phrase
+ while (iter.Advance()) {
+ // Just found a span between 'prev' (inclusive) and 'pos' (exclusive). It
+ // is not necessarily a word, but could also be a sequence of punctuation
+ // or whitespace.
+ if (iter.IsWord()) {
+ string16 word = iter.GetWord();
+
+ QueryNodeWord* word_node = new QueryNodeWord(word);
+ if (in_quotes)
+ word_node->set_literal(true);
+ query_stack.back()->AddChild(word_node);
+ } else { // Punctuation.
+ if (IsQueryQuote(query[iter.prev()])) {
+ if (!in_quotes) {
+ QueryNodeList* quotes_node = new QueryNodePhrase;
+ query_stack.back()->AddChild(quotes_node);
+ query_stack.push_back(quotes_node);
+ in_quotes = true;
+ } else {
+ query_stack.pop_back(); // Stop adding to the quoted phrase.
+ in_quotes = false;
+ }
+ }
+ }
+ }
+
+ root->RemoveEmptySubnodes();
+ return true;
+}
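+
+// For example (illustrative): parsing the query |foo "bar baz"| produces a
+// root QueryNodeList with two children: a QueryNodeWord for |foo| and a
+// QueryNodePhrase whose children are literal QueryNodeWords |bar| and |baz|.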
+
+void QueryParser::ExtractQueryWords(const string16& text,
+ std::vector<QueryWord>* words) {
+ WordIterator iter(&text, WordIterator::BREAK_WORD);
+ // TODO(evanm): support a locale here
+ if (!iter.Init())
+ return;
+
+ while (iter.Advance()) {
+ // Just found a span between 'prev' (inclusive) and 'pos' (exclusive). It
+ // is not necessarily a word, but could also be a sequence of punctuation
+ // or whitespace.
+ if (iter.IsWord()) {
+ string16 word = iter.GetWord();
+ if (!word.empty()) {
+ words->push_back(QueryWord());
+ words->back().word = word;
+ words->back().position = iter.prev();
+ }
+ }
+ }
+}
diff --git a/chrome/browser/history/query_parser.h b/chrome/browser/history/query_parser.h
new file mode 100644
index 0000000..8399abf
--- /dev/null
+++ b/chrome/browser/history/query_parser.h
@@ -0,0 +1,107 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// The query parser is used to parse queries entered into the history search
+// into more normalized queries that can be passed to the SQLite backend.
+
+#ifndef CHROME_BROWSER_HISTORY_QUERY_PARSER_H_
+#define CHROME_BROWSER_HISTORY_QUERY_PARSER_H_
+
+#include <vector>
+
+#include "base/string16.h"
+#include "chrome/browser/history/snippet.h"
+
+class QueryNodeList;
+
+// Used by HasMatchIn.
+struct QueryWord {
+  // The word to match against.
+ string16 word;
+
+ // The starting position of the word in the original text.
+ size_t position;
+};
+
+// QueryNode is used by QueryNodeParser to represent the elements that
+// constitute a query. While QueryNode is exposed by way of ParseQuery, it
+// really isn't meant for external usage.
+class QueryNode {
+ public:
+ virtual ~QueryNode() {}
+
+ // Serialize ourselves out to a string that can be passed to SQLite. Returns
+ // the number of words in this node.
+ virtual int AppendToSQLiteQuery(string16* query) const = 0;
+
+ // Return true if this is a word node, false if it's a QueryNodeList.
+ virtual bool IsWord() const = 0;
+
+ // Returns true if this node matches the specified text. If exact is true,
+ // the string must exactly match. Otherwise, this uses a starts with
+ // comparison.
+ virtual bool Matches(const string16& word, bool exact) const = 0;
+
+ // Returns true if this node matches at least one of the words in words. If
+ // the node matches at least one word, an entry is added to match_positions
+ // giving the matching region.
+ virtual bool HasMatchIn(const std::vector<QueryWord>& words,
+ Snippet::MatchPositions* match_positions) const = 0;
+
+  // Appends the words that make up this node to |words|.
+ virtual void AppendWords(std::vector<string16>* words) const = 0;
+};
+
+class QueryParser {
+ public:
+ QueryParser();
+
+ // For CJK ideographs and Korean Hangul, even a single character
+ // can be useful in prefix matching, but that may give us too many
+ // false positives. Moreover, the current ICU word breaker gives us
+ // back every single Chinese character as a word so that there's no
+ // point doing anything for them and we only adjust the minimum length
+  // to 2 for Korean Hangul while using 3 for others. This is a temporary
+  // hack until we have segmentation support.
+ static bool IsWordLongEnoughForPrefixSearch(const string16& word);
+
+ // Parse a query into a SQLite query. The resulting query is placed in
+ // sqlite_query and the number of words is returned.
+ int ParseQuery(const string16& query,
+ string16* sqlite_query);
+
+ // Parses the query words in query, returning the nodes that constitute the
+ // valid words in the query. This is intended for later usage with
+ // DoesQueryMatch.
+ // Ownership of the nodes passes to the caller.
+ void ParseQuery(const string16& query,
+ std::vector<QueryNode*>* nodes);
+
+ // Parses a query returning the words that make up the query. Any words in
+ // quotes are put in |words| without the quotes. For example, the query text
+ // "foo bar" results in two entries being added to words, one for foo and one
+ // for bar.
+ void ExtractQueryWords(const string16& query,
+ std::vector<string16>* words);
+
+ // Returns true if the string text matches the query nodes created by a call
+ // to ParseQuery. If the query does match each of the matching positions in
+ // the text is added to |match_positions|.
+ bool DoesQueryMatch(const string16& text,
+ const std::vector<QueryNode*>& nodes,
+ Snippet::MatchPositions* match_positions);
+
+ private:
+ // Does the work of parsing a query; creates nodes in QueryNodeList as
+ // appropriate. This is invoked from both of the ParseQuery methods.
+ bool ParseQueryImpl(const string16& query,
+ QueryNodeList* root);
+
+ // Extracts the words from text, placing each word into words.
+ void ExtractQueryWords(const string16& text,
+ std::vector<QueryWord>* words);
+};
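+
+// Illustrative usage (a sketch; the expected result follows from the parsing
+// rules above):
+//
+//   QueryParser parser;
+//   string16 sqlite_query;
+//   int num_words = parser.ParseQuery(ASCIIToUTF16("foo \"bar baz\""),
+//                                     &sqlite_query);
+//   // num_words == 3, sqlite_query == |foo* "bar baz"|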
+
+#endif // CHROME_BROWSER_HISTORY_QUERY_PARSER_H_
diff --git a/chrome/browser/history/query_parser_unittest.cc b/chrome/browser/history/query_parser_unittest.cc
new file mode 100644
index 0000000..f8b41d9
--- /dev/null
+++ b/chrome/browser/history/query_parser_unittest.cc
@@ -0,0 +1,163 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/basictypes.h"
+#include "base/scoped_vector.h"
+#include "base/utf_string_conversions.h"
+#include "chrome/browser/history/query_parser.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+class QueryParserTest : public testing::Test {
+ public:
+ struct TestData {
+ const char* input;
+ const int expected_word_count;
+ };
+
+ std::string QueryToString(const std::string& query);
+
+ protected:
+ QueryParser query_parser_;
+};
+
+// Test helper: converts a user query string (8-bit for hardcoding
+// convenience) to a SQLite query string.
+std::string QueryParserTest::QueryToString(const std::string& query) {
+ string16 sqlite_query;
+ query_parser_.ParseQuery(UTF8ToUTF16(query), &sqlite_query);
+ return UTF16ToUTF8(sqlite_query);
+}
+
+// Basic multi-word queries, including prefix matching.
+TEST_F(QueryParserTest, SimpleQueries) {
+ EXPECT_EQ("", QueryToString(" "));
+ EXPECT_EQ("singleword*", QueryToString("singleword"));
+ EXPECT_EQ("spacedout*", QueryToString(" spacedout "));
+ EXPECT_EQ("foo* bar*", QueryToString("foo bar"));
+ // Short words aren't prefix matches. For Korean Hangul
+ // the minimum is 2 while for other scripts, it's 3.
+ EXPECT_EQ("f b", QueryToString(" f b"));
+ // KA JANG
+ EXPECT_EQ(WideToUTF8(L"\xAC00 \xC7A5"),
+ QueryToString(WideToUTF8(L" \xAC00 \xC7A5")));
+ EXPECT_EQ("foo* bar*", QueryToString(" foo bar "));
+ // KA-JANG BICH-GO
+ EXPECT_EQ(WideToUTF8(L"\xAC00\xC7A5* \xBE5B\xACE0*"),
+ QueryToString(WideToUTF8(L"\xAC00\xC7A5 \xBE5B\xACE0")));
+}
+
+// Quoted substring parsing.
+TEST_F(QueryParserTest, Quoted) {
+ // ASCII quotes
+ EXPECT_EQ("\"Quoted\"", QueryToString("\"Quoted\""));
+ // Missing end quotes
+ EXPECT_EQ("\"miss end\"", QueryToString("\"miss end"));
+ // Missing begin quotes
+ EXPECT_EQ("miss* beg*", QueryToString("miss beg\""));
+ // Weird formatting
+ EXPECT_EQ("\"Many\" \"quotes\"", QueryToString("\"Many \"\"quotes"));
+}
+
+// Apostrophes within words should be preserved, but otherwise stripped.
+TEST_F(QueryParserTest, Apostrophes) {
+ EXPECT_EQ("foo* bar's*", QueryToString("foo bar's"));
+ EXPECT_EQ("l'foo*", QueryToString("l'foo"));
+ EXPECT_EQ("foo*", QueryToString("'foo"));
+}
+
+// Special characters.
+TEST_F(QueryParserTest, SpecialChars) {
+ EXPECT_EQ("foo* the* bar*", QueryToString("!#:/*foo#$*;'* the!#:/*bar"));
+}
+
+TEST_F(QueryParserTest, NumWords) {
+ TestData data[] = {
+ { "blah", 1 },
+ { "foo \"bar baz\"", 3 },
+ { "foo \"baz\"", 2 },
+ { "foo \"bar baz\" blah", 4 },
+ };
+
+ for (size_t i = 0; i < arraysize(data); ++i) {
+ string16 query_string;
+ EXPECT_EQ(data[i].expected_word_count,
+ query_parser_.ParseQuery(UTF8ToUTF16(data[i].input),
+ &query_string));
+ }
+}
+
+TEST_F(QueryParserTest, ParseQueryNodesAndMatch) {
+ struct TestData2 {
+ const std::string query;
+ const std::string text;
+ const bool matches;
+ const size_t m1_start;
+ const size_t m1_end;
+ const size_t m2_start;
+ const size_t m2_end;
+ } data[] = {
+ { "foo foo", "foo", true, 0, 3, 0, 0 },
+ { "foo fooey", "fooey", true, 0, 5, 0, 0 },
+ { "foo fooey bar", "bar fooey", true, 0, 3, 4, 9 },
+ { "blah", "blah", true, 0, 4, 0, 0 },
+ { "blah", "foo", false, 0, 0, 0, 0 },
+ { "blah", "blahblah", true, 0, 4, 0, 0 },
+ { "blah", "foo blah", true, 4, 8, 0, 0 },
+ { "foo blah", "blah", false, 0, 0, 0, 0 },
+ { "foo blah", "blahx foobar", true, 0, 4, 6, 9 },
+ { "\"foo blah\"", "foo blah", true, 0, 8, 0, 0 },
+ { "\"foo blah\"", "foox blahx", false, 0, 0, 0, 0 },
+ { "\"foo blah\"", "foo blah", true, 0, 8, 0, 0 },
+ { "\"foo blah\"", "\"foo blah\"", true, 1, 9, 0, 0 },
+ { "foo blah", "\"foo bar blah\"", true, 1, 4, 9, 13 },
+ };
+ for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) {
+ QueryParser parser;
+ ScopedVector<QueryNode> query_nodes;
+ parser.ParseQuery(UTF8ToUTF16(data[i].query), &query_nodes.get());
+ Snippet::MatchPositions match_positions;
+ ASSERT_EQ(data[i].matches,
+ parser.DoesQueryMatch(UTF8ToUTF16(data[i].text),
+ query_nodes.get(),
+ &match_positions));
+ size_t offset = 0;
+ if (data[i].m1_start != 0 || data[i].m1_end != 0) {
+ ASSERT_TRUE(match_positions.size() >= 1);
+ EXPECT_EQ(data[i].m1_start, match_positions[0].first);
+ EXPECT_EQ(data[i].m1_end, match_positions[0].second);
+ offset++;
+ }
+ if (data[i].m2_start != 0 || data[i].m2_end != 0) {
+ ASSERT_TRUE(match_positions.size() == 1 + offset);
+ EXPECT_EQ(data[i].m2_start, match_positions[offset].first);
+ EXPECT_EQ(data[i].m2_end, match_positions[offset].second);
+ }
+ }
+}
+
+TEST_F(QueryParserTest, ExtractQueryWords) {
+ struct TestData2 {
+ const std::string text;
+ const std::string w1;
+ const std::string w2;
+ const std::string w3;
+ const size_t word_count;
+ } data[] = {
+ { "foo", "foo", "", "", 1 },
+ { "foo bar", "foo", "bar", "", 2 },
+ { "\"foo bar\"", "foo", "bar", "", 2 },
+ { "\"foo bar\" a", "foo", "bar", "a", 3 },
+ };
+ for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) {
+ std::vector<string16> results;
+ QueryParser parser;
+ parser.ExtractQueryWords(UTF8ToUTF16(data[i].text), &results);
+ ASSERT_EQ(data[i].word_count, results.size());
+ EXPECT_EQ(data[i].w1, UTF16ToUTF8(results[0]));
+ if (results.size() == 2)
+ EXPECT_EQ(data[i].w2, UTF16ToUTF8(results[1]));
+ if (results.size() == 3)
+ EXPECT_EQ(data[i].w3, UTF16ToUTF8(results[2]));
+ }
+}
diff --git a/chrome/browser/history/redirect_uitest.cc b/chrome/browser/history/redirect_uitest.cc
new file mode 100644
index 0000000..f7a1669
--- /dev/null
+++ b/chrome/browser/history/redirect_uitest.cc
@@ -0,0 +1,303 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Navigates the browser to server and client redirect pages and makes sure
+// that the correct redirects are reflected in the history database. Errors
+// here might indicate that WebKit changed the calls our glue layer gets in
+// the case of redirects. It may also mean problems with the history system.
+
+#include "base/file_util.h"
+#include "base/platform_thread.h"
+#include "base/scoped_ptr.h"
+#include "base/string_util.h"
+#include "base/string16.h"
+#include "chrome/test/automation/tab_proxy.h"
+#include "chrome/test/ui/ui_test.h"
+#include "net/base/net_util.h"
+#include "net/url_request/url_request_unittest.h"
+
+namespace {
+
+const wchar_t kDocRoot[] = L"chrome/test/data";
+
+typedef UITest RedirectTest;
+
+// Tests a single server redirect
+TEST_F(RedirectTest, Server) {
+ scoped_refptr<HTTPTestServer> server =
+ HTTPTestServer::CreateServer(kDocRoot, NULL);
+ ASSERT_TRUE(NULL != server.get());
+
+ GURL final_url = server->TestServerPage(std::string());
+ GURL first_url = server->TestServerPage(
+ "server-redirect?" + final_url.spec());
+
+ NavigateToURL(first_url);
+
+ scoped_refptr<TabProxy> tab_proxy(GetActiveTab());
+ ASSERT_TRUE(tab_proxy.get());
+
+ std::vector<GURL> redirects;
+ ASSERT_TRUE(tab_proxy->GetRedirectsFrom(first_url, &redirects));
+
+ ASSERT_EQ(1U, redirects.size());
+ EXPECT_EQ(final_url.spec(), redirects[0].spec());
+}
+
+// Tests a single client redirect.
+TEST_F(RedirectTest, Client) {
+ scoped_refptr<HTTPTestServer> server =
+ HTTPTestServer::CreateServer(kDocRoot, NULL);
+ ASSERT_TRUE(NULL != server.get());
+
+ GURL final_url = server->TestServerPage(std::string());
+ GURL first_url = server->TestServerPage(
+ "client-redirect?" + final_url.spec());
+
+ // The client redirect appears as two page visits in the browser.
+ NavigateToURLBlockUntilNavigationsComplete(first_url, 2);
+
+ scoped_refptr<TabProxy> tab_proxy(GetActiveTab());
+ ASSERT_TRUE(tab_proxy.get());
+
+ std::vector<GURL> redirects;
+ ASSERT_TRUE(tab_proxy->GetRedirectsFrom(first_url, &redirects));
+
+ ASSERT_EQ(1U, redirects.size());
+ EXPECT_EQ(final_url.spec(), redirects[0].spec());
+
+ // The address bar should display the final URL.
+ GURL tab_url;
+ EXPECT_TRUE(tab_proxy->GetCurrentURL(&tab_url));
+ EXPECT_TRUE(final_url == tab_url);
+
+ // Navigate one more time.
+ NavigateToURLBlockUntilNavigationsComplete(first_url, 2);
+
+ // The address bar should still display the final URL.
+ EXPECT_TRUE(tab_proxy->GetCurrentURL(&tab_url));
+ EXPECT_TRUE(final_url == tab_url);
+}
+
+TEST_F(RedirectTest, ClientEmptyReferer) {
+ scoped_refptr<HTTPTestServer> server =
+ HTTPTestServer::CreateServer(kDocRoot, NULL);
+ ASSERT_TRUE(NULL != server.get());
+
+ GURL final_url = server->TestServerPage(std::string());
+ FilePath test_file(test_data_directory_);
+ test_file = test_file.AppendASCII("file_client_redirect.html");
+ GURL first_url = net::FilePathToFileURL(test_file);
+
+ // The client redirect appears as two page visits in the browser.
+ NavigateToURLBlockUntilNavigationsComplete(first_url, 2);
+
+ std::vector<GURL> redirects;
+ scoped_refptr<TabProxy> tab_proxy(GetActiveTab());
+ ASSERT_TRUE(tab_proxy.get());
+ ASSERT_TRUE(tab_proxy->GetRedirectsFrom(first_url, &redirects));
+ ASSERT_EQ(1U, redirects.size());
+ EXPECT_EQ(final_url.spec(), redirects[0].spec());
+}
+
+// Tests to make sure a location change when a pending redirect exists isn't
+// flagged as a redirect.
+TEST_F(RedirectTest, ClientCancelled) {
+ FilePath first_path(test_data_directory_);
+ first_path = first_path.AppendASCII("cancelled_redirect_test.html");
+ ASSERT_TRUE(file_util::AbsolutePath(&first_path));
+ GURL first_url = net::FilePathToFileURL(first_path);
+
+ NavigateToURLBlockUntilNavigationsComplete(first_url, 1);
+
+ NavigateToURL(GURL("javascript:click()")); // User initiated location change.
+
+ scoped_refptr<TabProxy> tab_proxy(GetActiveTab());
+ ASSERT_TRUE(tab_proxy.get());
+
+ std::vector<GURL> redirects;
+ ASSERT_TRUE(tab_proxy->GetRedirectsFrom(first_url, &redirects));
+
+ // There should be no redirects from first_url, because the anchor location
+ // change that occurs should not be flagged as a redirect and the meta-refresh
+ // won't have fired yet.
+ ASSERT_EQ(0U, redirects.size());
+ GURL current_url;
+ ASSERT_TRUE(tab_proxy->GetCurrentURL(&current_url));
+
+ // Need to test final path and ref separately since constructing a file url
+ // containing an anchor using FilePathToFileURL will escape the anchor as
+ // %23, but in current_url the anchor will be '#'.
+ std::string final_ref = "myanchor";
+ FilePath current_path;
+ ASSERT_TRUE(net::FileURLToFilePath(current_url, &current_path));
+ ASSERT_TRUE(file_util::AbsolutePath(&current_path));
+ // Path should remain unchanged.
+ EXPECT_EQ(StringToLowerASCII(first_path.value()),
+ StringToLowerASCII(current_path.value()));
+ EXPECT_EQ(final_ref, current_url.ref());
+}
+
+// Tests a client->server->server redirect
+TEST_F(RedirectTest, ClientServerServer) {
+ scoped_refptr<HTTPTestServer> server =
+ HTTPTestServer::CreateServer(kDocRoot, NULL);
+ ASSERT_TRUE(NULL != server.get());
+
+ GURL final_url = server->TestServerPage(std::string());
+ GURL next_to_last = server->TestServerPage(
+ "server-redirect?" + final_url.spec());
+ GURL second_url = server->TestServerPage(
+ "server-redirect?" + next_to_last.spec());
+ GURL first_url = server->TestServerPage(
+ "client-redirect?" + second_url.spec());
+ std::vector<GURL> redirects;
+
+  // We need the sleep loop for the client redirect, because it appears as
+  // two page visits in the browser.
+ NavigateToURL(first_url);
+
+ for (int i = 0; i < 10; ++i) {
+ PlatformThread::Sleep(sleep_timeout_ms());
+ scoped_refptr<TabProxy> tab_proxy(GetActiveTab());
+ ASSERT_TRUE(tab_proxy.get());
+ ASSERT_TRUE(tab_proxy->GetRedirectsFrom(first_url, &redirects));
+ if (!redirects.empty())
+ break;
+ }
+
+ ASSERT_EQ(3U, redirects.size());
+ EXPECT_EQ(second_url.spec(), redirects[0].spec());
+ EXPECT_EQ(next_to_last.spec(), redirects[1].spec());
+ EXPECT_EQ(final_url.spec(), redirects[2].spec());
+}
+
+// Tests that the "#reference" gets preserved across server redirects.
+TEST_F(RedirectTest, ServerReference) {
+ scoped_refptr<HTTPTestServer> server =
+ HTTPTestServer::CreateServer(kDocRoot, NULL);
+ ASSERT_TRUE(NULL != server.get());
+
+ const std::string ref("reference");
+
+ GURL final_url = server->TestServerPage(std::string());
+ GURL initial_url = server->TestServerPage(
+ "server-redirect?" + final_url.spec() + "#" + ref);
+
+ NavigateToURL(initial_url);
+
+ GURL url = GetActiveTabURL();
+ EXPECT_EQ(ref, url.ref());
+}
+
+// Tests that a redirect from http:// to file://:
+// A) does not crash the browser or confuse the redirect chain (see bug
+//    1080873), and
+// B) does not take place.
+TEST_F(RedirectTest, NoHttpToFile) {
+ scoped_refptr<HTTPTestServer> server =
+ HTTPTestServer::CreateServer(kDocRoot, NULL);
+ ASSERT_TRUE(NULL != server.get());
+ FilePath test_file(test_data_directory_);
+ test_file = test_file.AppendASCII("http_to_file.html");
+ GURL file_url = net::FilePathToFileURL(test_file);
+
+ GURL initial_url = server->TestServerPage(
+ "client-redirect?" + file_url.spec());
+
+ NavigateToURL(initial_url);
+ // UITest will check for crashes. We make sure the title doesn't match the
+ // title from the file, because the nav should not have taken place.
+ scoped_refptr<TabProxy> tab_proxy(GetActiveTab());
+ ASSERT_TRUE(tab_proxy.get());
+ std::wstring actual_title;
+ ASSERT_TRUE(tab_proxy->GetTabTitle(&actual_title));
+ EXPECT_NE("File!", WideToUTF8(actual_title));
+}
+
+// Ensures that non-user initiated location changes (within page) are
+// flagged as client redirects. See bug 1139823.
+TEST_F(RedirectTest, ClientFragments) {
+ scoped_refptr<HTTPTestServer> server =
+ HTTPTestServer::CreateServer(kDocRoot, NULL);
+ ASSERT_TRUE(NULL != server.get());
+
+ FilePath test_file(test_data_directory_);
+ test_file = test_file.AppendASCII("ref_redirect.html");
+ GURL first_url = net::FilePathToFileURL(test_file);
+ std::vector<GURL> redirects;
+
+ NavigateToURL(first_url);
+
+ scoped_refptr<TabProxy> tab_proxy(GetActiveTab());
+ ASSERT_TRUE(tab_proxy.get());
+ ASSERT_TRUE(tab_proxy->GetRedirectsFrom(first_url, &redirects));
+ EXPECT_EQ(1U, redirects.size());
+ EXPECT_EQ(first_url.spec() + "#myanchor", redirects[0].spec());
+}
+
+// TODO(timsteele): This is disabled because our current testserver can't
+// handle multiple requests in parallel, making it hang on the first request
+// to /slow?60. It's unable to serve our second request for files/title2.html
+// until /slow? completes, which doesn't give the desired behavior. We could
+// alternatively load the second page from disk, but we would need to start
+// the browser for this testcase with --process-per-tab, and I don't think
+// we can do this at test-case-level granularity at the moment.
+// http://crbug.com/45056
+TEST_F(RedirectTest,
+ DISABLED_ClientCancelledByNewNavigationAfterProvisionalLoad) {
+ // We want to initiate a second navigation after the provisional load for
+ // the client redirect destination has started, but before this load is
+ // committed. To achieve this, we tell the browser to load a slow page,
+ // which causes it to start a provisional load, and while it is waiting
+ // for the response (which means it hasn't committed the load for the client
+ // redirect destination page yet), we issue a new navigation request.
+ scoped_refptr<HTTPTestServer> server =
+ HTTPTestServer::CreateServer(kDocRoot, NULL);
+ ASSERT_TRUE(NULL != server.get());
+
+ GURL final_url = server->TestServerPage("files/title2.html");
+ GURL slow = server->TestServerPage("slow?60");
+ GURL first_url = server->TestServerPage(
+ "client-redirect?" + slow.spec());
+ std::vector<GURL> redirects;
+
+ NavigateToURL(first_url);
+ // We don't sleep here - the first navigation won't have been committed yet
+ // because we told the server to wait a minute. This means the browser has
+  // started its provisional load for the client redirect destination page but
+ // hasn't completed. Our time is now!
+ NavigateToURL(final_url);
+
+ std::wstring tab_title;
+ std::wstring final_url_title = UTF8ToWide("Title Of Awesomeness");
+ // Wait till the final page has been loaded.
+ for (int i = 0; i < 10; ++i) {
+ PlatformThread::Sleep(sleep_timeout_ms());
+ scoped_refptr<TabProxy> tab_proxy(GetActiveTab());
+ ASSERT_TRUE(tab_proxy.get());
+ ASSERT_TRUE(tab_proxy->GetTabTitle(&tab_title));
+ if (tab_title == final_url_title) {
+ ASSERT_TRUE(tab_proxy->GetRedirectsFrom(first_url, &redirects));
+ break;
+ }
+ }
+
+ // Check to make sure the navigation did in fact take place and we are
+ // at the expected page.
+ EXPECT_EQ(final_url_title, tab_title);
+
+ bool final_navigation_not_redirect = true;
+ // Check to make sure our request for files/title2.html doesn't get flagged
+ // as a client redirect from the first (/client-redirect?) page.
+ for (std::vector<GURL>::iterator it = redirects.begin();
+ it != redirects.end(); ++it) {
+ if (final_url.spec() == it->spec()) {
+ final_navigation_not_redirect = false;
+ break;
+ }
+ }
+ EXPECT_TRUE(final_navigation_not_redirect);
+}
+
+} // namespace
diff --git a/chrome/browser/history/snippet.cc b/chrome/browser/history/snippet.cc
new file mode 100644
index 0000000..cb96e16
--- /dev/null
+++ b/chrome/browser/history/snippet.cc
@@ -0,0 +1,285 @@
+// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/history/snippet.h"
+
+#include <algorithm>
+
+#include "base/logging.h"
+#include "base/scoped_ptr.h"
+#include "base/string_util.h"
+#include "base/utf_string_conversions.h"
+#include "unicode/brkiter.h"
+#include "unicode/utext.h"
+#include "unicode/utf8.h"
+
+namespace {
+
+bool PairFirstLessThan(const Snippet::MatchPosition& a,
+ const Snippet::MatchPosition& b) {
+ return a.first < b.first;
+}
+
+// Combines all pairs after |offset| in |match_positions| that are contained
+// in or touch the pair at |offset|.
+void CoalescePositionsFrom(size_t offset,
+ Snippet::MatchPositions* match_positions) {
+ DCHECK(offset < match_positions->size());
+ Snippet::MatchPosition& pair((*match_positions)[offset]);
+ ++offset;
+ while (offset < match_positions->size() &&
+ pair.second >= (*match_positions)[offset].first) {
+ pair.second = std::max(pair.second, (*match_positions)[offset].second);
+ match_positions->erase(match_positions->begin() + offset);
+ }
+}
+
+// Makes sure there is a pair in match_positions that contains the specified
+// range. This keeps the pairs ordered in match_positions by first, and makes
+// sure none of the pairs in match_positions touch each other.
+void AddMatch(size_t start,
+ size_t end,
+ Snippet::MatchPositions* match_positions) {
+ DCHECK(start < end);
+ DCHECK(match_positions);
+ Snippet::MatchPosition pair(start, end);
+ if (match_positions->empty()) {
+ match_positions->push_back(pair);
+ return;
+ }
+ // There's at least one match. Find the position of the new match,
+ // potentially extending pairs around it.
+ Snippet::MatchPositions::iterator i =
+ std::lower_bound(match_positions->begin(), match_positions->end(),
+ pair, &PairFirstLessThan);
+ if (i != match_positions->end() && i->first == start) {
+ // Match not at the end and there is already a pair with the same
+ // start.
+ if (end > i->second) {
+ // New pair extends beyond existing pair. Extend existing pair and
+ // coalesce matches after it.
+ i->second = end;
+ CoalescePositionsFrom(i - match_positions->begin(), match_positions);
+ } // else case, new pair completely contained in existing pair, nothing
+ // to do.
+ } else if (i == match_positions->begin()) {
+ // Match at the beginning and the first pair doesn't have the same
+ // start. Insert new pair and coalesce matches after it.
+ match_positions->insert(i, pair);
+ CoalescePositionsFrom(0, match_positions);
+ } else {
+ // Not at the beginning (but may be at the end).
+ --i;
+ if (start <= i->second && end > i->second) {
+ // Previous element contains match. Extend it and coalesce.
+ i->second = end;
+ CoalescePositionsFrom(i - match_positions->begin(), match_positions);
+ } else if (end > i->second) {
+ // Region doesn't touch previous element. See if region touches current
+ // element.
+ ++i;
+ if (i == match_positions->end() || end < i->first) {
+ match_positions->insert(i, pair);
+ } else {
+ i->first = start;
+ i->second = end;
+ CoalescePositionsFrom(i - match_positions->begin(), match_positions);
+ }
+ }
+ }
+}
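+
+// For example (illustrative): starting from {[0,5], [10,15]}, AddMatch(4, 11)
+// extends the first pair to [0,11], which then coalesces with [10,15] to
+// leave the single pair [0,15].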
+
+// Converts an index in a utf8 string into the index in the corresponding utf16
+// string and returns the utf16 index. This is intended to be called in a loop
+// iterating through a utf8 string.
+//
+// utf8_string: the utf8 string.
+// utf8_length: length of the utf8 string.
+// offset: the utf8 offset to convert.
+// utf8_pos: current offset in the utf8 string. This is modified and on return
+// matches offset.
+// utf16_pos: current index in the UTF-16 string. This is the same as the
+// return value.
+size_t AdvanceAndReturnUTF16Pos(const char* utf8_string,
+ int32_t utf8_length,
+ int32_t offset,
+ int32_t* utf8_pos,
+ size_t* utf16_pos) {
+ DCHECK(offset >= *utf8_pos && offset <= utf8_length);
+
+ UChar32 wide_char;
+ while (*utf8_pos < offset) {
+ U8_NEXT(utf8_string, *utf8_pos, utf8_length, wide_char);
+ *utf16_pos += (wide_char <= 0xFFFF) ? 1 : 2;
+ }
+ return *utf16_pos;
+}
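+
+// For example (illustrative): in the string "a\xe2\x84\xa2b" ('a', U+2122,
+// 'b'), UTF-8 offset 4 (the 'b') maps to UTF-16 index 2: U+2122 occupies
+// three UTF-8 bytes but only one UTF-16 code unit.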
+
+// Given a character break iterator over a UTF-8 string, set the iterator
+// position to |*utf8_pos| and move by |count| characters. |count| can
+// be either positive or negative.
+void MoveByNGraphemes(icu::BreakIterator* bi, int count, size_t* utf8_pos) {
+ // Ignore the return value. A side effect of the current position
+ // being set at or following |*utf8_pos| is exploited here.
+ // It's simpler than calling following(n) and then previous().
+ // isBoundary() is not very fast, but should be good enough for the
+ // snippet generation. If not, revisit the way we scan in ComputeSnippet.
+ bi->isBoundary(*utf8_pos);
+ bi->next(count);
+ *utf8_pos = static_cast<size_t>(bi->current());
+}
+
+// The amount of context to include for a given hit. Note that it's counted
+// in terms of graphemes rather than bytes.
+const int kSnippetContext = 50;
+
+// Returns true if next match falls within a snippet window
+// from the previous match. The window size is counted in terms
+// of graphemes rather than bytes in UTF-8.
+bool IsNextMatchWithinSnippetWindow(icu::BreakIterator* bi,
+ size_t previous_match_end,
+ size_t next_match_start) {
+ // If it's within a window in terms of bytes, it's certain
+ // that it's within a window in terms of graphemes as well.
+ if (next_match_start < previous_match_end + kSnippetContext)
+ return true;
+ bi->isBoundary(previous_match_end);
+ // An alternative to this is to call |bi->next()| at most
+ // kSnippetContext times, compare |bi->current()| with |next_match_start|
+ // after each call and return early if possible. There are other
+ // heuristics to speed things up if necessary, but it's not likely that
+ // we need to bother.
+ bi->next(kSnippetContext);
+ int64 current = bi->current();
+ return (next_match_start < static_cast<uint64>(current) ||
+ current == icu::BreakIterator::DONE);
+}
+
+} // namespace
+
+// static
+void Snippet::ExtractMatchPositions(const std::string& offsets_str,
+ const std::string& column_num,
+ MatchPositions* match_positions) {
+ DCHECK(match_positions);
+ if (offsets_str.empty())
+ return;
+ std::vector<std::string> offsets;
+ SplitString(offsets_str, ' ', &offsets);
+ // SQLite offsets are sets of four integers:
+ // column, query term, match offset, match length
+ // Matches within a string are marked by (start, end) pairs.
+  for (size_t i = 0; i + 3 < offsets.size(); i += 4) {
+ if (offsets[i] != column_num)
+ continue;
+ const size_t start = atoi(offsets[i + 2].c_str());
+ const size_t end = start + atoi(offsets[i + 3].c_str());
+ // Switch to DCHECK after debugging http://crbug.com/15261.
+ CHECK(end >= start);
+ AddMatch(start, end, match_positions);
+ }
+}
+
+// static
+void Snippet::ConvertMatchPositionsToWide(
+ const std::string& utf8_string,
+ Snippet::MatchPositions* match_positions) {
+ DCHECK(match_positions);
+ int32_t utf8_pos = 0;
+ size_t utf16_pos = 0;
+ const char* utf8_cstring = utf8_string.c_str();
+ const int32_t utf8_length = static_cast<int32_t>(utf8_string.size());
+ for (Snippet::MatchPositions::iterator i = match_positions->begin();
+ i != match_positions->end(); ++i) {
+ i->first = AdvanceAndReturnUTF16Pos(utf8_cstring, utf8_length,
+ i->first, &utf8_pos, &utf16_pos);
+ i->second = AdvanceAndReturnUTF16Pos(utf8_cstring, utf8_length,
+ i->second, &utf8_pos, &utf16_pos);
+ }
+}
+
+void Snippet::ComputeSnippet(const MatchPositions& match_positions,
+ const std::string& document) {
+ // The length of snippets we try to produce.
+ // We can generate longer snippets but stop once we cross kSnippetMaxLength.
+ const size_t kSnippetMaxLength = 200;
+ const string16 kEllipsis = ASCIIToUTF16(" ... ");
+
+ UText* document_utext = NULL;
+ UErrorCode status = U_ZERO_ERROR;
+ document_utext = utext_openUTF8(document_utext, document.data(),
+ document.size(), &status);
+ // Locale does not matter because there's no per-locale customization
+ // for character iterator.
+ scoped_ptr<icu::BreakIterator> bi(icu::BreakIterator::createCharacterInstance(
+ icu::Locale::getDefault(), status));
+ bi->setText(document_utext, status);
+ DCHECK(U_SUCCESS(status));
+
+ // We build the snippet by iterating through the matches and then grabbing
+ // context around each match. If matches are near enough each other (within
+ // kSnippetContext), we skip the "..." between them.
+ string16 snippet;
+ size_t start = 0;
+ for (size_t i = 0; i < match_positions.size(); ++i) {
+ // Some shorter names for the current match.
+ const size_t match_start = match_positions[i].first;
+ const size_t match_end = match_positions[i].second;
+
+ // Switch to DCHECK after debugging http://crbug.com/15261.
+ CHECK(match_end > match_start);
+ CHECK(match_end <= document.size());
+
+ // Add the context, if any, to show before the match.
+ size_t context_start = match_start;
+ MoveByNGraphemes(bi.get(), -kSnippetContext, &context_start);
+ start = std::max(start, context_start);
+ if (start < match_start) {
+ if (start > 0)
+ snippet += kEllipsis;
+ // Switch to DCHECK after debugging http://crbug.com/15261.
+ CHECK(start < document.size());
+ snippet += UTF8ToUTF16(document.substr(start, match_start - start));
+ }
+
+ // Add the match.
+ const size_t first = snippet.size();
+ snippet += UTF8ToUTF16(document.substr(match_start,
+ match_end - match_start));
+ matches_.push_back(std::make_pair(first, snippet.size()));
+
+ // Compute the context, if any, to show after the match.
+ size_t end;
+ // Check if the next match falls within our snippet window.
+ if (i + 1 < match_positions.size() &&
+ IsNextMatchWithinSnippetWindow(bi.get(), match_end,
+ match_positions[i + 1].first)) {
+ // Yes, it's within the window. Make the end context extend just up
+ // to the next match.
+ end = match_positions[i + 1].first;
+ // Switch to DCHECK after debugging http://crbug.com/15261.
+ CHECK(end >= match_end);
+ CHECK(end <= document.size());
+ snippet += UTF8ToUTF16(document.substr(match_end, end - match_end));
+ } else {
+ // No, there's either no next match or the next match is too far away.
+ end = match_end;
+ MoveByNGraphemes(bi.get(), kSnippetContext, &end);
+ // Switch to DCHECK after debugging http://crbug.com/15261.
+ CHECK(end >= match_end);
+ CHECK(end <= document.size());
+ snippet += UTF8ToUTF16(document.substr(match_end, end - match_end));
+ if (end < document.size())
+ snippet += kEllipsis;
+ }
+ start = end;
+
+ // Stop here if we have enough snippet computed.
+ if (snippet.size() >= kSnippetMaxLength)
+ break;
+ }
+
+ utext_close(document_utext);
+ swap(text_, snippet);
+}
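+
+// For example (illustrative): a single hit in the middle of a long document
+// yields a snippet of the form " ... <context>match<context> ... ", with the
+// match's [begin, end) offsets within the snippet recorded in matches_.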
diff --git a/chrome/browser/history/snippet.h b/chrome/browser/history/snippet.h
new file mode 100644
index 0000000..9e92893
--- /dev/null
+++ b/chrome/browser/history/snippet.h
@@ -0,0 +1,69 @@
+// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// This module computes document snippets, based on query hits within the
+// documents, for display in history search results.
+
+#ifndef CHROME_BROWSER_HISTORY_SNIPPET_H__
+#define CHROME_BROWSER_HISTORY_SNIPPET_H__
+
+#include <vector>
+
+#include "base/string16.h"
+
+class Snippet {
+ public:
+ // Each MatchPosition is the [begin, end) positions of a match within a
+ // string.
+ typedef std::pair<size_t, size_t> MatchPosition;
+ typedef std::vector<MatchPosition> MatchPositions;
+
+ // Parses an offsets string as returned from a sqlite full text index. An
+ // offsets string encodes information about why a row matched a text query.
+ // The information is encoded in the string as a set of matches, where each
+ // match consists of the column, term-number, location, and length of the
+ // match. Each element of the match is separated by a space, as is each match
+ // from other matches.
+ //
+ // This method adds the start and end of each match whose column is
+ // column_num to match_positions. The pairs are ordered based on first,
+ // with no overlapping elements.
+ //
+ // NOTE: the positions returned are in terms of UTF8 encoding. To convert the
+ // offsets to wide, use ConvertMatchPositionsToWide.
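+  //
+  // For example (illustrative): with offsets_str "0 0 1 5 1 0 10 4" and
+  // column_num "0", only the first quadruple refers to column 0, so the
+  // single pair [1, 6) is added to match_positions.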
+ static void ExtractMatchPositions(const std::string& offsets_str,
+ const std::string& column_num,
+ MatchPositions* match_positions);
+
+ // Converts match positions as returned from ExtractMatchPositions to be in
+ // terms of a wide string.
+ static void ConvertMatchPositionsToWide(
+ const std::string& utf8_string,
+ Snippet::MatchPositions* match_positions);
+
+ // Given |matches|, the match positions within |document|, compute the snippet
+ // for the document.
+ // Note that |document| is UTF-8 and the offsets in |matches| are byte
+ // offsets.
+ void ComputeSnippet(const MatchPositions& matches,
+ const std::string& document);
+
+ const string16& text() const { return text_; }
+ const MatchPositions& matches() const { return matches_; }
+
+ // Efficiently swaps the contents of this snippet with the other.
+ void Swap(Snippet* other) {
+ text_.swap(other->text_);
+ matches_.swap(other->matches_);
+ }
+
+ private:
+ // The text of the snippet.
+ string16 text_;
+
+ // The matches within text_.
+ MatchPositions matches_;
+};
+
+#endif // CHROME_BROWSER_HISTORY_SNIPPET_H__
diff --git a/chrome/browser/history/snippet_unittest.cc b/chrome/browser/history/snippet_unittest.cc
new file mode 100644
index 0000000..5bc8a3b
--- /dev/null
+++ b/chrome/browser/history/snippet_unittest.cc
@@ -0,0 +1,254 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/history/snippet.h"
+
+#include <algorithm>
+
+#include "base/string_util.h"
+#include "base/utf_string_conversions.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace {
+
+// A sample document to compute snippets of.
+// The \x bits after the first "Google" are UTF-8 of U+2122 TRADE MARK SIGN,
+// and are useful for verifying we don't screw up in UTF-8/UTF-16 conversion.
+const char* kSampleDocument = "Google\xe2\x84\xa2 Terms of Service "
+"Welcome to Google! "
+"1. Your relationship with Google "
+"1.1 Your use of Google's products, software, services and web sites "
+"(referred to collectively as the \"Services\" in this document and excluding "
+"any services provided to you by Google under a separate written agreement) "
+"is subject to the terms of a legal agreement between you and Google. "
+"\"Google\" means Google Inc., whose principal place of business is at 1600 "
+"Amphitheatre Parkway, Mountain View, CA 94043, United States. This document "
+"explains how the agreement is made up, and sets out some of the terms of "
+"that agreement.";
+}  // namespace
+
+// Thai sample taken from http://www.google.co.th/intl/th/privacy.html
+// TODO(jungshik) : Add more samples (e.g. Hindi) after porting
+// ICU 4.0's character iterator changes to our copy of ICU 3.8 to get
+// grapheme clusters in Indic scripts handled more reasonably.
+const char* kThaiSample = "Google \xE0\xB9\x80\xE0\xB8\x81\xE0\xB9\x87"
+"\xE0\xB8\x9A\xE0\xB8\xA3\xE0\xB8\xA7\xE0\xB8\x9A\xE0\xB8\xA3\xE0\xB8\xA7"
+"\xE0\xB8\xA1 \xE0\xB8\x82\xE0\xB9\x89\xE0\xB8\xAD\xE0\xB8\xA1\xE0\xB8\xB9"
+"\xE0\xB8\xA5\xE0\xB8\xAA\xE0\xB9\x88\xE0\xB8\xA7\xE0\xB8\x99\xE0\xB8\x9A"
+"\xE0\xB8\xB8\xE0\xB8\x84\xE0\xB8\x84\xE0\xB8\xA5 \xE0\xB9\x80\xE0\xB8\xA1"
+"\xE0\xB8\xB7\xE0\xB9\x88\xE0\xB8\xAD\xE0\xB8\x84\xE0\xB8\xB8\xE0\xB8\x93"
+"\xE0\xB8\xA5\xE0\xB8\x87\xE0\xB8\x97\xE0\xB8\xB0\xE0\xB9\x80\xE0\xB8\x9A"
+"\xE0\xB8\xB5\xE0\xB8\xA2\xE0\xB8\x99\xE0\xB9\x80\xE0\xB8\x9E\xE0\xB8\xB7"
+"\xE0\xB9\x88\xE0\xB8\xAD\xE0\xB9\x83\xE0\xB8\x8A\xE0\xB9\x89\xE0\xB8\x9A"
+"\xE0\xB8\xA3\xE0\xB8\xB4\xE0\xB8\x81\xE0\xB8\xB2\xE0\xB8\xA3\xE0\xB8\x82"
+"\xE0\xB8\xAD\xE0\xB8\x87 Google \xE0\xB8\xAB\xE0\xB8\xA3\xE0\xB8\xB7"
+"\xE0\xB8\xAD\xE0\xB9\x83\xE0\xB8\xAB\xE0\xB9\x89\xE0\xB8\x82\xE0\xB9\x89"
+"\xE0\xB8\xAD\xE0\xB8\xA1\xE0\xB8\xB9\xE0\xB8\xA5\xE0\xB8\x94\xE0\xB8\xB1"
+"\xE0\xB8\x87\xE0\xB8\x81\xE0\xB8\xA5\xE0\xB9\x88\xE0\xB8\xB2\xE0\xB8\xA7"
+"\xE0\xB9\x82\xE0\xB8\x94\xE0\xB8\xA2\xE0\xB8\xAA\xE0\xB8\xA1\xE0\xB8\xB1"
+"\xE0\xB8\x84\xE0\xB8\xA3\xE0\xB9\x83\xE0\xB8\x88 \xE0\xB9\x80\xE0\xB8\xA3"
+"\xE0\xB8\xB2\xE0\xB8\xAD\xE0\xB8\xB2\xE0\xB8\x88\xE0\xB8\xA3\xE0\xB8\xA7"
+"\xE0\xB8\xA1\xE0\xB8\x82\xE0\xB9\x89\xE0\xB8\xAD\xE0\xB8\xA1\xE0\xB8\xB9"
+"\xE0\xB8\xA5\xE0\xB8\xAA\xE0\xB9\x88\xE0\xB8\xA7\xE0\xB8\x99\xE0\xB8\x9A"
+"\xE0\xB8\xB8\xE0\xB8\x84\xE0\xB8\x84\xE0\xB8\xA5\xE0\xB8\x97\xE0\xB8\xB5"
+"\xE0\xB9\x88\xE0\xB9\x80\xE0\xB8\x81\xE0\xB9\x87\xE0\xB8\x9A\xE0\xB8\xA3"
+"\xE0\xB8\xA7\xE0\xB8\x9A\xE0\xB8\xA3\xE0\xB8\xA7\xE0\xB8\xA1\xE0\xB8\x88"
+"\xE0\xB8\xB2\xE0\xB8\x81\xE0\xB8\x84\xE0\xB8\xB8\xE0\xB8\x93\xE0\xB9\x80"
+"\xE0\xB8\x82\xE0\xB9\x89\xE0\xB8\xB2\xE0\xB8\x81\xE0\xB8\xB1\xE0\xB8\x9A"
+"\xE0\xB8\x82\xE0\xB9\x89\xE0\xB8\xAD\xE0\xB8\xA1\xE0\xB8\xB9\xE0\xB8\xA5"
+"\xE0\xB8\x88\xE0\xB8\xB2\xE0\xB8\x81\xE0\xB8\x9A\xE0\xB8\xA3\xE0\xB8\xB4"
+"\xE0\xB8\x81\xE0\xB8\xB2\xE0\xB8\xA3\xE0\xB8\xAD\xE0\xB8\xB7\xE0\xB9\x88"
+"\xE0\xB8\x99\xE0\xB8\x82\xE0\xB8\xAD\xE0\xB8\x87 Google \xE0\xB8\xAB"
+"\xE0\xB8\xA3\xE0\xB8\xB7\xE0\xB8\xAD\xE0\xB8\x9A\xE0\xB8\xB8\xE0\xB8\x84"
+"\xE0\xB8\x84\xE0\xB8\xA5\xE0\xB8\x97\xE0\xB8\xB5\xE0\xB9\x88\xE0\xB8\xAA"
+"\xE0\xB8\xB2\xE0\xB8\xA1 \xE0\xB9\x80\xE0\xB8\x9E\xE0\xB8\xB7\xE0\xB9\x88"
+"\xE0\xB8\xAD\xE0\xB9\x83\xE0\xB8\xAB\xE0\xB9\x89\xE0\xB8\x9C\xE0\xB8\xB9"
+"\xE0\xB9\x89\xE0\xB9\x83\xE0\xB8\x8A\xE0\xB9\x89\xE0\xB9\x84\xE0\xB8\x94"
+"\xE0\xB9\x89\xE0\xB8\xA3\xE0\xB8\xB1\xE0\xB8\x9A\xE0\xB8\x9B\xE0\xB8\xA3"
+"\xE0\xB8\xB0\xE0\xB8\xAA\xE0\xB8\x9A\xE0\xB8\x81\xE0\xB8\xB2\xE0\xB8\xA3"
+"\xE0\xB8\x93\xE0\xB9\x8C\xE0\xB8\x97\xE0\xB8\xB5\xE0\xB9\x88\xE0\xB8\x94"
+"\xE0\xB8\xB5\xE0\xB8\x82\xE0\xB8\xB6\xE0\xB9\x89\xE0\xB8\x99 \xE0\xB8\xA3"
+"\xE0\xB8\xA7\xE0\xB8\xA1\xE0\xB8\x97\xE0\xB8\xB1\xE0\xB9\x89\xE0\xB8\x87"
+"\xE0\xB8\x9B\xE0\xB8\xA3\xE0\xB8\xB1\xE0\xB8\x9A\xE0\xB9\x81\xE0\xB8\x95"
+"\xE0\xB9\x88\xE0\xB8\x87\xE0\xB9\x80\xE0\xB8\x99\xE0\xB8\xB7\xE0\xB9\x89"
+"\xE0\xB8\xAD\xE0\xB8\xAB\xE0\xB8\xB2\xE0\xB9\x83\xE0\xB8\xAB\xE0\xB9\x89"
+"\xE0\xB9\x80\xE0\xB8\xAB\xE0\xB8\xA1\xE0\xB8\xB2\xE0\xB8\xB0\xE0\xB8\xAA"
+"\xE0\xB8\xB3\xE0\xB8\xAB\xE0\xB8\xA3\xE0\xB8\xB1\xE0\xB8\x9A\xE0\xB8\x84"
+"\xE0\xB8\xB8\xE0\xB8\x93";
+
+// Comparator for sorting by the first element in a pair.
+bool ComparePair1st(const Snippet::MatchPosition& a,
+ const Snippet::MatchPosition& b) {
+ return a.first < b.first;
+}
+
+// For testing, we'll compute the match positions manually instead of using
+// sqlite's FTS matching. BuildSnippet returns the snippet for matching
+// |query| against |document|. Matches are surrounded by "**".
+string16 BuildSnippet(const std::string& document,
+ const std::string& query) {
+ // This function assumes that |document| does not contain
+ // any character for which lowercasing changes its length. Further,
+ // it's assumed that lowercasing only the ASCII portion works for
+ // |document|. We need to add more test cases and change this function
+ // to be more generic depending on how we deal with 'folding for match'
+ // in history.
+ const std::string document_folded = StringToLowerASCII(std::string(document));
+
+ std::vector<std::string> query_words;
+ SplitString(query, ' ', &query_words);
+
+ // Manually construct match_positions of the document.
+ Snippet::MatchPositions match_positions;
+ for (std::vector<std::string>::iterator qw = query_words.begin();
+ qw != query_words.end(); ++qw) {
+ // Insert all instances of this word into match_pairs.
+ size_t ofs = 0;
+ while ((ofs = document_folded.find(*qw, ofs)) != std::string::npos) {
+ match_positions.push_back(std::make_pair(ofs, ofs + qw->size()));
+ ofs += qw->size();
+ }
+ }
+ // Sort match_positions in order of increasing offset.
+ std::sort(match_positions.begin(), match_positions.end(), ComparePair1st);
+
+ // Compute the snippet.
+ Snippet snippet;
+ snippet.ComputeSnippet(match_positions, document);
+
+ // Now "highlight" all matches in the snippet with **.
+ string16 star_snippet;
+ Snippet::MatchPositions::const_iterator match;
+ size_t pos = 0;
+ for (match = snippet.matches().begin();
+ match != snippet.matches().end(); ++match) {
+ star_snippet += snippet.text().substr(pos, match->first - pos);
+ star_snippet += UTF8ToUTF16("**");
+ star_snippet += snippet.text().substr(match->first,
+ match->second - match->first);
+ star_snippet += UTF8ToUTF16("**");
+ pos = match->second;
+ }
+ star_snippet += snippet.text().substr(pos);
+
+ return star_snippet;
+}
+
+TEST(Snippets, SimpleQuery) {
+ ASSERT_EQ(" ... eferred to collectively as the \"Services\" in this "
+ "**document** and excluding any services provided to you by "
+ "Goo ... ... way, Mountain View, CA 94043, United States. This "
+ "**document** explains how the agreement is made up, and sets "
+ "o ... ",
+ UTF16ToUTF8(BuildSnippet(kSampleDocument, "document")));
+}
+
+// Test that two words that are near each other don't produce two elided bits.
+TEST(Snippets, NearbyWords) {
+ ASSERT_EQ(" ... lace of business is at 1600 Amphitheatre Parkway, "
+ "**Mountain** **View**, CA 94043, United States. This "
+ "document explains ... ",
+ UTF16ToUTF8(BuildSnippet(kSampleDocument, "mountain view")));
+}
+
+// The above tests already test that we get byte offsets correct, but here's
+// one that gets the "TM" in its snippet.
+TEST(Snippets, UTF8) {
+ ASSERT_EQ(" ... ogle\xe2\x84\xa2 Terms of Service Welcome to Google! "
+ "1. Your **relationship** with Google 1.1 Your use of Google's "
+ "products, so ... ",
+ UTF16ToUTF8(BuildSnippet(kSampleDocument, "relationship")));
+}
+
+// Bug: 1274923
+// TODO(jungshik): Move this bug report to crbugs.com
+// Fails consistently. From the report, "Broken by latest ICU. Need new expected
+// results."
+TEST(Snippets, FAILS_ThaiUTF8) {
+ // There are 3 instances of '\u0E43\u0E2B\u0E49'
+ // (\xE0\xB9\x83\xE0\xB8\xAB\xE0\xB9\x89) in kThaiSample.
+ // The 1st is more than |kSnippetContext| graphemes away from the
+ // 2nd while the 2nd and 3rd are within that window. However, with
+ // the 2nd match added, the snippet goes over the size limit so that
+ // the snippet ends right before the 3rd match.
+ ASSERT_EQ(" ... "
+ " \xE0\xB8\x82\xE0\xB9\x89\xE0\xB8\xAD\xE0\xB8\xA1\xE0\xB8\xB9"
+ "\xE0\xB8\xA5\xE0\xB8\xAA\xE0\xB9\x88\xE0\xB8\xA7\xE0\xB8\x99"
+ "\xE0\xB8\x9A\xE0\xB8\xB8\xE0\xB8\x84\xE0\xB8\x84\xE0\xB8\xA5 "
+ "\xE0\xB9\x80\xE0\xB8\xA1\xE0\xB8\xB7\xE0\xB9\x88\xE0\xB8\xAD"
+ "\xE0\xB8\x84\xE0\xB8\xB8\xE0\xB8\x93\xE0\xB8\xA5\xE0\xB8\x87"
+ "\xE0\xB8\x97\xE0\xB8\xB0\xE0\xB9\x80\xE0\xB8\x9A\xE0\xB8\xB5"
+ "\xE0\xB8\xA2\xE0\xB8\x99\xE0\xB9\x80\xE0\xB8\x9E\xE0\xB8\xB7"
+ "\xE0\xB9\x88\xE0\xB8\xAD\xE0\xB9\x83\xE0\xB8\x8A\xE0\xB9\x89"
+ "\xE0\xB8\x9A\xE0\xB8\xA3\xE0\xB8\xB4\xE0\xB8\x81\xE0\xB8\xB2"
+ "\xE0\xB8\xA3\xE0\xB8\x82\xE0\xB8\xAD\xE0\xB8\x87 Google "
+ "\xE0\xB8\xAB\xE0\xB8\xA3\xE0\xB8\xB7\xE0\xB8\xAD**\xE0\xB9\x83"
+ "\xE0\xB8\xAB\xE0\xB9\x89**\xE0\xB8\x82\xE0\xB9\x89\xE0\xB8\xAD"
+ "\xE0\xB8\xA1\xE0\xB8\xB9\xE0\xB8\xA5\xE0\xB8\x94\xE0\xB8\xB1"
+ "\xE0\xB8\x87\xE0\xB8\x81\xE0\xB8\xA5\xE0\xB9\x88\xE0\xB8\xB2"
+ "\xE0\xB8\xA7\xE0\xB9\x82\xE0\xB8\x94\xE0\xB8\xA2\xE0\xB8\xAA"
+ "\xE0\xB8\xA1\xE0\xB8\xB1\xE0\xB8\x84\xE0\xB8\xA3\xE0\xB9\x83"
+ "\xE0\xB8\x88 \xE0\xB9\x80\xE0\xB8\xA3\xE0\xB8\xB2\xE0\xB8\xAD"
+ "\xE0\xB8\xB2\xE0\xB8\x88\xE0\xB8\xA3\xE0\xB8\xA7\xE0\xB8\xA1"
+ "\xE0\xB8\x82\xE0\xB9\x89\xE0\xB8\xAD\xE0\xB8\xA1\xE0\xB8\xB9"
+ "\xE0\xB8\xA5\xE0\xB8\xAA\xE0\xB9\x88\xE0\xB8\xA7\xE0\xB8\x99"
+ "\xE0\xB8\x9A\xE0\xB8\xB8\xE0\xB8\x84\xE0\xB8\x84\xE0\xB8\xA5"
+ "\xE0\xB8\x97\xE0\xB8\xB5\xE0\xB9\x88\xE0\xB9\x80\xE0\xB8\x81"
+ "\xE0\xB9\x87\xE0\xB8\x9A\xE0\xB8\xA3\xE0\xB8\xA7\xE0\xB8\x9A"
+ "\xE0\xB8\xA3\xE0\xB8\xA7\xE0\xB8\xA1 ... ... "
+ "\xE0\xB8\x88\xE0\xB8\xB2\xE0\xB8\x81\xE0\xB8\x84\xE0\xB8\xB8"
+ "\xE0\xB8\x93\xE0\xB9\x80\xE0\xB8\x82\xE0\xB9\x89\xE0\xB8\xB2"
+ "\xE0\xB8\x81\xE0\xB8\xB1\xE0\xB8\x9A\xE0\xB8\x82\xE0\xB9\x89"
+ "\xE0\xB8\xAD\xE0\xB8\xA1\xE0\xB8\xB9\xE0\xB8\xA5\xE0\xB8\x88"
+ "\xE0\xB8\xB2\xE0\xB8\x81\xE0\xB8\x9A\xE0\xB8\xA3\xE0\xB8\xB4"
+ "\xE0\xB8\x81\xE0\xB8\xB2\xE0\xB8\xA3\xE0\xB8\xAD\xE0\xB8\xB7"
+ "\xE0\xB9\x88\xE0\xB8\x99\xE0\xB8\x82\xE0\xB8\xAD\xE0\xB8\x87 "
+ "Google \xE0\xB8\xAB\xE0\xB8\xA3\xE0\xB8\xB7\xE0\xB8\xAD"
+ "\xE0\xB8\x9A\xE0\xB8\xB8\xE0\xB8\x84\xE0\xB8\x84\xE0\xB8\xA5"
+ "\xE0\xB8\x97\xE0\xB8\xB5\xE0\xB9\x88\xE0\xB8\xAA\xE0\xB8\xB2"
+ "\xE0\xB8\xA1 \xE0\xB9\x80\xE0\xB8\x9E\xE0\xB8\xB7\xE0\xB9\x88"
+ "\xE0\xB8\xAD**\xE0\xB9\x83\xE0\xB8\xAB\xE0\xB9\x89**\xE0\xB8\x9C"
+ "\xE0\xB8\xB9\xE0\xB9\x89\xE0\xB9\x83\xE0\xB8\x8A\xE0\xB9\x89"
+ "\xE0\xB9\x84\xE0\xB8\x94\xE0\xB9\x89\xE0\xB8\xA3\xE0\xB8\xB1"
+ "\xE0\xB8\x9A\xE0\xB8\x9B\xE0\xB8\xA3\xE0\xB8\xB0\xE0\xB8\xAA"
+ "\xE0\xB8\x9A\xE0\xB8\x81\xE0\xB8\xB2\xE0\xB8\xA3\xE0\xB8\x93"
+ "\xE0\xB9\x8C\xE0\xB8\x97\xE0\xB8\xB5\xE0\xB9\x88\xE0\xB8\x94"
+ "\xE0\xB8\xB5\xE0\xB8\x82\xE0\xB8\xB6\xE0\xB9\x89\xE0\xB8\x99 "
+ "\xE0\xB8\xA3\xE0\xB8\xA7\xE0\xB8\xA1\xE0\xB8\x97\xE0\xB8\xB1"
+ "\xE0\xB9\x89\xE0\xB8\x87\xE0\xB8\x9B\xE0\xB8\xA3\xE0\xB8\xB1"
+ "\xE0\xB8\x9A\xE0\xB9\x81\xE0\xB8\x95\xE0\xB9\x88\xE0\xB8\x87"
+ "\xE0\xB9\x80\xE0\xB8\x99\xE0\xB8\xB7\xE0\xB9\x89\xE0\xB8\xAD"
+ "\xE0\xB8\xAB\xE0\xB8\xB2",
+ UTF16ToUTF8(BuildSnippet(kThaiSample,
+ "\xE0\xB9\x83\xE0\xB8\xAB\xE0\xB9\x89")));
+}
+
+TEST(Snippets, ExtractMatchPositions) {
+ struct TestData {
+ const std::string offsets_string;
+ const size_t expected_match_count;
+ const size_t expected_matches[10];
+ } data[] = {
+ { "0 0 1 2 0 0 4 1 0 0 1 5", 1, { 1, 6 } },
+ { "0 0 1 4 0 0 2 1", 1, { 1, 5 } },
+ { "0 0 4 1 0 0 2 1", 2, { 2, 3, 4, 5 } },
+ { "0 0 0 1", 1, { 0, 1 } },
+ { "0 0 0 1 0 0 0 2", 1, { 0, 2 } },
+ { "0 0 1 1 0 0 1 2", 1, { 1, 3 } },
+ { "0 0 1 2 0 0 4 3 0 0 3 1", 1, { 1, 7 } },
+ { "0 0 1 4 0 0 2 5", 1, { 1, 7 } },
+ { "0 0 1 2 0 0 1 1", 1, { 1, 3 } },
+ { "0 0 1 1 0 0 5 2 0 0 10 1 0 0 3 10", 2, { 1, 2, 3, 13 } },
+ };
+ for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) {
+ Snippet::MatchPositions matches;
+ Snippet::ExtractMatchPositions(data[i].offsets_string, "0", &matches);
+ EXPECT_EQ(data[i].expected_match_count, matches.size());
+ for (size_t j = 0; j < data[i].expected_match_count; ++j) {
+ EXPECT_EQ(data[i].expected_matches[2 * j], matches[j].first);
+ EXPECT_EQ(data[i].expected_matches[2 * j + 1], matches[j].second);
+ }
+ }
+}
diff --git a/chrome/browser/history/starred_url_database.cc b/chrome/browser/history/starred_url_database.cc
new file mode 100644
index 0000000..cf2a306
--- /dev/null
+++ b/chrome/browser/history/starred_url_database.cc
@@ -0,0 +1,628 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/history/starred_url_database.h"
+
+#include "app/sql/connection.h"
+#include "app/sql/statement.h"
+#include "base/file_util.h"
+#include "base/logging.h"
+#include "base/json/json_writer.h"
+#include "base/scoped_vector.h"
+#include "base/stl_util-inl.h"
+#include "base/string_util.h"
+#include "base/values.h"
+#include "chrome/browser/bookmarks/bookmark_codec.h"
+#include "chrome/browser/bookmarks/bookmark_model.h"
+#include "chrome/browser/history/history.h"
+#include "chrome/browser/history/query_parser.h"
+
+// The following table is used to store star (aka bookmark) information. This
+// class derives from URLDatabase, which has its own schema.
+//
+// starred
+// id Unique identifier (primary key) for the entry.
+// type Type of entry: 0 corresponds to a URL, 1 to a system
+// grouping, 2 to a user-created group, and 3 to other.
+// url_id ID of the url, only valid if type == 0
+// group_id ID of the group, only valid if type != 0. This id comes
+// from the UI and is NOT the same as id.
+// title User assigned title.
+// date_added Creation date.
+// visual_order Visual order within parent.
+// parent_id Group ID of the parent this entry is contained in, if 0
+// entry is not in a group.
+// date_modified Time the group was last modified. See comments in
+// StarredEntry::date_group_modified
+// NOTE: group_id and parent_id come from the UI, id is assigned by the
+// db.
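+//
+// For example (illustrative values only, not taken from a real profile), a
+// user-created folder placed directly on the bookmark bar might be stored as:
+//   id=5, type=2, url_id=0, group_id=12, title="News", visual_order=0,
+//   parent_id=<group_id of the bookmark bar row>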
+
+namespace history {
+
+namespace {
+
+// Fields used by FillInStarredEntry.
+#define STAR_FIELDS \
+ " starred.id, starred.type, starred.title, starred.date_added, " \
+ "starred.visual_order, starred.parent_id, urls.url, urls.id, " \
+ "starred.group_id, starred.date_modified "
+const char kHistoryStarFields[] = STAR_FIELDS;
+
+void FillInStarredEntry(const sql::Statement& s, StarredEntry* entry) {
+ DCHECK(entry);
+ entry->id = s.ColumnInt64(0);
+ switch (s.ColumnInt(1)) {
+ case 0:
+ entry->type = history::StarredEntry::URL;
+ entry->url = GURL(s.ColumnString(6));
+ break;
+ case 1:
+ entry->type = history::StarredEntry::BOOKMARK_BAR;
+ break;
+ case 2:
+ entry->type = history::StarredEntry::USER_GROUP;
+ break;
+ case 3:
+ entry->type = history::StarredEntry::OTHER;
+ break;
+ default:
+ NOTREACHED();
+ break;
+ }
+ entry->title = s.ColumnString16(2);
+ entry->date_added = base::Time::FromInternalValue(s.ColumnInt64(3));
+ entry->visual_order = s.ColumnInt(4);
+ entry->parent_group_id = s.ColumnInt64(5);
+ entry->url_id = s.ColumnInt64(7);
+ entry->group_id = s.ColumnInt64(8);
+ entry->date_group_modified = base::Time::FromInternalValue(s.ColumnInt64(9));
+}
+
+} // namespace
+
+StarredURLDatabase::StarredURLDatabase() {
+}
+
+StarredURLDatabase::~StarredURLDatabase() {
+}
+
+bool StarredURLDatabase::MigrateBookmarksToFile(const FilePath& path) {
+ if (!GetDB().DoesTableExist("starred"))
+ return true;
+
+ if (EnsureStarredIntegrity() && !MigrateBookmarksToFileImpl(path)) {
+ NOTREACHED() << " Bookmarks migration failed";
+ return false;
+ }
+
+ if (!GetDB().Execute("DROP TABLE starred")) {
+ NOTREACHED() << "Unable to drop starred table";
+ return false;
+ }
+ return true;
+}
+
+bool StarredURLDatabase::GetAllStarredEntries(
+ std::vector<StarredEntry>* entries) {
+ DCHECK(entries);
+ std::string sql = "SELECT ";
+ sql.append(kHistoryStarFields);
+ sql.append("FROM starred LEFT JOIN urls ON starred.url_id = urls.id ");
+ sql += "ORDER BY parent_id, visual_order";
+
+ sql::Statement s(GetDB().GetUniqueStatement(sql.c_str()));
+ if (!s) {
+ NOTREACHED() << "Statement prepare failed";
+ return false;
+ }
+
+ history::StarredEntry entry;
+ while (s.Step()) {
+ FillInStarredEntry(s, &entry);
+ // Reset the url for non-url types. This is needed as we're reusing the
+ // same entry for the loop.
+ if (entry.type != history::StarredEntry::URL)
+ entry.url = GURL();
+ entries->push_back(entry);
+ }
+ return true;
+}
+
+bool StarredURLDatabase::EnsureStarredIntegrity() {
+ std::set<StarredNode*> roots;
+ std::set<StarID> groups_with_duplicate_ids;
+ std::set<StarredNode*> unparented_urls;
+ std::set<StarID> empty_url_ids;
+
+ if (!BuildStarNodes(&roots, &groups_with_duplicate_ids, &unparented_urls,
+ &empty_url_ids)) {
+ return false;
+ }
+
+ bool valid = EnsureStarredIntegrityImpl(&roots, groups_with_duplicate_ids,
+ &unparented_urls, empty_url_ids);
+
+ STLDeleteElements(&roots);
+ STLDeleteElements(&unparented_urls);
+ return valid;
+}
+
+bool StarredURLDatabase::UpdateStarredEntryRow(StarID star_id,
+ const string16& title,
+ UIStarID parent_group_id,
+ int visual_order,
+ base::Time date_modified) {
+ DCHECK(star_id && visual_order >= 0);
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "UPDATE starred SET title=?, parent_id=?, visual_order=?, "
+ "date_modified=? WHERE id=?"));
+ if (!statement)
+ return false;
+
+ statement.BindString16(0, title);
+ statement.BindInt64(1, parent_group_id);
+ statement.BindInt(2, visual_order);
+ statement.BindInt64(3, date_modified.ToInternalValue());
+ statement.BindInt64(4, star_id);
+ return statement.Run();
+}
+
+bool StarredURLDatabase::AdjustStarredVisualOrder(UIStarID parent_group_id,
+ int start_visual_order,
+ int delta) {
+ DCHECK(parent_group_id && start_visual_order >= 0);
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "UPDATE starred SET visual_order=visual_order+? "
+ "WHERE parent_id=? AND visual_order >= ?"));
+ if (!statement)
+ return false;
+
+ statement.BindInt(0, delta);
+ statement.BindInt64(1, parent_group_id);
+ statement.BindInt(2, start_visual_order);
+ return statement.Run();
+}
+
+StarID StarredURLDatabase::CreateStarredEntryRow(URLID url_id,
+ UIStarID group_id,
+ UIStarID parent_group_id,
+ const string16& title,
+ const base::Time& date_added,
+ int visual_order,
+ StarredEntry::Type type) {
+ DCHECK(visual_order >= 0 &&
+ (type != history::StarredEntry::URL || url_id));
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "INSERT INTO starred "
+ "(type, url_id, group_id, title, date_added, visual_order, parent_id, "
+ "date_modified) VALUES (?,?,?,?,?,?,?,?)"));
+ if (!statement)
+ return 0;
+
+ switch (type) {
+ case history::StarredEntry::URL:
+ statement.BindInt(0, 0);
+ break;
+ case history::StarredEntry::BOOKMARK_BAR:
+ statement.BindInt(0, 1);
+ break;
+ case history::StarredEntry::USER_GROUP:
+ statement.BindInt(0, 2);
+ break;
+ case history::StarredEntry::OTHER:
+ statement.BindInt(0, 3);
+ break;
+ default:
+ NOTREACHED();
+ }
+ statement.BindInt64(1, url_id);
+ statement.BindInt64(2, group_id);
+ statement.BindString16(3, title);
+ statement.BindInt64(4, date_added.ToInternalValue());
+ statement.BindInt(5, visual_order);
+ statement.BindInt64(6, parent_group_id);
+ statement.BindInt64(7, base::Time().ToInternalValue());
+ if (statement.Run())
+ return GetDB().GetLastInsertRowId();
+ return 0;
+}
+
+bool StarredURLDatabase::DeleteStarredEntryRow(StarID star_id) {
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "DELETE FROM starred WHERE id=?"));
+ if (!statement)
+ return false;
+
+ statement.BindInt64(0, star_id);
+ return statement.Run();
+}
+
+bool StarredURLDatabase::GetStarredEntry(StarID star_id, StarredEntry* entry) {
+ DCHECK(entry && star_id);
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "SELECT" STAR_FIELDS "FROM starred LEFT JOIN urls ON "
+ "starred.url_id = urls.id WHERE starred.id=?"));
+ if (!statement)
+ return false;
+
+ statement.BindInt64(0, star_id);
+
+ if (statement.Step()) {
+ FillInStarredEntry(statement, entry);
+ return true;
+ }
+ return false;
+}
+
+StarID StarredURLDatabase::CreateStarredEntry(StarredEntry* entry) {
+ entry->id = 0; // Ensure 0 for failure case.
+
+ // Adjust the visual order when we are inserting it somewhere.
+ if (entry->parent_group_id)
+ AdjustStarredVisualOrder(entry->parent_group_id, entry->visual_order, 1);
+
+ // Insert the new entry.
+ switch (entry->type) {
+ case StarredEntry::USER_GROUP:
+ entry->id = CreateStarredEntryRow(0, entry->group_id,
+ entry->parent_group_id, entry->title, entry->date_added,
+ entry->visual_order, entry->type);
+ break;
+
+ case StarredEntry::URL: {
+ // Get the row for this URL.
+ URLRow url_row;
+ if (!GetRowForURL(entry->url, &url_row)) {
+ // Create a new URL row for this entry.
+ url_row = URLRow(entry->url);
+ url_row.set_title(entry->title);
+ url_row.set_hidden(false);
+ entry->url_id = this->AddURL(url_row);
+ } else {
+ entry->url_id = url_row.id(); // The caller doesn't have to set this.
+ }
+
+ // Create the star entry referring to the URL row.
+ entry->id = CreateStarredEntryRow(entry->url_id, entry->group_id,
+ entry->parent_group_id, entry->title, entry->date_added,
+ entry->visual_order, entry->type);
+
+ // Update the URL row to refer to this new starred entry.
+ UpdateURLRow(entry->url_id, url_row);
+ break;
+ }
+
+ default:
+ NOTREACHED();
+ break;
+ }
+ return entry->id;
+}
+
+UIStarID StarredURLDatabase::GetMaxGroupID() {
+ sql::Statement max_group_id_statement(GetDB().GetUniqueStatement(
+ "SELECT MAX(group_id) FROM starred"));
+ if (!max_group_id_statement) {
+ NOTREACHED() << GetDB().GetErrorMessage();
+ return 0;
+ }
+ if (!max_group_id_statement.Step()) {
+ NOTREACHED() << GetDB().GetErrorMessage();
+ return 0;
+ }
+ return max_group_id_statement.ColumnInt64(0);
+}
+
+bool StarredURLDatabase::BuildStarNodes(
+ std::set<StarredURLDatabase::StarredNode*>* roots,
+ std::set<StarID>* groups_with_duplicate_ids,
+ std::set<StarredNode*>* unparented_urls,
+ std::set<StarID>* empty_url_ids) {
+ std::vector<StarredEntry> star_entries;
+ if (!GetAllStarredEntries(&star_entries)) {
+ NOTREACHED() << "Unable to get bookmarks from database";
+ return false;
+ }
+
+ // Create the group/bookmark-bar/other nodes.
+ std::map<UIStarID, StarredNode*> group_id_to_node_map;
+ for (size_t i = 0; i < star_entries.size(); ++i) {
+ if (star_entries[i].type != StarredEntry::URL) {
+ if (group_id_to_node_map.find(star_entries[i].group_id) !=
+ group_id_to_node_map.end()) {
+ // There's already a group with this ID.
+ groups_with_duplicate_ids->insert(star_entries[i].id);
+ } else {
+ // Create the node and update the mapping.
+ StarredNode* node = new StarredNode(star_entries[i]);
+ group_id_to_node_map[star_entries[i].group_id] = node;
+ }
+ }
+ }
+
+ // Iterate again, creating nodes for URL bookmarks and parenting all
+ // bookmarks/folders. In addition populate the empty_url_ids with all entries
+ // of type URL that have an empty URL.
+ std::map<StarID, StarredNode*> id_to_node_map;
+ for (size_t i = 0; i < star_entries.size(); ++i) {
+ if (star_entries[i].type == StarredEntry::URL) {
+ if (star_entries[i].url.is_empty()) {
+ empty_url_ids->insert(star_entries[i].id);
+ } else if (!star_entries[i].parent_group_id ||
+ group_id_to_node_map.find(star_entries[i].parent_group_id) ==
+ group_id_to_node_map.end()) {
+ // This entry has no parent, or we couldn't find the parent.
+ StarredNode* node = new StarredNode(star_entries[i]);
+ unparented_urls->insert(node);
+ } else {
+ // Add the node to its parent.
+ StarredNode* parent =
+ group_id_to_node_map[star_entries[i].parent_group_id];
+ StarredNode* node = new StarredNode(star_entries[i]);
+ parent->Add(parent->GetChildCount(), node);
+ }
+ } else if (groups_with_duplicate_ids->find(star_entries[i].id) ==
+ groups_with_duplicate_ids->end()) {
+ // The entry is a group (or bookmark bar/other node) that isn't
+ // marked as a duplicate.
+ if (!star_entries[i].parent_group_id ||
+ group_id_to_node_map.find(star_entries[i].parent_group_id) ==
+ group_id_to_node_map.end()) {
+ // Entry has no parent, or the parent wasn't found.
+ roots->insert(group_id_to_node_map[star_entries[i].group_id]);
+ } else {
+ // Parent the group node.
+ StarredNode* parent =
+ group_id_to_node_map[star_entries[i].parent_group_id];
+ StarredNode* node = group_id_to_node_map[star_entries[i].group_id];
+ if (!node->HasAncestor(parent) && !parent->HasAncestor(node)) {
+ parent->Add(parent->GetChildCount(), node);
+ } else {
+ // This node is part of a cycle. Add it to the list of roots so the
+ // cycle is broken.
+ roots->insert(node);
+ }
+ }
+ }
+ }
+ return true;
+}
+
+StarredURLDatabase::StarredNode* StarredURLDatabase::GetNodeByType(
+ const std::set<StarredURLDatabase::StarredNode*>& nodes,
+ StarredEntry::Type type) {
+ for (std::set<StarredNode*>::const_iterator i = nodes.begin();
+ i != nodes.end(); ++i) {
+ if ((*i)->value.type == type)
+ return *i;
+ }
+ return NULL;
+}
+
+bool StarredURLDatabase::EnsureVisualOrder(
+ StarredURLDatabase::StarredNode* node) {
+ for (int i = 0; i < node->GetChildCount(); ++i) {
+ if (node->GetChild(i)->value.visual_order != i) {
+ StarredEntry& entry = node->GetChild(i)->value;
+ entry.visual_order = i;
+ LOG(WARNING) << "Bookmark visual order is wrong";
+ if (!UpdateStarredEntryRow(entry.id, entry.title, entry.parent_group_id,
+ i, entry.date_group_modified)) {
+ NOTREACHED() << "Unable to update visual order";
+ return false;
+ }
+ }
+ if (!EnsureVisualOrder(node->GetChild(i)))
+ return false;
+ }
+ return true;
+}
+
+bool StarredURLDatabase::EnsureStarredIntegrityImpl(
+ std::set<StarredURLDatabase::StarredNode*>* roots,
+ const std::set<StarID>& groups_with_duplicate_ids,
+ std::set<StarredNode*>* unparented_urls,
+ const std::set<StarID>& empty_url_ids) {
+ // Make sure the bookmark bar entry exists.
+ StarredNode* bookmark_node =
+ GetNodeByType(*roots, StarredEntry::BOOKMARK_BAR);
+ if (!bookmark_node) {
+ LOG(WARNING) << "No bookmark bar folder in database";
+ // If there is no bookmark bar entry in the db things are really
+ // screwed. Return false, which won't trigger migration and we'll just
+ // drop the tables.
+ return false;
+ }
+
+ // Make sure the other node exists.
+ StarredNode* other_node = GetNodeByType(*roots, StarredEntry::OTHER);
+ if (!other_node) {
+ LOG(WARNING) << "No bookmark other folder in database";
+ StarredEntry entry;
+ entry.group_id = GetMaxGroupID() + 1;
+ if (entry.group_id == 1) {
+ NOTREACHED() << "Unable to get new id for other bookmarks folder";
+ return false;
+ }
+ entry.id = CreateStarredEntryRow(
+ 0, entry.group_id, 0, UTF8ToUTF16("other"), base::Time::Now(), 0,
+ history::StarredEntry::OTHER);
+ if (!entry.id) {
+ NOTREACHED() << "Unable to create other bookmarks folder";
+ return false;
+ }
+ entry.type = StarredEntry::OTHER;
+ other_node = new StarredNode(entry);
+ roots->insert(other_node);
+ }
+
+ // We could potentially enforce that there is only one group each of type
+ // BOOKMARK_BAR/OTHER, but the history backend already enforces this.
+
+ // Nuke any entries with no url.
+ for (std::set<StarID>::const_iterator i = empty_url_ids.begin();
+ i != empty_url_ids.end(); ++i) {
+ LOG(WARNING) << "Bookmark exists with no URL";
+ if (!DeleteStarredEntryRow(*i)) {
+ NOTREACHED() << "Unable to delete bookmark";
+ return false;
+ }
+ }
+
+ // Make sure the visual order of the nodes is correct.
+ for (std::set<StarredNode*>::const_iterator i = roots->begin();
+ i != roots->end(); ++i) {
+ if (!EnsureVisualOrder(*i))
+ return false;
+ }
+
+ // Move any unparented bookmarks to the bookmark bar.
+ {
+ std::set<StarredNode*>::iterator i = unparented_urls->begin();
+ while (i != unparented_urls->end()) {
+ LOG(WARNING) << "Bookmark not in a bookmark folder found";
+ if (!Move(*i, bookmark_node))
+ return false;
+ unparented_urls->erase(i++);
+ }
+ }
+
+ // Nuke any groups with duplicate ids. A duplicate id means there are two
+ // folders in the starred table with the same group_id. We only keep the
+ // first folder; all other groups are removed.
+ for (std::set<StarID>::const_iterator i = groups_with_duplicate_ids.begin();
+ i != groups_with_duplicate_ids.end(); ++i) {
+ LOG(WARNING) << "Duplicate group id in bookmark database";
+ if (!DeleteStarredEntryRow(*i)) {
+ NOTREACHED() << "Unable to delete folder";
+ return false;
+ }
+ }
+
+ // Move unparented user groups back to the bookmark bar.
+ {
+ std::set<StarredNode*>::iterator i = roots->begin();
+ while (i != roots->end()) {
+ if ((*i)->value.type == StarredEntry::USER_GROUP) {
+ LOG(WARNING) << "Bookmark folder not on bookmark bar found";
+ if (!Move(*i, bookmark_node))
+ return false;
+ roots->erase(i++);
+ } else {
+ ++i;
+ }
+ }
+ }
+
+ return true;
+}
+
+bool StarredURLDatabase::Move(StarredNode* source, StarredNode* new_parent) {
+ history::StarredEntry& entry = source->value;
+ entry.visual_order = new_parent->GetChildCount();
+ entry.parent_group_id = new_parent->value.group_id;
+ if (!UpdateStarredEntryRow(entry.id, entry.title,
+ entry.parent_group_id, entry.visual_order,
+ entry.date_group_modified)) {
+ NOTREACHED() << "Unable to move folder";
+ return false;
+ }
+ new_parent->Add(new_parent->GetChildCount(), source);
+ return true;
+}
+
+bool StarredURLDatabase::MigrateBookmarksToFileImpl(const FilePath& path) {
+ std::vector<history::StarredEntry> entries;
+ if (!GetAllStarredEntries(&entries))
+ return false;
+
+ // Create the bookmark bar and other folder nodes.
+ history::StarredEntry entry;
+ entry.type = history::StarredEntry::BOOKMARK_BAR;
+ BookmarkNode bookmark_bar_node(0, GURL());
+ bookmark_bar_node.Reset(entry);
+ entry.type = history::StarredEntry::OTHER;
+ BookmarkNode other_node(0, GURL());
+ other_node.Reset(entry);
+
+ std::map<history::UIStarID, history::StarID> group_id_to_id_map;
+ typedef std::map<history::StarID, BookmarkNode*> IDToNodeMap;
+ IDToNodeMap id_to_node_map;
+
+ history::UIStarID other_folder_group_id = 0;
+ history::StarID other_folder_id = 0;
+
+ // Iterate through the entries building a mapping between group_id and id.
+ for (std::vector<history::StarredEntry>::const_iterator i = entries.begin();
+ i != entries.end(); ++i) {
+ if (i->type != history::StarredEntry::URL) {
+ group_id_to_id_map[i->group_id] = i->id;
+ if (i->type == history::StarredEntry::OTHER) {
+ other_folder_id = i->id;
+ other_folder_group_id = i->group_id;
+ }
+ }
+ }
+
+ // Register the bookmark bar and other folder nodes in the maps.
+ id_to_node_map[HistoryService::kBookmarkBarID] = &bookmark_bar_node;
+ group_id_to_id_map[HistoryService::kBookmarkBarID] =
+ HistoryService::kBookmarkBarID;
+ if (other_folder_group_id) {
+ id_to_node_map[other_folder_id] = &other_node;
+ group_id_to_id_map[other_folder_group_id] = other_folder_id;
+ }
+
+ // Iterate through the entries again creating the nodes.
+ for (std::vector<history::StarredEntry>::iterator i = entries.begin();
+ i != entries.end(); ++i) {
+ if (!i->parent_group_id) {
+ DCHECK(i->type == history::StarredEntry::BOOKMARK_BAR ||
+ i->type == history::StarredEntry::OTHER);
+ // The only entries with no parent should be the bookmark bar and
+ // other bookmarks folders.
+ continue;
+ }
+
+ BookmarkNode* node = id_to_node_map[i->id];
+ if (!node) {
+ // Creating a node implicitly creates its parent if the parent hasn't
+ // been seen yet. As such, the node representing a group may already
+ // exist by the time we encounter that group's own entry.
+
+ // The created nodes are owned by the root node.
+ node = new BookmarkNode(0, i->url);
+ id_to_node_map[i->id] = node;
+ }
+ node->Reset(*i);
+
+ DCHECK(group_id_to_id_map.find(i->parent_group_id) !=
+ group_id_to_id_map.end());
+ history::StarID parent_id = group_id_to_id_map[i->parent_group_id];
+ BookmarkNode* parent = id_to_node_map[parent_id];
+ if (!parent) {
+ // Haven't encountered the parent yet, create it now.
+ parent = new BookmarkNode(0, GURL());
+ id_to_node_map[parent_id] = parent;
+ }
+
+ // Add the node to its parent. |entries| is ordered by parent then
+ // visual order so that we know we maintain visual order by always adding
+ // to the end.
+ parent->Add(parent->GetChildCount(), node);
+ }
+
+ // Save to file.
+ BookmarkCodec encoder;
+ scoped_ptr<Value> encoded_bookmarks(
+ encoder.Encode(&bookmark_bar_node, &other_node));
+ std::string content;
+ base::JSONWriter::Write(encoded_bookmarks.get(), true, &content);
+
+ return (file_util::WriteFile(path, content.c_str(),
+ static_cast<int>(content.length())) != -1);
+}
+
+} // namespace history
diff --git a/chrome/browser/history/starred_url_database.h b/chrome/browser/history/starred_url_database.h
new file mode 100644
index 0000000..8d327d8
--- /dev/null
+++ b/chrome/browser/history/starred_url_database.h
@@ -0,0 +1,185 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_HISTORY_STARRED_URL_DATABASE_H_
+#define CHROME_BROWSER_HISTORY_STARRED_URL_DATABASE_H_
+
+#include <set>
+
+#include "app/tree_node_model.h"
+#include "base/basictypes.h"
+#include "base/gtest_prod_util.h"
+#include "base/string16.h"
+#include "chrome/browser/history/history_types.h"
+#include "chrome/browser/history/url_database.h"
+
+class FilePath;
+
+namespace sql {
+class Connection;
+}
+
+namespace history {
+
+// Bookmarks were originally part of the url database; they have since been
+// moved to a separate file. This file exists purely for historical reasons and
+// contains just enough to allow migration.
+class StarredURLDatabase : public URLDatabase {
+ public:
+ // Must call InitStarTable() AND any additional init functions provided by
+ // URLDatabase before using this class' functions.
+ StarredURLDatabase();
+ virtual ~StarredURLDatabase();
+
+ protected:
+ // The unit tests poke our innards.
+ friend class HistoryTest;
+ friend class StarredURLDatabaseTest;
+ FRIEND_TEST_ALL_PREFIXES(HistoryTest, CreateStarGroup);
+
+ // Writes bookmarks to the specified file.
+ bool MigrateBookmarksToFile(const FilePath& path);
+
+ // Returns the database for the functions in this interface.
+ virtual sql::Connection& GetDB() = 0;
+
+ private:
+ // Makes sure the starred table is in a sane state. This does the following:
+ // . Makes sure there is a bookmark bar and other nodes. If no bookmark bar
+ // node is found, the table is dropped and recreated.
+ // . Removes any bookmarks with no URL. This can happen if a URL is removed
+ // from the urls table without updating the starred table correctly.
+ // . Makes sure the visual order of all nodes is correct.
+ // . Moves all bookmarks and folders that are not descendants of the bookmark
+ // bar or other folders to the bookmark bar.
+ // . Makes sure there isn't a cycle in the folders. A cycle means some folder
+ // has as its parent one of its children.
+ //
+ // This returns false if the starred table is in a bad state and couldn't
+ // be fixed, true otherwise.
+ //
+ // This should be invoked after migration.
+ bool EnsureStarredIntegrity();
+
+ // Gets all the starred entries.
+ bool GetAllStarredEntries(std::vector<StarredEntry>* entries);
+
+ // Sets the title, parent_group_id (stored in the parent_id column),
+ // visual_order and date_modified of the specified star entry.
+ //
+ // WARNING: Does not update the visual order.
+ bool UpdateStarredEntryRow(StarID star_id,
+ const string16& title,
+ UIStarID parent_group_id,
+ int visual_order,
+ base::Time date_modified);
+
+ // Adjusts the visual order of all children of parent_group_id with a
+ // visual_order >= start_visual_order by delta. For example,
+ // AdjustStarredVisualOrder(10, 0, 1) increments the visual order of all
+ // children of group 10 with a visual order >= 0 by 1.
+ bool AdjustStarredVisualOrder(UIStarID parent_group_id,
+ int start_visual_order,
+ int delta);
+
+ // Creates a starred entry with the specified parameters in the database.
+ // Returns the newly created id, or 0 on failure.
+ //
+ // WARNING: Does not update the visual order.
+ StarID CreateStarredEntryRow(URLID url_id,
+ UIStarID group_id,
+ UIStarID parent_group_id,
+ const string16& title,
+ const base::Time& date_added,
+ int visual_order,
+ StarredEntry::Type type);
+
+ // Deletes the entry from the starred database based on the starred id (NOT
+ // the url id).
+ //
+ // WARNING: Does not update the visual order.
+ bool DeleteStarredEntryRow(StarID star_id);
+
+ // Gets the details for the specified star entry in entry.
+ bool GetStarredEntry(StarID star_id, StarredEntry* entry);
+
+ // Creates a starred entry with the requested information. The structure will
+ // be updated with the ID of the newly created entry. The URL table will be
+ // updated to point to the entry. The URL row will be created if it doesn't
+ // exist.
+ //
+ // We currently only support one entry per URL. The URL must not already be
+ // starred when calling this function; otherwise it fails and returns 0.
+ StarID CreateStarredEntry(StarredEntry* entry);
+
+ // Used when checking integrity of starred table.
+ typedef TreeNodeWithValue<history::StarredEntry> StarredNode;
+
+ // Returns the max group id, or 0 if there is an error.
+ UIStarID GetMaxGroupID();
+
+ // Gets all the bookmarks and folders, creating a StarredNode for each. On
+ // success all the root nodes (the bookmark bar node, the
+ // other folder node, folders with no parent or folders with a parent that
+ // would make a cycle) are added to roots.
+ //
+ // If a group_id occurs more than once, the ids of all but the first are
+ // added to groups_with_duplicate_ids.
+ //
+ // All bookmarks not on the bookmark bar/other folder are added to
+ // unparented_urls.
+ //
+ // It's up to the caller to delete the nodes returned in roots and
+ // unparented_urls.
+ //
+ // This is used during integrity enforcing/checking of the starred table.
+ bool BuildStarNodes(
+ std::set<StarredNode*>* roots,
+ std::set<StarID>* groups_with_duplicate_ids,
+ std::set<StarredNode*>* unparented_urls,
+ std::set<StarID>* empty_url_ids);
+
+ // Sets the visual order of node's children to match the order in |node|.
+ // If the order differs, the database is updated. Returns false if the order
+ // differed and the db couldn't be updated.
+ bool EnsureVisualOrder(StarredNode* node);
+
+ // Returns the first node in nodes with the specified type, or null if
+ // there is no such node.
+ StarredNode* GetNodeByType(
+ const std::set<StarredNode*>& nodes,
+ StarredEntry::Type type);
+
+ // Implementation of starred-table integrity enforcement. See the
+ // description of EnsureStarredIntegrity for the details of what this does.
+ //
+ // All entries in roots that are not the bookmark bar and other node are
+ // moved to be children of the bookmark bar node. Similarly all nodes
+ // in unparented_urls are moved to be children of the bookmark bar.
+ //
+ // Returns true on success, false if the starred table is in a bad state and
+ // couldn't be repaired.
+ bool EnsureStarredIntegrityImpl(
+ std::set<StarredNode*>* roots,
+ const std::set<StarID>& groups_with_duplicate_ids,
+ std::set<StarredNode*>* unparented_urls,
+ const std::set<StarID>& empty_url_ids);
+
+ // Resets the visual order and parent_group_id of source's StarredEntry
+ // and adds it to the end of new_parent's children.
+ //
+ // This is used if the starred table is in an unexpected state and an entry
+ // needs to be moved.
+ bool Move(StarredNode* source, StarredNode* new_parent);
+
+ // Does the work of migrating bookmarks to a temporary file that
+ // BookmarkStorage will read from.
+ bool MigrateBookmarksToFileImpl(const FilePath& path);
+
+ DISALLOW_COPY_AND_ASSIGN(StarredURLDatabase);
+};
+
+} // namespace history
+
+#endif // CHROME_BROWSER_HISTORY_STARRED_URL_DATABASE_H_
diff --git a/chrome/browser/history/starred_url_database_unittest.cc b/chrome/browser/history/starred_url_database_unittest.cc
new file mode 100644
index 0000000..f82e645
--- /dev/null
+++ b/chrome/browser/history/starred_url_database_unittest.cc
@@ -0,0 +1,284 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <vector>
+
+#include "base/file_util.h"
+#include "base/path_service.h"
+#include "base/string_util.h"
+#include "chrome/browser/history/history.h"
+#include "chrome/browser/history/starred_url_database.h"
+#include "chrome/common/chrome_paths.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace history {
+
+class StarredURLDatabaseTest : public testing::Test,
+ public StarredURLDatabase {
+ public:
+ StarredURLDatabaseTest() {
+ }
+
+ void AddPage(const GURL& url) {
+ URLRow row(url);
+ row.set_visit_count(1);
+ EXPECT_TRUE(AddURL(row));
+ }
+
+ void CompareEntryByID(const StarredEntry& entry) {
+ DCHECK(entry.id != 0);
+ StarredEntry db_value;
+ EXPECT_TRUE(GetStarredEntry(entry.id, &db_value));
+ EXPECT_EQ(entry.id, db_value.id);
+ EXPECT_TRUE(entry.title == db_value.title);
+ EXPECT_EQ(entry.date_added.ToTimeT(), db_value.date_added.ToTimeT());
+ EXPECT_EQ(entry.group_id, db_value.group_id);
+ EXPECT_EQ(entry.parent_group_id, db_value.parent_group_id);
+ EXPECT_EQ(entry.visual_order, db_value.visual_order);
+ EXPECT_EQ(entry.type, db_value.type);
+ EXPECT_EQ(entry.url_id, db_value.url_id);
+ if (entry.type == StarredEntry::URL)
+ EXPECT_TRUE(entry.url == db_value.url);
+ }
+
+ int GetStarredEntryCount() {
+ DCHECK(db_.is_open());
+ std::vector<StarredEntry> entries;
+ GetAllStarredEntries(&entries);
+ return static_cast<int>(entries.size());
+ }
+
+ StarID CreateStarredEntry(StarredEntry* entry) {
+ return StarredURLDatabase::CreateStarredEntry(entry);
+ }
+
+ bool GetStarredEntry(StarID star_id, StarredEntry* entry) {
+ return StarredURLDatabase::GetStarredEntry(star_id, entry);
+ }
+
+ bool EnsureStarredIntegrity() {
+ return StarredURLDatabase::EnsureStarredIntegrity();
+ }
+
+ private:
+ // Test setup.
+ void SetUp() {
+ PathService::Get(base::DIR_TEMP, &db_file_);
+ db_file_ = db_file_.AppendASCII("VisitTest.db");
+ file_util::Delete(db_file_, false);
+
+ // Copy over a db file that contains the starred table.
+ FilePath old_history_path;
+ PathService::Get(chrome::DIR_TEST_DATA, &old_history_path);
+ old_history_path = old_history_path.AppendASCII("bookmarks");
+ old_history_path = old_history_path.Append(
+ FILE_PATH_LITERAL("History_with_empty_starred"));
+ file_util::CopyFile(old_history_path, db_file_);
+
+ EXPECT_TRUE(db_.Open(db_file_));
+
+ // Initialize the tables for this test.
+ CreateURLTable(false);
+ CreateMainURLIndex();
+ EnsureStarredIntegrity();
+ }
+ void TearDown() {
+ db_.Close();
+ file_util::Delete(db_file_, false);
+ }
+
+ // Provided for URL/StarredURLDatabase.
+ virtual sql::Connection& GetDB() {
+ return db_;
+ }
+
+ FilePath db_file_;
+ sql::Connection db_;
+};
+
+//-----------------------------------------------------------------------------
+
+TEST_F(StarredURLDatabaseTest, FixOrphanedGroup) {
+ const int initial_count = GetStarredEntryCount();
+
+ // Create a group that isn't parented to the other/bookmark folders.
+ StarredEntry g_entry;
+ g_entry.type = StarredEntry::USER_GROUP;
+ g_entry.parent_group_id = 100;
+ g_entry.visual_order = 10;
+ g_entry.group_id = 100;
+ CreateStarredEntry(&g_entry);
+
+ ASSERT_TRUE(EnsureStarredIntegrity());
+
+ // Make sure no new entries were added.
+ ASSERT_EQ(initial_count + 1, GetStarredEntryCount());
+
+ // Make sure the group was moved to the bookmark bar folder.
+ ASSERT_TRUE(GetStarredEntry(g_entry.id, &g_entry));
+ ASSERT_EQ(HistoryService::kBookmarkBarID, g_entry.parent_group_id);
+ ASSERT_EQ(0, g_entry.visual_order);
+}
+
+TEST_F(StarredURLDatabaseTest, FixOrphanedBookmarks) {
+ const int initial_count = GetStarredEntryCount();
+
+ // Create two bookmarks whose parent folders don't exist, i.e. they are
+ // neither in a real folder nor on the bookmark bar.
+ StarredEntry entry1;
+ entry1.parent_group_id = 100;
+ entry1.visual_order = 10;
+ entry1.url = GURL("http://google.com/1");
+ CreateStarredEntry(&entry1);
+
+ StarredEntry entry2;
+ entry2.parent_group_id = 101;
+ entry2.visual_order = 20;
+ entry2.url = GURL("http://google.com/2");
+ CreateStarredEntry(&entry2);
+
+ ASSERT_TRUE(EnsureStarredIntegrity());
+
+ // Make sure no new entries were added.
+ ASSERT_EQ(initial_count + 2, GetStarredEntryCount());
+
+ // Make sure the entries were moved to the bookmark bar and the visual
+ // order was updated appropriately.
+ ASSERT_TRUE(GetStarredEntry(entry1.id, &entry1));
+ ASSERT_EQ(HistoryService::kBookmarkBarID, entry1.parent_group_id);
+
+ ASSERT_TRUE(GetStarredEntry(entry2.id, &entry2));
+ ASSERT_EQ(HistoryService::kBookmarkBarID, entry2.parent_group_id);
+ ASSERT_TRUE((entry1.visual_order == 0 && entry2.visual_order == 1) ||
+ (entry1.visual_order == 1 && entry2.visual_order == 0));
+}
+
+TEST_F(StarredURLDatabaseTest, FixGroupCycleDepth0) {
+ const int initial_count = GetStarredEntryCount();
+
+ // Create a group that is parented to itself.
+ StarredEntry entry1;
+ entry1.group_id = entry1.parent_group_id = 100;
+ entry1.visual_order = 10;
+ entry1.type = StarredEntry::USER_GROUP;
+ CreateStarredEntry(&entry1);
+
+ ASSERT_TRUE(EnsureStarredIntegrity());
+
+ // Make sure no new entries were added.
+ ASSERT_EQ(initial_count + 1, GetStarredEntryCount());
+
+ // Make sure the group was moved to the bookmark bar and the visual
+ // order was updated appropriately.
+ ASSERT_TRUE(GetStarredEntry(entry1.id, &entry1));
+ ASSERT_EQ(HistoryService::kBookmarkBarID, entry1.parent_group_id);
+ ASSERT_EQ(0, entry1.visual_order);
+}
+
+TEST_F(StarredURLDatabaseTest, FixGroupCycleDepth1) {
+ const int initial_count = GetStarredEntryCount();
+
+ StarredEntry entry1;
+ entry1.group_id = 100;
+ entry1.parent_group_id = 101;
+ entry1.visual_order = 10;
+ entry1.type = StarredEntry::USER_GROUP;
+ CreateStarredEntry(&entry1);
+
+ StarredEntry entry2;
+ entry2.group_id = 101;
+ entry2.parent_group_id = 100;
+ entry2.visual_order = 11;
+ entry2.type = StarredEntry::USER_GROUP;
+ CreateStarredEntry(&entry2);
+
+ ASSERT_TRUE(EnsureStarredIntegrity());
+
+ // Make sure no new entries were added.
+ ASSERT_EQ(initial_count + 2, GetStarredEntryCount());
+
+ // Because the groups caused a cycle, entry1 is moved to the bookmark bar,
+ // which breaks the cycle.
+ ASSERT_TRUE(GetStarredEntry(entry1.id, &entry1));
+ ASSERT_TRUE(GetStarredEntry(entry2.id, &entry2));
+ ASSERT_EQ(HistoryService::kBookmarkBarID, entry1.parent_group_id);
+ ASSERT_EQ(100, entry2.parent_group_id);
+ ASSERT_EQ(0, entry1.visual_order);
+ ASSERT_EQ(0, entry2.visual_order);
+}
+
+TEST_F(StarredURLDatabaseTest, FixVisualOrder) {
+ const int initial_count = GetStarredEntryCount();
+
+ // Star two urls.
+ StarredEntry entry1;
+ entry1.url = GURL("http://google.com/1");
+ entry1.parent_group_id = HistoryService::kBookmarkBarID;
+ entry1.visual_order = 5;
+ CreateStarredEntry(&entry1);
+
+ // Star a second url.
+ StarredEntry entry2;
+ entry2.url = GURL("http://google.com/2");
+ entry2.parent_group_id = HistoryService::kBookmarkBarID;
+ entry2.visual_order = 10;
+ CreateStarredEntry(&entry2);
+
+ ASSERT_TRUE(EnsureStarredIntegrity());
+
+ // Make sure no new entries were added.
+ ASSERT_EQ(initial_count + 2, GetStarredEntryCount());
+
+ StarredEntry entry;
+ ASSERT_TRUE(GetStarredEntry(entry1.id, &entry));
+ entry1.visual_order = 0;
+ CompareEntryByID(entry1);
+
+ ASSERT_TRUE(GetStarredEntry(entry2.id, &entry));
+ entry2.visual_order = 1;
+ CompareEntryByID(entry2);
+}
+
+TEST_F(StarredURLDatabaseTest, FixDuplicateGroupIDs) {
+ const int initial_count = GetStarredEntryCount();
+
+ // Create two groups with the same group id.
+ StarredEntry entry1;
+ entry1.type = StarredEntry::USER_GROUP;
+ entry1.group_id = 10;
+ entry1.parent_group_id = HistoryService::kBookmarkBarID;
+ CreateStarredEntry(&entry1);
+ StarredEntry entry2 = entry1;
+ CreateStarredEntry(&entry2);
+
+ ASSERT_TRUE(EnsureStarredIntegrity());
+
+ // Make sure only one group exists.
+ ASSERT_EQ(initial_count + 1, GetStarredEntryCount());
+
+ StarredEntry entry;
+ ASSERT_TRUE(GetStarredEntry(entry1.id, &entry) ||
+ GetStarredEntry(entry2.id, &entry));
+}
+
+TEST_F(StarredURLDatabaseTest, RemoveStarredEntriesWithEmptyURL) {
+ const int initial_count = GetStarredEntryCount();
+
+ StarredEntry entry;
+ entry.url = GURL("http://google.com");
+ entry.title = UTF8ToUTF16("FOO");
+ entry.parent_group_id = HistoryService::kBookmarkBarID;
+
+ ASSERT_NE(0, CreateStarredEntry(&entry));
+
+ // Remove the URL.
+ DeleteURLRow(entry.url_id);
+
+ // Fix up the table.
+ ASSERT_TRUE(EnsureStarredIntegrity());
+
+ // The entry we just created should have been nuked.
+ ASSERT_EQ(initial_count, GetStarredEntryCount());
+}
+
+} // namespace history
diff --git a/chrome/browser/history/text_database.cc b/chrome/browser/history/text_database.cc
new file mode 100644
index 0000000..3327869
--- /dev/null
+++ b/chrome/browser/history/text_database.cc
@@ -0,0 +1,378 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <limits>
+#include <set>
+#include <string>
+
+#include "chrome/browser/history/text_database.h"
+
+#include "app/sql/connection.h"
+#include "app/sql/statement.h"
+#include "app/sql/transaction.h"
+#include "base/file_util.h"
+#include "base/histogram.h"
+#include "base/logging.h"
+#include "base/string_util.h"
+#include "base/utf_string_conversions.h"
+#include "chrome/browser/diagnostics/sqlite_diagnostics.h"
+
+// There are two tables in each database: a full-text search (FTS) table that
+// indexes the contents and titles of the pages, and a regular SQLite table
+// that contains non-indexed information about each page. All columns of an
+// FTS table are indexed using the text search algorithm, which isn't what we
+// want for things like times; if times were stored in the FTS table, each
+// distinct time value would appear as a different word in the index.
+//
+// "pages" FTS table:
+// url URL of the page so searches will match the URL.
+// title Title of the page.
+// body Body of the page.
+//
+// "info" regular table:
+// time Time the corresponding FTS entry was visited.
+//
+// We do joins across these two tables by using their internal rowids, which we
+// keep in sync between the two tables. The internal rowid is the only part of
+// an FTS table that is indexed like a normal table, and the index over it is
+// free since sqlite always indexes the internal rowid.
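+//
+// For example, a lookup can join the two tables on the shared rowid (a
+// sketch; GetTextMatches() below builds the real statement):
+//   SELECT url, title, time FROM pages JOIN info ON pages.rowid = info.rowid
+//   WHERE pages MATCH 'query';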
+
+namespace history {
+
+namespace {
+
+// Version 1 uses FTS2 for index files.
+// Version 2 uses FTS3.
+static const int kCurrentVersionNumber = 2;
+static const int kCompatibleVersionNumber = 2;
+
+// Snippet computation relies on the index of the columns in the original
+// create statement. These are the 0-based indices (as strings) of the
+// corresponding columns.
+const char kTitleColumnIndex[] = "1";
+const char kBodyColumnIndex[] = "2";
+
+// The string prepended to the database identifier to generate the filename.
+const FilePath::CharType kFilePrefix[] = FILE_PATH_LITERAL("History Index ");
+
+} // namespace
+
+TextDatabase::TextDatabase(const FilePath& path,
+ DBIdent id,
+ bool allow_create)
+ : path_(path),
+ ident_(id),
+ allow_create_(allow_create) {
+ // Compute the file name.
+ file_name_ = path_.Append(IDToFileName(ident_));
+}
+
+TextDatabase::~TextDatabase() {
+}
+
+// static
+const FilePath::CharType* TextDatabase::file_base() {
+ return kFilePrefix;
+}
+
+// static
+FilePath TextDatabase::IDToFileName(DBIdent id) {
+ // Identifiers are intended to be a combination of the year and month, for
+ // example, 200801 for January 2008. We convert this to
+ // "History Index 2008-01". However, we don't make assumptions about this
+ // scheme: the caller should assign IDs as it sees fit with the knowledge
+ // that they will appear on disk in this form.
+ FilePath::StringType filename(file_base());
+ StringAppendF(&filename, FILE_PATH_LITERAL("%d-%02d"),
+ id / 100, id % 100);
+ return FilePath(filename);
+}
+
+// static
+TextDatabase::DBIdent TextDatabase::FileNameToID(const FilePath& file_path) {
+ FilePath::StringType file_name = file_path.BaseName().value();
+
+ // We don't actually check the prefix here. Since the file system could
+ // be case insensitive in ways we can't predict (NTFS), checking could
+ // potentially be the wrong thing to do. Instead, we just look for a suffix.
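+ // For example, "History Index 2008-01" ends with the suffix "2008-01",
+ // which parses below to the identifier 2008 * 100 + 1 = 200801.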
+ static const size_t kIDStringLength = 7; // Room for "xxxx-xx".
+ if (file_name.length() < kIDStringLength)
+ return 0;
+ const FilePath::StringType suffix(
+ &file_name[file_name.length() - kIDStringLength]);
+
+ if (suffix.length() != kIDStringLength ||
+ suffix[4] != FILE_PATH_LITERAL('-')) {
+ return 0;
+ }
+
+ int year = StringToInt(suffix.substr(0, 4));
+ int month = StringToInt(suffix.substr(5, 2));
+
+ return year * 100 + month;
+}
+
+bool TextDatabase::Init() {
+ // Make sure, if we're not allowed to create the file, that it exists.
+ if (!allow_create_) {
+ if (!file_util::PathExists(file_name_))
+ return false;
+ }
+
+ // Set the exceptional sqlite error handler.
+ db_.set_error_delegate(GetErrorHandlerForTextDb());
+
+ // Set the database page size to something a little larger to give us
+ // better performance (we're typically seek-limited rather than bandwidth
+ // limited). This only has an effect before any tables have been created;
+ // otherwise it is a NOP. Must be a power of 2 and at most 8192.
+ db_.set_page_size(4096);
+
+ // The default cache size is 2000 pages, which gives >8MB of data. Since we
+ // will often have 2-3 of these objects, each with its own 8MB, this adds up
+ // very fast. We therefore reduce the size so that multiple open databases
+ // don't consume too much memory.
+ db_.set_cache_size(512);
+
+ // Run the database in exclusive mode. Nobody else should be accessing the
+ // database while we're running, and this will give somewhat improved perf.
+ db_.set_exclusive_locking();
+
+ // Attach the database to our index file.
+ if (!db_.Open(file_name_))
+ return false;
+
+ // Meta table tracking version information.
+ if (!meta_table_.Init(&db_, kCurrentVersionNumber, kCompatibleVersionNumber))
+ return false;
+ if (meta_table_.GetCompatibleVersionNumber() > kCurrentVersionNumber) {
+ // This version is too new. We don't bother notifying the user on this
+ // error, and just fail to use the file. Normally if they have version skew,
+ // they will get it for the main history file and it won't be necessary
+ // here. If that's not the case, since this is only indexed data, it's
+ // probably better to just not give FTS results than strange errors when
+ // everything else is working OK.
+ LOG(WARNING) << "Text database is too new.";
+ return false;
+ }
+
+ return CreateTables();
+}
+
+void TextDatabase::BeginTransaction() {
+ db_.BeginTransaction();
+}
+
+void TextDatabase::CommitTransaction() {
+ db_.CommitTransaction();
+}
+
+bool TextDatabase::CreateTables() {
+ // FTS table of page contents.
+ if (!db_.DoesTableExist("pages")) {
+ if (!db_.Execute("CREATE VIRTUAL TABLE pages USING fts3("
+ "TOKENIZE icu,"
+ "url LONGVARCHAR,"
+ "title LONGVARCHAR,"
+ "body LONGVARCHAR)"))
+ return false;
+ }
+
+ // Non-FTS table containing URLs and times so we can efficiently find them
+ // using a regular index (all FTS columns are special and are treated as
+ // full-text-search, which is not what we want when retrieving this data).
+ if (!db_.DoesTableExist("info")) {
+ // Note that there is no point in creating an index over time. Since
+ // we must always query the entire FTS table (it cannot efficiently do
+ // subsets), we will always end up doing that first, and joining the info
+ // table off of that.
+ if (!db_.Execute("CREATE TABLE info(time INTEGER NOT NULL)"))
+ return false;
+ }
+
+ // Create the index. This will fail when the index already exists, so we just
+ // ignore the error.
+ db_.Execute("CREATE INDEX info_time ON info(time)");
+ return true;
+}
+
+bool TextDatabase::AddPageData(base::Time time,
+ const std::string& url,
+ const std::string& title,
+ const std::string& contents) {
+ sql::Transaction committer(&db_);
+ if (!committer.Begin())
+ return false;
+
+ // Add to the pages table.
+ sql::Statement add_to_pages(db_.GetCachedStatement(SQL_FROM_HERE,
+ "INSERT INTO pages (url, title, body) VALUES (?,?,?)"));
+ if (!add_to_pages) {
+ NOTREACHED() << db_.GetErrorMessage();
+ return false;
+ }
+ add_to_pages.BindString(0, url);
+ add_to_pages.BindString(1, title);
+ add_to_pages.BindString(2, contents);
+ if (!add_to_pages.Run()) {
+ NOTREACHED() << db_.GetErrorMessage();
+ return false;
+ }
+
+ int64 rowid = db_.GetLastInsertRowId();
+
+ // Add to the info table with the same rowid.
+ sql::Statement add_to_info(db_.GetCachedStatement(SQL_FROM_HERE,
+ "INSERT INTO info (rowid, time) VALUES (?,?)"));
+ if (!add_to_info) {
+ NOTREACHED() << db_.GetErrorMessage();
+ return false;
+ }
+ add_to_info.BindInt64(0, rowid);
+ add_to_info.BindInt64(1, time.ToInternalValue());
+ if (!add_to_info.Run()) {
+ NOTREACHED() << db_.GetErrorMessage();
+ return false;
+ }
+
+ return committer.Commit();
+}
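+
+// For illustration: after one AddPageData() call, the two tables share a
+// rowid (this pairing is what lets GetTextMatches() join them):
+//   pages: rowid=1 | url | title | body   -- searched via MATCH
+//   info:  rowid=1 | time                 -- filtered via the info_time index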
+
+void TextDatabase::DeletePageData(base::Time time, const std::string& url) {
+ // First get all rows that match. Selecting on time (which has an index) allows
+ // us to avoid brute-force searches on the full-text-index table (there will
+ // generally be only one match per time).
+ sql::Statement select_ids(db_.GetCachedStatement(SQL_FROM_HERE,
+ "SELECT info.rowid "
+ "FROM info JOIN pages ON info.rowid = pages.rowid "
+ "WHERE info.time=? AND pages.url=?"));
+ if (!select_ids)
+ return;
+ select_ids.BindInt64(0, time.ToInternalValue());
+ select_ids.BindString(1, url);
+ std::set<int64> rows_to_delete;
+ while (select_ids.Step())
+ rows_to_delete.insert(select_ids.ColumnInt64(0));
+
+ // Delete from the pages table.
+ sql::Statement delete_page(db_.GetCachedStatement(SQL_FROM_HERE,
+ "DELETE FROM pages WHERE rowid=?"));
+ if (!delete_page)
+ return;
+ for (std::set<int64>::const_iterator i = rows_to_delete.begin();
+ i != rows_to_delete.end(); ++i) {
+ delete_page.BindInt64(0, *i);
+ if (!delete_page.Run()) {
+ NOTREACHED();
+ return;
+ }
+ delete_page.Reset();
+ }
+
+ // Delete from the info table.
+ sql::Statement delete_info(db_.GetCachedStatement(SQL_FROM_HERE,
+ "DELETE FROM info WHERE rowid=?"));
+ if (!delete_info)
+ return;
+ for (std::set<int64>::const_iterator i = rows_to_delete.begin();
+ i != rows_to_delete.end(); ++i) {
+ delete_info.BindInt64(0, *i);
+ if (!delete_info.Run()) {
+ NOTREACHED();
+ return;
+ }
+ delete_info.Reset();
+ }
+}
+
+void TextDatabase::Optimize() {
+ sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE,
+ "SELECT OPTIMIZE(pages) FROM pages LIMIT 1"));
+ if (!statement)
+ return;
+ statement.Run();
+}
+
+void TextDatabase::GetTextMatches(const std::string& query,
+ const QueryOptions& options,
+ std::vector<Match>* results,
+ URLSet* found_urls,
+ base::Time* first_time_searched) {
+ *first_time_searched = options.begin_time;
+
+ sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE,
+ "SELECT url, title, time, offsets(pages), body "
+ "FROM pages LEFT OUTER JOIN info ON pages.rowid = info.rowid "
+ "WHERE pages MATCH ? AND time >= ? AND time < ? "
+ "ORDER BY time DESC "
+ "LIMIT ?"));
+ if (!statement)
+ return;
+
+ // When their values indicate "unspecified", saturate the numbers to the max
+ // or min to get the correct result.
+ int64 effective_begin_time = options.begin_time.is_null() ?
+ 0 : options.begin_time.ToInternalValue();
+ int64 effective_end_time = options.end_time.is_null() ?
+ std::numeric_limits<int64>::max() : options.end_time.ToInternalValue();
+ int effective_max_count = options.max_count ?
+ options.max_count : std::numeric_limits<int>::max();
+
+ statement.BindString(0, query);
+ statement.BindInt64(1, effective_begin_time);
+ statement.BindInt64(2, effective_end_time);
+ statement.BindInt(3, effective_max_count);
+
+ while (statement.Step()) {
+ // TODO(brettw) allow canceling the query in the middle.
+ // if (canceled_or_something)
+ // break;
+
+ GURL url(statement.ColumnString(0));
+ URLSet::const_iterator found_url = found_urls->find(url);
+ if (found_url != found_urls->end())
+ continue; // Don't add this duplicate.
+
+ // Fill the results into the vector (avoid copying the URL with Swap()).
+ results->resize(results->size() + 1);
+ Match& match = results->at(results->size() - 1);
+ match.url.Swap(&url);
+
+ match.title = statement.ColumnString16(1);
+ match.time = base::Time::FromInternalValue(statement.ColumnInt64(2));
+
+ // Extract any matches in the title.
+ std::string offsets_str = statement.ColumnString(3);
+ Snippet::ExtractMatchPositions(offsets_str, kTitleColumnIndex,
+ &match.title_match_positions);
+ Snippet::ConvertMatchPositionsToWide(statement.ColumnString(1),
+ &match.title_match_positions);
+
+ // Extract the matches in the body.
+ Snippet::MatchPositions match_positions;
+ Snippet::ExtractMatchPositions(offsets_str, kBodyColumnIndex,
+ &match_positions);
+
+ // Compute the snippet based on those matches.
+ std::string body = statement.ColumnString(4);
+ match.snippet.ComputeSnippet(match_positions, body);
+ }
+
+ // When we have returned all the results possible (or determined that there
+ // are none), then we have searched all the time requested, so we can
+ // set the first_time_searched to that value.
+ if (results->size() == 0 ||
+ options.max_count == 0 || // Special case for wanting all the results.
+ static_cast<int>(results->size()) < options.max_count) {
+ *first_time_searched = options.begin_time;
+ } else {
+ // Since we got the results in order, we know the last item is the last
+ // time we considered.
+ *first_time_searched = results->back().time;
+ }
+
+ statement.Reset();
+}
+
+} // namespace history
diff --git a/chrome/browser/history/text_database.h b/chrome/browser/history/text_database.h
new file mode 100644
index 0000000..e34c071
--- /dev/null
+++ b/chrome/browser/history/text_database.h
@@ -0,0 +1,162 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_HISTORY_TEXT_DATABASE_H_
+#define CHROME_BROWSER_HISTORY_TEXT_DATABASE_H_
+
+#include <set>
+#include <vector>
+
+#include "app/sql/connection.h"
+#include "app/sql/meta_table.h"
+#include "base/basictypes.h"
+#include "base/file_path.h"
+#include "base/string16.h"
+#include "chrome/browser/history/history_types.h"
+#include "googleurl/src/gurl.h"
+
+namespace history {
+
+// Encapsulation of a full-text indexed database file.
+class TextDatabase {
+ public:
+ typedef int DBIdent;
+
+ typedef std::set<GURL> URLSet;
+
+ // Returned from the search function.
+ struct Match {
+ // URL of the match.
+ GURL url;
+
+ // The title is returned because the title in the text database and the URL
+ // database may differ. This happens because we capture the title when the
+ // body is captured, and don't update it later.
+ string16 title;
+
+ // Time the page that was returned was visited.
+ base::Time time;
+
+ // Identifies any found matches in the title of the document. These are not
+ // included in the snippet.
+ Snippet::MatchPositions title_match_positions;
+
+ // Snippet of the match we generated from the body.
+ Snippet snippet;
+ };
+
+ // Note: You must call Init(), and it must succeed, before using this class.
+ //
+ // Each TextDatabase opens its own sqlite connection with a deliberately
+ // small cache (see Init()), which limits the total memory that the text
+ // index databases can take up when several are open at once.
+ //
+ // |file_name| is the name of the file on disk.
+ //
+ // |id| is the identifier for the database. It should uniquely identify it
+ // among other databases on disk and in the sqlite connection.
+ //
+ // |allow_create| indicates if we want to allow creation of the file if it
+ // doesn't exist. For files associated with older time periods, we don't want
+ // to create them if they don't exist, so this flag would be false.
+ TextDatabase(const FilePath& path,
+ DBIdent id,
+ bool allow_create);
+ ~TextDatabase();
+
+ // Initializes the database connection and creates the file if the class
+ // was created with |allow_create|. If the file couldn't be opened or
+ // created, this will return false. No other functions should be called
+ // after this.
+ bool Init();
+
+ // Allows updates to be batched. This gives higher performance when multiple
+ // updates are happening because every insert doesn't require a sync to disk.
+ // Transactions can be nested; only the outermost one will actually count.
+ void BeginTransaction();
+ void CommitTransaction();
+
+ // For testing, returns the file name of the database so it can be deleted
+ // after the test. This is valid even before Init() is called.
+ const FilePath& file_name() const { return file_name_; }
+
+ // Returns a NULL-terminated string that is the base of history index files,
+ // which is the part before the database identifier. For example
+ // "History Index *". This is for finding existing database files.
+ static const FilePath::CharType* file_base();
+
+ // Converts a filename on disk (optionally including a path) to a database
+ // identifier. If the filename doesn't have the correct format, returns 0.
+ static DBIdent FileNameToID(const FilePath& file_path);
+
+ // Changing operations -------------------------------------------------------
+
+ // Adds the given data to the page. Returns true on success. The data should
+ // already be converted to UTF-8.
+ bool AddPageData(base::Time time,
+ const std::string& url,
+ const std::string& title,
+ const std::string& contents);
+
+ // Deletes the indexed data exactly matching the given URL/time pair.
+ void DeletePageData(base::Time time, const std::string& url);
+
+ // Optimizes the tree inside the database. This will, in addition to making
+ // access faster, remove any deleted data from the database (normally deleted
+ // data is only marked as removed, and is cleaned up whenever sqlite decides
+ // to optimize naturally). It is bad for privacy if a user deletes a page
+ // from history but it still exists in the full text database in some form.
+ // This function will clean that up.
+ void Optimize();
+
+ // Querying ------------------------------------------------------------------
+
+ // Executes the given query. See QueryOptions for more info on input.
+ //
+ // The results are appended to any existing ones in |*results| in decreasing
+ // order of visit time, and the first time considered for the output is in
+ // |first_time_searched| (see QueryResults for more).
+ //
+ // Any URL already in |unique_urls| will be skipped. Callers can pre-populate
+ // the set to suppress results for URLs they have already seen; note that
+ // this function does not add the URLs it finds to the set itself.
+ //
+ // Callers must run QueryParser on the user text and pass the results of the
+ // QueryParser to this method as the query string.
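+ //
+ // A minimal sketch of the expected call pattern (local names here are
+ // illustrative, not part of this API):
+ //
+ //   string16 fts_query;
+ //   query_parser.ParseQuery(user_text, &fts_query);
+ //   std::vector<TextDatabase::Match> matches;
+ //   TextDatabase::URLSet seen_urls;
+ //   base::Time first_time;
+ //   db->GetTextMatches(UTF16ToUTF8(fts_query), options,
+ //                      &matches, &seen_urls, &first_time);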
+ void GetTextMatches(const std::string& query,
+ const QueryOptions& options,
+ std::vector<Match>* results,
+ URLSet* unique_urls,
+ base::Time* first_time_searched);
+
+ // Converts the given database identifier to a filename. This does not include
+ // the path, just the file and extension.
+ static FilePath IDToFileName(DBIdent id);
+
+ private:
+ // Ensures that the tables and indices are created. Returns true on success.
+ bool CreateTables();
+
+ // The sql database. Not valid until Init is called.
+ sql::Connection db_;
+
+ const FilePath path_;
+ const DBIdent ident_;
+ const bool allow_create_;
+
+ // Full file name of the file on disk, computed in Init().
+ FilePath file_name_;
+
+ sql::MetaTable meta_table_;
+
+ DISALLOW_COPY_AND_ASSIGN(TextDatabase);
+};
+
+} // namespace history
+
+#endif // CHROME_BROWSER_HISTORY_TEXT_DATABASE_H_
diff --git a/chrome/browser/history/text_database_manager.cc b/chrome/browser/history/text_database_manager.cc
new file mode 100644
index 0000000..ff1ae38
--- /dev/null
+++ b/chrome/browser/history/text_database_manager.cc
@@ -0,0 +1,550 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/history/text_database_manager.h"
+
+#include "base/compiler_specific.h"
+#include "base/file_util.h"
+#include "base/histogram.h"
+#include "base/logging.h"
+#include "base/message_loop.h"
+#include "base/string_util.h"
+#include "base/utf_string_conversions.h"
+#include "chrome/browser/history/history_publisher.h"
+#include "chrome/browser/history/visit_database.h"
+#include "chrome/common/mru_cache.h"
+
+using base::Time;
+using base::TimeDelta;
+using base::TimeTicks;
+
+namespace history {
+
+namespace {
+
+// The number of database files we will be attached to at once.
+const int kCacheDBSize = 5;
+
+std::string ConvertStringForIndexer(const string16& input) {
+ // TODO(evanm): other transformations here?
+ return UTF16ToUTF8(CollapseWhitespace(input, false));
+}
+
+// Data older than this will be committed to the full text index even if we
+// haven't gotten a title and/or body.
+const int kExpirationSec = 20;
+
+} // namespace
+
+// TextDatabaseManager::PageInfo -----------------------------------------------
+
+TextDatabaseManager::PageInfo::PageInfo(URLID url_id,
+ VisitID visit_id,
+ Time visit_time)
+ : url_id_(url_id),
+ visit_id_(visit_id),
+ visit_time_(visit_time) {
+ added_time_ = TimeTicks::Now();
+}
+
+void TextDatabaseManager::PageInfo::set_title(const string16& ttl) {
+ if (ttl.empty()) // Ensure the title is nonempty so has_title() detects it.
+ title_ = ASCIIToUTF16(" ");
+ else
+ title_ = ttl;
+}
+
+void TextDatabaseManager::PageInfo::set_body(const string16& bdy) {
+ if (bdy.empty()) // Ensure the body is nonempty so has_body() detects it.
+ body_ = ASCIIToUTF16(" ");
+ else
+ body_ = bdy;
+}
+
+bool TextDatabaseManager::PageInfo::Expired(TimeTicks now) const {
+ return now - added_time_ > TimeDelta::FromSeconds(kExpirationSec);
+}
+
+// TextDatabaseManager ---------------------------------------------------------
+
+TextDatabaseManager::TextDatabaseManager(const FilePath& dir,
+ URLDatabase* url_database,
+ VisitDatabase* visit_database)
+ : dir_(dir),
+ url_database_(url_database),
+ visit_database_(visit_database),
+ recent_changes_(RecentChangeList::NO_AUTO_EVICT),
+ transaction_nesting_(0),
+ db_cache_(DBCache::NO_AUTO_EVICT),
+ present_databases_loaded_(false),
+ ALLOW_THIS_IN_INITIALIZER_LIST(factory_(this)),
+ history_publisher_(NULL) {
+}
+
+TextDatabaseManager::~TextDatabaseManager() {
+ if (transaction_nesting_)
+ CommitTransaction();
+}
+
+// static
+TextDatabase::DBIdent TextDatabaseManager::TimeToID(Time time) {
+ Time::Exploded exploded;
+ time.UTCExplode(&exploded);
+
+ // We combine the month and year into a 6-digit number (200801 for
+ // January, 2008). The month is 1-based.
+ return exploded.year * 100 + exploded.month;
+}
+
+// static
+Time TextDatabaseManager::IDToTime(TextDatabase::DBIdent id) {
+ Time::Exploded exploded;
+ memset(&exploded, 0, sizeof(Time::Exploded));
+ exploded.year = id / 100;
+ exploded.month = id % 100;
+ return Time::FromUTCExploded(exploded);
+}
+
+bool TextDatabaseManager::Init(const HistoryPublisher* history_publisher) {
+ history_publisher_ = history_publisher;
+
+ // Start checking recent changes and committing them.
+ ScheduleFlushOldChanges();
+ return true;
+}
+
+void TextDatabaseManager::BeginTransaction() {
+ transaction_nesting_++;
+}
+
+void TextDatabaseManager::CommitTransaction() {
+ DCHECK(transaction_nesting_);
+ transaction_nesting_--;
+ if (transaction_nesting_)
+ return; // Still more nesting of transactions before committing.
+
+ // Commit all databases with open transactions on them.
+ for (DBIdentSet::const_iterator i = open_transactions_.begin();
+ i != open_transactions_.end(); ++i) {
+ DBCache::iterator iter = db_cache_.Get(*i);
+ if (iter == db_cache_.end()) {
+ NOTREACHED() << "All open transactions should be cached.";
+ continue;
+ }
+ iter->second->CommitTransaction();
+ }
+ open_transactions_.clear();
+
+ // Now that the transaction is over, we can expire old connections.
+ db_cache_.ShrinkToSize(kCacheDBSize);
+}
+
+void TextDatabaseManager::InitDBList() {
+ if (present_databases_loaded_)
+ return;
+
+ present_databases_loaded_ = true;
+
+ // Find files on disk matching our pattern so we can quickly test for them.
+ FilePath::StringType filepattern(TextDatabase::file_base());
+ filepattern.append(FILE_PATH_LITERAL("*"));
+ file_util::FileEnumerator enumerator(
+ dir_, false, file_util::FileEnumerator::FILES, filepattern);
+ FilePath cur_file;
+ while (!(cur_file = enumerator.Next()).empty()) {
+ // Convert to the number representing this file.
+ TextDatabase::DBIdent id = TextDatabase::FileNameToID(cur_file);
+ if (id) // Will be 0 on error.
+ present_databases_.insert(id);
+ }
+}
+
+void TextDatabaseManager::AddPageURL(const GURL& url,
+ URLID url_id,
+ VisitID visit_id,
+ Time time) {
+ // Delete any existing page info.
+ RecentChangeList::iterator found = recent_changes_.Peek(url);
+ if (found != recent_changes_.end())
+ recent_changes_.Erase(found);
+
+ // Just save this info for later. We will commit it to the database when it
+ // expires or when all the data is complete.
+ recent_changes_.Put(url, PageInfo(url_id, visit_id, time));
+}
+
+void TextDatabaseManager::AddPageTitle(const GURL& url,
+ const string16& title) {
+ RecentChangeList::iterator found = recent_changes_.Peek(url);
+ if (found == recent_changes_.end()) {
+ // This page is not in our cache of recent pages. This is very much an edge
+ // case as normally a title will come in <20 seconds after the page commits,
+ // and TabContents will avoid spamming us with >1 title per page. However,
+ // it could come up if your connection is unhappy, and we don't want to
+ // miss anything.
+ //
+ // To solve this problem, we'll just associate the most recent visit with
+ // the new title and index that using the regular code path.
+ URLRow url_row;
+ if (!url_database_->GetRowForURL(url, &url_row))
+ return; // URL is unknown, give up.
+ VisitRow visit;
+ if (!visit_database_->GetMostRecentVisitForURL(url_row.id(), &visit))
+ return; // No recent visit, give up.
+
+ if (visit.is_indexed) {
+ // If this page was already indexed, we could have a body that came in
+ // first and we don't want to overwrite it. We could go query for the
+ // current body, or have a special setter for only the title, but this is
+ // not worth it for this edge case.
+ //
+ // It will be almost impossible for the title to take longer than
+ // kExpirationSec yet we got a body in less than that time, since the
+ // title should always come in first.
+ return;
+ }
+
+ AddPageData(url, url_row.id(), visit.visit_id, visit.visit_time,
+ title, string16());
+ return; // Indexed via the fallback path, nothing more to do.
+ }
+
+ PageInfo& info = found->second;
+ if (info.has_body()) {
+ // This info is complete, write to the database.
+ AddPageData(url, info.url_id(), info.visit_id(), info.visit_time(),
+ title, info.body());
+ recent_changes_.Erase(found);
+ return;
+ }
+
+ info.set_title(title);
+}
+
+void TextDatabaseManager::AddPageContents(const GURL& url,
+ const string16& body) {
+ RecentChangeList::iterator found = recent_changes_.Peek(url);
+ if (found == recent_changes_.end()) {
+ // This page is not in our cache of recent pages. This means that the page
+ // took more than kExpirationSec to load. Often, this will be the result of
+ // a very slow iframe or other resource on the page that makes us think it's
+ // still loading.
+ //
+ // As a fallback, set the most recent visit's contents using the input, and
+ // use the last set title in the URL table as the title to index.
+ URLRow url_row;
+ if (!url_database_->GetRowForURL(url, &url_row))
+ return; // URL is unknown, give up.
+ VisitRow visit;
+ if (!visit_database_->GetMostRecentVisitForURL(url_row.id(), &visit))
+ return; // No recent visit, give up.
+
+ // Use the title from the URL row as the title for the indexing.
+ AddPageData(url, url_row.id(), visit.visit_id, visit.visit_time,
+ url_row.title(), body);
+ return;
+ }
+
+ PageInfo& info = found->second;
+ if (info.has_title()) {
+ // This info is complete, write to the database.
+ AddPageData(url, info.url_id(), info.visit_id(), info.visit_time(),
+ info.title(), body);
+ recent_changes_.Erase(found);
+ return;
+ }
+
+ info.set_body(body);
+}
+
+bool TextDatabaseManager::AddPageData(const GURL& url,
+ URLID url_id,
+ VisitID visit_id,
+ Time visit_time,
+ const string16& title,
+ const string16& body) {
+ TextDatabase* db = GetDBForTime(visit_time, true);
+ if (!db)
+ return false;
+
+ TimeTicks beginning_time = TimeTicks::Now();
+
+ // First delete any recently-indexed data for this page. This will delete
+ // anything in the main database, but we don't bother looking through the
+ // archived database.
+ VisitVector visits;
+ visit_database_->GetVisitsForURL(url_id, &visits);
+ size_t our_visit_row_index = visits.size();
+ for (size_t i = 0; i < visits.size(); i++) {
+ // While we're going through all the visits, also find our row so we can
+ // avoid another DB query.
+ if (visits[i].visit_id == visit_id) {
+ our_visit_row_index = i;
+ } else if (visits[i].is_indexed) {
+ visits[i].is_indexed = false;
+ visit_database_->UpdateVisitRow(visits[i]);
+ DeletePageData(visits[i].visit_time, url, NULL);
+ }
+ }
+
+ if (visit_id) {
+ // We're supposed to update the visit database.
+ if (our_visit_row_index >= visits.size()) {
+ NOTREACHED() << "We should always have found a visit when given an ID.";
+ return false;
+ }
+
+ DCHECK(visit_time == visits[our_visit_row_index].visit_time);
+
+ // Update the visit database to reference our addition.
+ visits[our_visit_row_index].is_indexed = true;
+ if (!visit_database_->UpdateVisitRow(visits[our_visit_row_index]))
+ return false;
+ }
+
+ // Now index the data.
+ std::string url_str = URLDatabase::GURLToDatabaseURL(url);
+ bool success = db->AddPageData(visit_time, url_str,
+ ConvertStringForIndexer(title),
+ ConvertStringForIndexer(body));
+
+ UMA_HISTOGRAM_TIMES("History.AddFTSData",
+ TimeTicks::Now() - beginning_time);
+
+ if (history_publisher_)
+ history_publisher_->PublishPageContent(visit_time, url, title, body);
+
+ return success;
+}
+
+void TextDatabaseManager::DeletePageData(Time time, const GURL& url,
+ ChangeSet* change_set) {
+ TextDatabase::DBIdent db_ident = TimeToID(time);
+
+ // We want to open the database for writing, but only if it exists. To
+ // achieve this, we check whether it exists by saying we're not going to
+ // write to it (avoiding the autocreation code normally called when writing)
+ // and then access it for writing only if it succeeds.
+ TextDatabase* db = GetDB(db_ident, false);
+ if (!db)
+ return;
+ db = GetDB(db_ident, true);
+
+ if (change_set)
+ change_set->Add(db_ident);
+
+ db->DeletePageData(time, URLDatabase::GURLToDatabaseURL(url));
+}
+
+void TextDatabaseManager::DeleteFromUncommitted(
+ const std::set<GURL>& restrict_urls, Time begin, Time end) {
+ // First find the beginning of the range to delete. Recall that the list
+ // has the most recent item at the beginning. There won't normally be very
+ // many items, so a brute-force search is fine.
+ RecentChangeList::iterator cur = recent_changes_.begin();
+ if (!end.is_null()) {
+ // Walk from the beginning of the list backwards in time to find the newest
+ // entry that should be deleted.
+ while (cur != recent_changes_.end() && cur->second.visit_time() >= end)
+ ++cur;
+ }
+
+ // Now delete all visits up to the oldest one we were supposed to delete.
+ // Note that if begin is_null, it will be less than or equal to any other
+ // time.
+ if (restrict_urls.empty()) {
+ while (cur != recent_changes_.end() && cur->second.visit_time() >= begin)
+ cur = recent_changes_.Erase(cur);
+ } else {
+ while (cur != recent_changes_.end() && cur->second.visit_time() >= begin) {
+ if (restrict_urls.find(cur->first) != restrict_urls.end())
+ cur = recent_changes_.Erase(cur);
+ else
+ ++cur;
+ }
+ }
+}
+
+void TextDatabaseManager::DeleteAll() {
+ DCHECK_EQ(0, transaction_nesting_) << "Calling DeleteAll in a transaction.";
+
+ InitDBList();
+
+ // Close all open databases.
+ db_cache_.Clear();
+
+ // Now go through and delete all the files.
+ for (DBIdentSet::iterator i = present_databases_.begin();
+ i != present_databases_.end(); ++i) {
+ FilePath file_name = dir_.Append(TextDatabase::IDToFileName(*i));
+ file_util::Delete(file_name, false);
+ }
+}
+
+void TextDatabaseManager::OptimizeChangedDatabases(
+ const ChangeSet& change_set) {
+ for (ChangeSet::DBSet::const_iterator i =
+ change_set.changed_databases_.begin();
+ i != change_set.changed_databases_.end(); ++i) {
+ // We want to open the database for writing, but only if it exists. To
+ // achieve this, we check whether it exists by saying we're not going to
+ // write to it (avoiding the autocreation code normally called when writing)
+ // and then access it for writing only if it succeeds.
+ TextDatabase* db = GetDB(*i, false);
+ if (!db)
+ continue;
+ db = GetDB(*i, true);
+ if (!db)
+ continue; // The file may have changed or something.
+ db->Optimize();
+ }
+}
+
+void TextDatabaseManager::GetTextMatches(
+ const string16& query,
+ const QueryOptions& options,
+ std::vector<TextDatabase::Match>* results,
+ Time* first_time_searched) {
+ results->clear();
+
+ InitDBList();
+ if (present_databases_.empty()) {
+ // Nothing to search.
+ *first_time_searched = options.begin_time;
+ return;
+ }
+
+ // Get the query into the proper format for the individual DBs.
+ string16 fts_query16;
+ query_parser_.ParseQuery(query, &fts_query16);
+ std::string fts_query = UTF16ToUTF8(fts_query16);
+
+ // Need a copy of the options so we can modify the max count for each call
+ // to the individual databases.
+ QueryOptions cur_options(options);
+
+ // Compute the minimum and maximum values for the identifiers that could
+ // encompass the input time range.
+ TextDatabase::DBIdent min_ident = options.begin_time.is_null() ?
+ *present_databases_.begin() :
+ TimeToID(options.begin_time);
+ TextDatabase::DBIdent max_ident = options.end_time.is_null() ?
+ *present_databases_.rbegin() :
+ TimeToID(options.end_time);
+
+ // Iterate over the databases from the most recent backwards.
+ bool checked_one = false;
+ TextDatabase::URLSet found_urls;
+ for (DBIdentSet::reverse_iterator i = present_databases_.rbegin();
+ i != present_databases_.rend();
+ ++i) {
+ // TODO(brettw) allow canceling the query in the middle.
+ // if (canceled_or_something)
+ // break;
+
+ // This loop is brute force: we just walk until we reach the correct
+ // starting time range rather than searching in a smarter way. Users will
+ // have a few dozen files at most, so this should not be an issue.
+ if (*i > max_ident)
+ continue; // Haven't gotten to the time range yet.
+ if (*i < min_ident)
+ break; // Covered all the time range.
+
+ TextDatabase* cur_db = GetDB(*i, false);
+ if (!cur_db)
+ continue;
+
+ // Adjust the max count according to how many results we've already got.
+ if (options.max_count) {
+ cur_options.max_count = options.max_count -
+ static_cast<int>(results->size());
+ }
+
+ // Since we are going backwards in time, it is always OK to pass the
+ // current first_time_searched, since it will always be smaller than
+ // any previous set.
+ cur_db->GetTextMatches(fts_query, cur_options,
+ results, &found_urls, first_time_searched);
+ checked_one = true;
+
+ DCHECK(options.max_count == 0 ||
+ static_cast<int>(results->size()) <= options.max_count);
+ if (options.max_count &&
+ static_cast<int>(results->size()) >= options.max_count)
+ break; // Got the max number of results.
+ }
+
+ // When there were no databases in the range, we need to fix up the min time.
+ if (!checked_one)
+ *first_time_searched = options.begin_time;
+}
+
+TextDatabase* TextDatabaseManager::GetDB(TextDatabase::DBIdent id,
+ bool for_writing) {
+ DBCache::iterator found_db = db_cache_.Get(id);
+ if (found_db != db_cache_.end()) {
+ if (transaction_nesting_ && for_writing &&
+ open_transactions_.find(id) == open_transactions_.end()) {
+ // If we currently have an open transaction, that database is not yet
+ // part of the transaction, and the database will be written to, it needs
+ // to be part of our transaction.
+ found_db->second->BeginTransaction();
+ open_transactions_.insert(id);
+ }
+ return found_db->second;
+ }
+
+ // Need to make the database.
+ TextDatabase* new_db = new TextDatabase(dir_, id, for_writing);
+ if (!new_db->Init()) {
+ delete new_db;
+ return NULL;
+ }
+ db_cache_.Put(id, new_db);
+ present_databases_.insert(id);
+
+ if (transaction_nesting_ && for_writing) {
+ // If we currently have an open transaction and the new database will be
+ // written to, it needs to be part of our transaction.
+ new_db->BeginTransaction();
+ open_transactions_.insert(id);
+ }
+
+ // When no transaction is open, allow this new one to kick out an old one.
+ if (!transaction_nesting_)
+ db_cache_.ShrinkToSize(kCacheDBSize);
+
+ return new_db;
+}
+
+TextDatabase* TextDatabaseManager::GetDBForTime(Time time,
+ bool create_if_necessary) {
+ return GetDB(TimeToID(time), create_if_necessary);
+}
+
+void TextDatabaseManager::ScheduleFlushOldChanges() {
+ factory_.RevokeAll();
+ MessageLoop::current()->PostDelayedTask(FROM_HERE, factory_.NewRunnableMethod(
+ &TextDatabaseManager::FlushOldChanges),
+ kExpirationSec * Time::kMillisecondsPerSecond);
+}
+
+void TextDatabaseManager::FlushOldChanges() {
+ FlushOldChangesForTime(TimeTicks::Now());
+}
+
+void TextDatabaseManager::FlushOldChangesForTime(TimeTicks now) {
+ // The end of the list is the oldest, so we just start from there committing
+ // things until we get something too new.
+ RecentChangeList::reverse_iterator i = recent_changes_.rbegin();
+ while (i != recent_changes_.rend() && i->second.Expired(now)) {
+ AddPageData(i->first, i->second.url_id(), i->second.visit_id(),
+ i->second.visit_time(), i->second.title(), i->second.body());
+ i = recent_changes_.Erase(i);
+ }
+
+ ScheduleFlushOldChanges();
+}
+
+} // namespace history
diff --git a/chrome/browser/history/text_database_manager.h b/chrome/browser/history/text_database_manager.h
new file mode 100644
index 0000000..7f25bf7
--- /dev/null
+++ b/chrome/browser/history/text_database_manager.h
@@ -0,0 +1,311 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_HISTORY_TEXT_DATABASE_MANAGER_H_
+#define CHROME_BROWSER_HISTORY_TEXT_DATABASE_MANAGER_H_
+
+#include <set>
+#include <vector>
+
+#include "base/basictypes.h"
+#include "base/file_path.h"
+#include "base/gtest_prod_util.h"
+#include "base/string16.h"
+#include "base/task.h"
+#include "chrome/browser/history/history_types.h"
+#include "chrome/browser/history/text_database.h"
+#include "chrome/browser/history/query_parser.h"
+#include "chrome/browser/history/url_database.h"
+#include "chrome/common/mru_cache.h"
+
+struct sqlite3;
+
+namespace history {
+
+class HistoryPublisher;
+class VisitDatabase;
+
+// Manages a set of text databases representing different time periods. This
+// will page them in and out as necessary, and will manage queries for times
+// spanning multiple databases.
+//
+// It will also keep a list of partial changes, such as page adds and title and
+// body sets, all of which come in at different times for a given page. When
+// all data is received or enough time has elapsed since adding, the indexed
+ // data will be committed.
+//
+// This allows us to minimize inserts and modifications, which are slow for the
+// full text database, since each page's information is added exactly once.
+//
+// Note: be careful to delete the relevant entries from this uncommitted list
+// when clearing history or this information may get added to the database soon
+// after the clear.
+class TextDatabaseManager {
+ public:
+ // Tracks a set of changes (only deletes need to be supported now) to the
+ // databases. This is opaque to the caller, but allows it to pass back a list
+ // of all databases that it has caused a change to.
+ //
+ // This is necessary for the feature where we optimize full text databases
+ // which have changed as a result of the user deleting history via
+ // OptimizeChangedDatabases. We want to do each affected database only once at
+ // the end of the delete, but we don't want the caller to have to worry about
+ // our internals.
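+ //
+ // Typical delete-then-optimize flow (sketch; |manager|, |visit_time|, and
+ // |url| are illustrative):
+ //
+ //   TextDatabaseManager::ChangeSet changes;
+ //   manager.DeletePageData(visit_time, url, &changes);
+ //   manager.OptimizeChangedDatabases(changes);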
+ class ChangeSet {
+ public:
+ ChangeSet() {}
+
+ private:
+ friend class TextDatabaseManager;
+
+ typedef std::set<TextDatabase::DBIdent> DBSet;
+
+ void Add(TextDatabase::DBIdent id) { changed_databases_.insert(id); }
+
+ DBSet changed_databases_;
+ };
+
+ // You must call Init() to complete initialization.
+ //
+ // |dir| is the directory that will hold the full text database files (there
+ // will be many files named by their date ranges).
+ //
+ // The URL and visit databases are pointers owned by the caller, referring
+ // to the main database (of recent visits). The visit database will be
+ // updated to refer to the added text database entries.
+ TextDatabaseManager(const FilePath& dir,
+ URLDatabase* url_database,
+ VisitDatabase* visit_database);
+ ~TextDatabaseManager();
+
+ // Must call before using other functions. If it returns false, no other
+ // functions should be called.
+ bool Init(const HistoryPublisher* history_publisher);
+
+ // Returns the directory that holds the full text database files.
+ const FilePath& GetDir() { return dir_; }
+
+ // Allows scoping updates. This also allows things to go faster since every
+ // page add doesn't need to be committed to disk (slow). Note that files will
+ // still get created during a transaction.
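+ //
+ // Nesting sketch (illustrative); only the outermost pair does real work:
+ //
+ //   manager.BeginTransaction();   // Outermost call, nesting becomes 1.
+ //   manager.BeginTransaction();   // Nested call, nesting becomes 2.
+ //   manager.AddPageData(...);     // Opens a transaction on the touched DB.
+ //   manager.CommitTransaction();  // Nesting back to 1, nothing committed.
+ //   manager.CommitTransaction();  // Nesting 0: commits all touched DBs.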
+ void BeginTransaction();
+ void CommitTransaction();
+
+ // Sets specific information for the given page to be added to the database.
+ // In normal operation, URLs will be added as the user visits them, the titles
+ // and bodies will come in some time after that. These changes will be
+ // automatically coalesced and added to the database some time in the future
+ // using AddPageData().
+ //
+ // AddPageURL must be called for a given URL (+ its corresponding ID) before
+ // either the title or body set. The visit ID specifies the visit that will
+ // get updated to refer to the full text indexed information. The visit time
+ // should be the time corresponding to that visit in the database.
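+ //
+ // Typical sequence for one page load (sketch; |url| and |visit| are
+ // illustrative):
+ //
+ //   manager.AddPageURL(url, url_id, visit.visit_id, visit.visit_time);
+ //   manager.AddPageTitle(url, title);    // Usually arrives soon after.
+ //   manager.AddPageContents(url, body);  // Completes the uncommitted entry.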
+ void AddPageURL(const GURL& url, URLID url_id, VisitID visit_id,
+ base::Time visit_time);
+ void AddPageTitle(const GURL& url, const string16& title);
+ void AddPageContents(const GURL& url, const string16& body);
+
+ // Adds the given data to the appropriate database file, returning true on
+ // success. The visit database row identified by |visit_id| will be updated
+ // to refer to the full text index entry. If the visit ID is 0, the visit
+ // database will not be updated.
+ bool AddPageData(const GURL& url,
+ URLID url_id,
+ VisitID visit_id,
+ base::Time visit_time,
+ const string16& title,
+ const string16& body);
+
+ // Deletes the instance of indexed data identified by the given time and URL.
+ // Any changes will be tracked in the optional change set for use when calling
+ // OptimizeChangedDatabases later. change_set can be NULL.
+ void DeletePageData(base::Time time, const GURL& url,
+ ChangeSet* change_set);
+
+ // The text database manager keeps a list of changes made via
+ // AddPageURL/Title/Contents that may not be committed to the database yet.
+ // This function removes entries from this list happening between the given
+ // time range. It is called when the user clears their history for a time
+ // range, and we don't want any of our data to "leak." If restrict_urls is
+ // not empty, only changes on those URLs are deleted.
+ //
+ // Either or both times may be is_null to be unbounded in that direction.
+ // When non-null, the range is [begin, end).
+ void DeleteFromUncommitted(const std::set<GURL>& restrict_urls,
+ base::Time begin, base::Time end);
+
+ // Deletes all full text search data by removing the files from the disk.
+ // This must be called OUTSIDE of a transaction since it actually deletes the
+ // files rather than messing with the database.
+ void DeleteAll();
+
+ // Calls optimize on all the databases identified in a given change set (see
+ // the definition of ChangeSet above for more). Optimizing means that old data
+ // will be removed rather than marked unused.
+ void OptimizeChangedDatabases(const ChangeSet& change_set);
+
+ // Executes the given query. See QueryOptions for more info on input.
+ //
+ // The results are filled into |results|, and the first time considered for
+ // the output is in |first_time_searched| (see QueryResults for more).
+ //
+ // This function will return more than one match per URL if there is more than
+ // one entry for that URL in the database.
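+ //
+ // Sketch of a bounded query (field names per QueryOptions; the values are
+ // illustrative):
+ //
+ //   QueryOptions options;
+ //   options.begin_time = some_time;        // Oldest visits to include.
+ //   options.end_time = base::Time::Now();  // Exclusive upper bound.
+ //   options.max_count = 50;                // 0 means no limit.
+ //   std::vector<TextDatabase::Match> matches;
+ //   base::Time first_time_searched;
+ //   manager.GetTextMatches(query, options, &matches, &first_time_searched);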
+ void GetTextMatches(const string16& query,
+ const QueryOptions& options,
+ std::vector<TextDatabase::Match>* results,
+ base::Time* first_time_searched);
+
+ private:
+ // These tests call FlushOldChangesForTime to force expiration.
+ FRIEND_TEST_ALL_PREFIXES(TextDatabaseManagerTest, InsertPartial);
+ FRIEND_TEST_ALL_PREFIXES(TextDatabaseManagerTest, PartialComplete);
+ FRIEND_TEST_ALL_PREFIXES(ExpireHistoryTest, DeleteURLAndFavicon);
+ FRIEND_TEST_ALL_PREFIXES(ExpireHistoryTest, FlushRecentURLsUnstarred);
+ FRIEND_TEST_ALL_PREFIXES(ExpireHistoryTest,
+ FlushRecentURLsUnstarredRestricted);
+
+ // Stores "recent stuff" that has happened with the page, since the page
+ // visit, title, and body all come in at different times.
+ class PageInfo {
+ public:
+ PageInfo(URLID url_id, VisitID visit_id, base::Time visit_time);
+
+ // Getters.
+ URLID url_id() const { return url_id_; }
+ VisitID visit_id() const { return visit_id_; }
+ base::Time visit_time() const { return visit_time_; }
+ const string16& title() const { return title_; }
+ const string16& body() const { return body_; }
+
+ // Setters, we can only update the title and body.
+ void set_title(const string16& ttl);
+ void set_body(const string16& bdy);
+
+ // Returns true if the title or body, respectively, has been set. Since
+ // both the title and body setters will "fix" empty strings to be a space,
+ // these indicate whether the setter was ever called.
+ bool has_title() const { return !title_.empty(); }
+ bool has_body() const { return !body_.empty(); }
+
+ // Returns true if this entry was added too long ago and we should give up
+ // waiting for more data. The current time is passed in as an argument so we
+ // can check many without re-querying the timer.
+ bool Expired(base::TimeTicks now) const;
+
+ private:
+ URLID url_id_;
+ VisitID visit_id_;
+
+ // Time of the visit of the URL. This will be the value stored in the URL
+ // and visit tables for the entry.
+ base::Time visit_time_;
+
+ // When this page entry was created. We have a cap on the maximum time that
+ // an entry will be in the queue before being flushed to the database.
+ base::TimeTicks added_time_;
+
+ // Will be the string " " when set with an empty value, to distinguish
+ // set from unset.
+ string16 title_;
+ string16 body_;
+ };
+
+ // Converts the given time to a database identifier or vice-versa.
+ static TextDatabase::DBIdent TimeToID(base::Time time);
+ static base::Time IDToTime(TextDatabase::DBIdent id);
+
+ // Returns a text database for the given identifier or time. This file will
+ // be created if it doesn't exist and |for_writing| is set. On error,
+ // including the case where the file doesn't exist and |for_writing|
+ // is false, it will return NULL.
+ //
+ // When |for_writing| is set, a transaction on the database will be opened
+ // if there is a transaction open on this manager.
+ //
+ // The pointer will be tracked in the cache. The caller should not store it
+ // or delete it since it will get automatically deleted as necessary.
+ TextDatabase* GetDB(TextDatabase::DBIdent id, bool for_writing);
+ TextDatabase* GetDBForTime(base::Time time, bool for_writing);
+
+ // Populates the present_databases_ list based on which files are on disk.
+ // When the list is already initialized, this will do nothing, so you can
+ // call it whenever you want to ensure the present_databases_ set is filled.
+ void InitDBList();
+
+ // Schedules a call to FlushOldChanges in the future.
+ void ScheduleFlushOldChanges();
+
+ // Checks the recent_changes_ list and commits partial data that has been
+ // around too long.
+ void FlushOldChanges();
+
+ // Given "now," this will expire old things from the recent_changes_ list.
+ // This is used as the backend for FlushOldChanges and is called directly
+ // by the unit tests with fake times.
+ void FlushOldChangesForTime(base::TimeTicks now);
+
+ // Directory holding our index files.
+ const FilePath dir_;
+
+ // Non-owning pointers to the recent history databases for URLs and visits.
+ URLDatabase* url_database_;
+ VisitDatabase* visit_database_;
+
+ // Lists recent additions that we have not yet filled out with the title and
+ // body. Sorted by time, we will flush them when they are complete or have
+ // been in the queue too long without modification.
+ //
+ // We kind of abuse the MRUCache because we never move things around in it
+ // using Get. Instead, we keep them in the order they were inserted, since
+ // this is the metric we use to measure age. The MRUCache gives us an ordered
+ // list with fast lookup by URL.
+ typedef MRUCache<GURL, PageInfo> RecentChangeList;
+ RecentChangeList recent_changes_;
+
+ // Nesting levels of transactions. Since sqlite only allows one open
+ // transaction, we simulate nested transactions by mapping the outermost one
+ // to a real transaction. Since this object never needs to do ROLLBACK, losing
+ // the ability for all transactions to rollback is inconsequential.
+ int transaction_nesting_;
+
+ // The cache owns the TextDatabase pointers, they will be automagically
+ // deleted when the cache entry is removed or expired.
+ typedef OwningMRUCache<TextDatabase::DBIdent, TextDatabase*> DBCache;
+ DBCache db_cache_;
+
+ // Tells us about the existence of database files on disk. All existing
+ // databases will be in here, and non-existent ones will not, so we don't
+ // have to check the disk every time.
+ //
+ // This set is populated LAZILY by InitDBList(), you should call that function
+ // before accessing the list.
+ //
+ // Note that iterators will work on the keys in-order. Normally, reverse
+ // iterators will be used to iterate the keys in reverse-order.
+ typedef std::set<TextDatabase::DBIdent> DBIdentSet;
+ DBIdentSet present_databases_;
+ bool present_databases_loaded_; // Set by InitDBList when populated.
+
+ // Lists all databases with open transactions. These will have to be closed
+ // when the transaction is committed.
+ DBIdentSet open_transactions_;
+
+ QueryParser query_parser_;
+
+ // Generates tasks for our periodic checking of expired "recent changes".
+ ScopedRunnableMethodFactory<TextDatabaseManager> factory_;
+
+ // This object is created and managed by the history backend. We maintain an
+ // opaque pointer to the object for our use.
+ // This can be NULL if there are no indexers registered to receive indexing
+ // data from us.
+ const HistoryPublisher* history_publisher_;
+
+ DISALLOW_COPY_AND_ASSIGN(TextDatabaseManager);
+};
+
+} // namespace history
+
+#endif // CHROME_BROWSER_HISTORY_TEXT_DATABASE_MANAGER_H_
diff --git a/chrome/browser/history/text_database_manager_unittest.cc b/chrome/browser/history/text_database_manager_unittest.cc
new file mode 100644
index 0000000..8e7f27e
--- /dev/null
+++ b/chrome/browser/history/text_database_manager_unittest.cc
@@ -0,0 +1,537 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "app/sql/connection.h"
+#include "base/file_path.h"
+#include "base/file_util.h"
+#include "base/message_loop.h"
+#include "base/utf_string_conversions.h"
+#include "chrome/browser/history/text_database_manager.h"
+#include "chrome/browser/history/visit_database.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+using base::Time;
+using base::TimeDelta;
+using base::TimeTicks;
+
+namespace history {
+
+namespace {
+
+const char* kURL1 = "http://www.google.com/asdf";
+const char* kTitle1 = "Google A";
+const char* kBody1 = "FOO page one.";
+
+const char* kURL2 = "http://www.google.com/qwer";
+const char* kTitle2 = "Google B";
+const char* kBody2 = "FOO two.";
+
+const char* kURL3 = "http://www.google.com/zxcv";
+const char* kTitle3 = "Google C";
+const char* kBody3 = "FOO drei";
+
+const char* kURL4 = "http://www.google.com/hjkl";
+const char* kTitle4 = "Google D";
+const char* kBody4 = "FOO lalala four.";
+
+const char* kURL5 = "http://www.google.com/uiop";
+const char* kTitle5 = "Google cinq";
+const char* kBody5 = "FOO page one.";
+
+// This provides a simple implementation of a URL+VisitDatabase using an
+ // in-memory sqlite connection. The text database manager expects to be able
+ // to update the visit database to keep it in sync.
+class InMemDB : public URLDatabase, public VisitDatabase {
+ public:
+ InMemDB() {
+ EXPECT_TRUE(db_.OpenInMemory());
+ CreateURLTable(false);
+ InitVisitTable();
+ }
+ ~InMemDB() {
+ }
+
+ private:
+ virtual sql::Connection& GetDB() { return db_; }
+
+ sql::Connection db_;
+
+ DISALLOW_COPY_AND_ASSIGN(InMemDB);
+};
+
+// Adds all the pages once, and the first page once more in the next month.
+// The times of all the pages will be filled into |*times|.
+void AddAllPages(TextDatabaseManager& manager, VisitDatabase* visit_db,
+ std::vector<Time>* times) {
+ Time::Exploded exploded;
+ memset(&exploded, 0, sizeof(Time::Exploded));
+
+ // Put the visits in two different months so it will query across databases.
+ exploded.year = 2008;
+ exploded.month = 1;
+ exploded.day_of_month = 3;
+
+ VisitRow visit_row;
+ visit_row.url_id = 1;
+ visit_row.visit_time = Time::FromUTCExploded(exploded);
+ visit_row.referring_visit = 0;
+ visit_row.transition = 0;
+ visit_row.segment_id = 0;
+ visit_row.is_indexed = false;
+ VisitID visit_id = visit_db->AddVisit(&visit_row);
+
+ times->push_back(visit_row.visit_time);
+ manager.AddPageData(GURL(kURL1), visit_row.url_id, visit_row.visit_id,
+ visit_row.visit_time, UTF8ToUTF16(kTitle1),
+ UTF8ToUTF16(kBody1));
+
+ exploded.day_of_month++;
+ visit_row.url_id = 2;
+ visit_row.visit_time = Time::FromUTCExploded(exploded);
+ visit_id = visit_db->AddVisit(&visit_row);
+ times->push_back(visit_row.visit_time);
+ manager.AddPageData(GURL(kURL2), visit_row.url_id, visit_row.visit_id,
+ visit_row.visit_time, UTF8ToUTF16(kTitle2),
+ UTF8ToUTF16(kBody2));
+
+ exploded.day_of_month++;
+ visit_row.url_id = 2;
+ visit_row.visit_time = Time::FromUTCExploded(exploded);
+ visit_id = visit_db->AddVisit(&visit_row);
+ times->push_back(visit_row.visit_time);
+ manager.AddPageData(GURL(kURL3), visit_row.url_id, visit_row.visit_id,
+ visit_row.visit_time, UTF8ToUTF16(kTitle3),
+ UTF8ToUTF16(kBody3));
+
+ // Put the next ones in the next month.
+ exploded.month++;
+ visit_row.url_id = 2;
+ visit_row.visit_time = Time::FromUTCExploded(exploded);
+ visit_id = visit_db->AddVisit(&visit_row);
+ times->push_back(visit_row.visit_time);
+ manager.AddPageData(GURL(kURL4), visit_row.url_id, visit_row.visit_id,
+ visit_row.visit_time, UTF8ToUTF16(kTitle4),
+ UTF8ToUTF16(kBody4));
+
+ exploded.day_of_month++;
+ visit_row.url_id = 2;
+ visit_row.visit_time = Time::FromUTCExploded(exploded);
+ visit_id = visit_db->AddVisit(&visit_row);
+ times->push_back(visit_row.visit_time);
+ manager.AddPageData(GURL(kURL5), visit_row.url_id, visit_row.visit_id,
+ visit_row.visit_time, UTF8ToUTF16(kTitle5),
+ UTF8ToUTF16(kBody5));
+
+ // Put the first one in again in the second month.
+ exploded.day_of_month++;
+ visit_row.url_id = 2;
+ visit_row.visit_time = Time::FromUTCExploded(exploded);
+ visit_id = visit_db->AddVisit(&visit_row);
+ times->push_back(visit_row.visit_time);
+ manager.AddPageData(GURL(kURL1), visit_row.url_id, visit_row.visit_id,
+ visit_row.visit_time, UTF8ToUTF16(kTitle1),
+ UTF8ToUTF16(kBody1));
+}
+
+bool ResultsHaveURL(const std::vector<TextDatabase::Match>& results,
+ const char* url) {
+ GURL gurl(url);
+ for (size_t i = 0; i < results.size(); i++) {
+ if (results[i].url == gurl)
+ return true;
+ }
+ return false;
+}
+
+} // namespace
+
+class TextDatabaseManagerTest : public testing::Test {
+ public:
+ // Called manually by the test so it can report failure to initialize.
+ bool Init() {
+ return file_util::CreateNewTempDirectory(
+ FILE_PATH_LITERAL("TestSearchTest"), &dir_);
+ }
+
+ protected:
+ void SetUp() {
+ }
+
+ void TearDown() {
+ file_util::Delete(dir_, true);
+ }
+
+ MessageLoop message_loop_;
+
+ // Directory containing the databases.
+ FilePath dir_;
+};
+
+// Tests basic querying.
+TEST_F(TextDatabaseManagerTest, InsertQuery) {
+ ASSERT_TRUE(Init());
+ InMemDB visit_db;
+ TextDatabaseManager manager(dir_, &visit_db, &visit_db);
+ ASSERT_TRUE(manager.Init(NULL));
+
+ std::vector<Time> times;
+ AddAllPages(manager, &visit_db, &times);
+
+ QueryOptions options;
+ options.begin_time = times[0] - TimeDelta::FromDays(100);
+ options.end_time = times[times.size() - 1] + TimeDelta::FromDays(100);
+ std::vector<TextDatabase::Match> results;
+ Time first_time_searched;
+ manager.GetTextMatches(UTF8ToUTF16("FOO"), options,
+ &results, &first_time_searched);
+
+ // We should have matched every page.
+ EXPECT_EQ(6U, results.size());
+ EXPECT_TRUE(ResultsHaveURL(results, kURL1));
+ EXPECT_TRUE(ResultsHaveURL(results, kURL2));
+ EXPECT_TRUE(ResultsHaveURL(results, kURL3));
+ EXPECT_TRUE(ResultsHaveURL(results, kURL4));
+ EXPECT_TRUE(ResultsHaveURL(results, kURL5));
+
+ // The first time searched should have been the first page's time or before
+ // (it could have eliminated some time for us).
+ EXPECT_TRUE(first_time_searched <= times[0]);
+}
+
+// Tests that adding page components piecemeal will get them added properly.
+ // This does not supply a visit to update; this mode is used only by the unit
+// tests right now, but we test it anyway.
+TEST_F(TextDatabaseManagerTest, InsertCompleteNoVisit) {
+ ASSERT_TRUE(Init());
+ InMemDB visit_db;
+ TextDatabaseManager manager(dir_, &visit_db, &visit_db);
+ ASSERT_TRUE(manager.Init(NULL));
+
+ // First add one without a visit.
+ const GURL url(kURL1);
+ manager.AddPageURL(url, 0, 0, Time::Now());
+ manager.AddPageTitle(url, UTF8ToUTF16(kTitle1));
+ manager.AddPageContents(url, UTF8ToUTF16(kBody1));
+
+ // Check that the page got added.
+ QueryOptions options;
+ std::vector<TextDatabase::Match> results;
+ Time first_time_searched;
+
+ manager.GetTextMatches(UTF8ToUTF16("FOO"), options,
+ &results, &first_time_searched);
+ ASSERT_EQ(1U, results.size());
+ EXPECT_EQ(kTitle1, UTF16ToUTF8(results[0].title));
+}
+
+// Like InsertCompleteNoVisit but specifies a visit to update. We check that the
+// visit was updated properly.
+TEST_F(TextDatabaseManagerTest, InsertCompleteVisit) {
+ ASSERT_TRUE(Init());
+ InMemDB visit_db;
+ TextDatabaseManager manager(dir_, &visit_db, &visit_db);
+ ASSERT_TRUE(manager.Init(NULL));
+
+ // First add a visit to a page. We can just make up a URL ID since there is
+ // not actually any URL database around.
+ VisitRow visit;
+ visit.url_id = 1;
+ visit.visit_time = Time::Now();
+ visit.referring_visit = 0;
+ visit.transition = PageTransition::LINK;
+ visit.segment_id = 0;
+ visit.is_indexed = false;
+ visit_db.AddVisit(&visit);
+
+ // Add a full text indexed entry for that visit.
+ const GURL url(kURL2);
+ manager.AddPageURL(url, visit.url_id, visit.visit_id, visit.visit_time);
+ manager.AddPageContents(url, UTF8ToUTF16(kBody2));
+ manager.AddPageTitle(url, UTF8ToUTF16(kTitle2));
+
+ // Check that the page got added.
+ QueryOptions options;
+ std::vector<TextDatabase::Match> results;
+ Time first_time_searched;
+
+ manager.GetTextMatches(UTF8ToUTF16("FOO"), options,
+ &results, &first_time_searched);
+ ASSERT_EQ(1U, results.size());
+ EXPECT_EQ(kTitle2, UTF16ToUTF8(results[0].title));
+
+ // Check that the visit got updated for its new indexed state.
+ VisitRow out_visit;
+ ASSERT_TRUE(visit_db.GetRowForVisit(visit.visit_id, &out_visit));
+ EXPECT_TRUE(out_visit.is_indexed);
+}
+
+// Tests that partial inserts that expire are added to the database.
+TEST_F(TextDatabaseManagerTest, InsertPartial) {
+ ASSERT_TRUE(Init());
+ InMemDB visit_db;
+ TextDatabaseManager manager(dir_, &visit_db, &visit_db);
+ ASSERT_TRUE(manager.Init(NULL));
+
+ // Add the first one with just a URL.
+ GURL url1(kURL1);
+ manager.AddPageURL(url1, 0, 0, Time::Now());
+
+ // Now add a second one with a URL and title.
+ GURL url2(kURL2);
+ manager.AddPageURL(url2, 0, 0, Time::Now());
+ manager.AddPageTitle(url2, UTF8ToUTF16(kTitle2));
+
+ // The third one has a URL and body.
+ GURL url3(kURL3);
+ manager.AddPageURL(url3, 0, 0, Time::Now());
+ manager.AddPageContents(url3, UTF8ToUTF16(kBody3));
+
+ // Expire stuff very fast. This assumes that the time between the first
+ // AddPageURL and this line is less than the expiration time (20 seconds).
+ TimeTicks added_time = TimeTicks::Now();
+ TimeTicks expire_time = added_time + TimeDelta::FromSeconds(5);
+ manager.FlushOldChangesForTime(expire_time);
+
+ // Do a query, nothing should be added yet.
+ QueryOptions options;
+ std::vector<TextDatabase::Match> results;
+ Time first_time_searched;
+ manager.GetTextMatches(UTF8ToUTF16("google"), options,
+ &results, &first_time_searched);
+ ASSERT_EQ(0U, results.size());
+
+ // Compute a time threshold that will cause everything to be flushed, and
+ // poke at the manager's internals to cause this to happen.
+ expire_time = added_time + TimeDelta::FromDays(1);
+ manager.FlushOldChangesForTime(expire_time);
+
+ // Now we should have all 3 URLs added.
+ manager.GetTextMatches(UTF8ToUTF16("google"), options,
+ &results, &first_time_searched);
+ ASSERT_EQ(3U, results.size());
+ EXPECT_TRUE(ResultsHaveURL(results, kURL1));
+ EXPECT_TRUE(ResultsHaveURL(results, kURL2));
+ EXPECT_TRUE(ResultsHaveURL(results, kURL3));
+}
+
+// Tests that partial inserts (due to timeouts) will still get updated if the
+// data comes in later.
+TEST_F(TextDatabaseManagerTest, PartialComplete) {
+ ASSERT_TRUE(Init());
+ InMemDB visit_db;
+ TextDatabaseManager manager(dir_, &visit_db, &visit_db);
+ ASSERT_TRUE(manager.Init(NULL));
+
+ Time added_time = Time::Now();
+ GURL url(kURL1);
+
+ // We have to have the URL in the URL and visit databases for this test to
+ // work.
+ URLRow url_row(url);
+ url_row.set_title(UTF8ToUTF16("chocolate"));
+ URLID url_id = visit_db.AddURL(url_row);
+ ASSERT_TRUE(url_id);
+ VisitRow visit_row;
+ visit_row.url_id = url_id;
+ visit_row.visit_time = added_time;
+ visit_db.AddVisit(&visit_row);
+
+ // Add a URL with no title or body, and say that it expired.
+ manager.AddPageURL(url, 0, 0, added_time);
+ TimeTicks expire_time = TimeTicks::Now() + TimeDelta::FromDays(1);
+ manager.FlushOldChangesForTime(expire_time);
+
+ // Add the title. We should be able to query based on that. The title in the
+ // URL row we set above should not come into the picture.
+ manager.AddPageTitle(url, UTF8ToUTF16("Some unique title"));
+ Time first_time_searched;
+ QueryOptions options;
+ std::vector<TextDatabase::Match> results;
+ manager.GetTextMatches(UTF8ToUTF16("unique"), options,
+ &results, &first_time_searched);
+ EXPECT_EQ(1U, results.size());
+ manager.GetTextMatches(UTF8ToUTF16("chocolate"), options,
+ &results, &first_time_searched);
+ EXPECT_EQ(0U, results.size());
+
+ // Now add the body, which should be queryable.
+ manager.AddPageContents(url, UTF8ToUTF16("Very awesome body"));
+ manager.GetTextMatches(UTF8ToUTF16("awesome"), options, &results, &first_time_searched);
+ EXPECT_EQ(1U, results.size());
+
+ // Adding the body will actually copy the title from the URL table rather
+ // than the previously indexed row (we made them not match above). This isn't
+ // necessarily what we want, but it's how it's implemented, and we don't want
+ // to regress it.
+ manager.GetTextMatches(UTF8ToUTF16("chocolate"), options, &results, &first_time_searched);
+ EXPECT_EQ(1U, results.size());
+}
+
+// Tests that changes get properly committed to disk.
+TEST_F(TextDatabaseManagerTest, Writing) {
+ ASSERT_TRUE(Init());
+
+ QueryOptions options;
+ std::vector<TextDatabase::Match> results;
+ Time first_time_searched;
+
+ InMemDB visit_db;
+
+ // Create the manager and write some stuff to it.
+ {
+ TextDatabaseManager manager(dir_, &visit_db, &visit_db);
+ ASSERT_TRUE(manager.Init(NULL));
+
+ std::vector<Time> times;
+ AddAllPages(manager, &visit_db, &times);
+
+ // We should have matched every page.
+ manager.GetTextMatches(UTF8ToUTF16("FOO"), options, &results, &first_time_searched);
+ EXPECT_EQ(6U, results.size());
+ }
+ results.clear();
+
+ // Recreate the manager and make sure it finds the written stuff.
+ {
+ TextDatabaseManager manager(dir_, &visit_db, &visit_db);
+ ASSERT_TRUE(manager.Init(NULL));
+
+ // We should have matched every page again.
+ manager.GetTextMatches(UTF8ToUTF16("FOO"), options,
+ &results, &first_time_searched);
+ EXPECT_EQ(6U, results.size());
+ }
+}
+
+// Tests that changes get properly committed to disk, as in the Writing test
+// above, but when there is a transaction around the adds.
+TEST_F(TextDatabaseManagerTest, WritingTransaction) {
+ ASSERT_TRUE(Init());
+
+ QueryOptions options;
+ std::vector<TextDatabase::Match> results;
+ Time first_time_searched;
+
+ InMemDB visit_db;
+
+ // Create the manager and write some stuff to it.
+ {
+ TextDatabaseManager manager(dir_, &visit_db, &visit_db);
+ ASSERT_TRUE(manager.Init(NULL));
+
+ std::vector<Time> times;
+ manager.BeginTransaction();
+ AddAllPages(manager, &visit_db, &times);
+ // "Forget" to commit, it should be autocommittedd for us.
+
+ // We should have matched every page.
+ manager.GetTextMatches(UTF8ToUTF16("FOO"), options,
+ &results, &first_time_searched);
+ EXPECT_EQ(6U, results.size());
+ }
+ results.clear();
+
+ // Recreate the manager and make sure it finds the written stuff.
+ {
+ TextDatabaseManager manager(dir_, &visit_db, &visit_db);
+ ASSERT_TRUE(manager.Init(NULL));
+
+ // We should have matched every page again.
+ manager.GetTextMatches(UTF8ToUTF16("FOO"), options,
+ &results, &first_time_searched);
+ EXPECT_EQ(6U, results.size());
+ }
+}
+
+// Tests querying where the maximum number of items is met.
+TEST_F(TextDatabaseManagerTest, QueryMax) {
+ ASSERT_TRUE(Init());
+ InMemDB visit_db;
+ TextDatabaseManager manager(dir_, &visit_db, &visit_db);
+ ASSERT_TRUE(manager.Init(NULL));
+
+ std::vector<Time> times;
+ AddAllPages(manager, &visit_db, &times);
+
+ string16 foo = UTF8ToUTF16("FOO");
+
+ QueryOptions options;
+ options.begin_time = times[0] - TimeDelta::FromDays(100);
+ options.end_time = times[times.size() - 1] + TimeDelta::FromDays(100);
+ options.max_count = 2;
+ std::vector<TextDatabase::Match> results;
+ Time first_time_searched;
+ manager.GetTextMatches(foo, options, &results, &first_time_searched);
+
+ // We should have gotten the last two pages as results (the first page is
+ // also the last).
+ EXPECT_EQ(2U, results.size());
+ EXPECT_TRUE(first_time_searched <= times[4]);
+ EXPECT_TRUE(ResultsHaveURL(results, kURL5));
+ EXPECT_TRUE(ResultsHaveURL(results, kURL1));
+
+ // Asking for 4 pages, the first one should be in another DB.
+ options.max_count = 4;
+ manager.GetTextMatches(foo, options, &results, &first_time_searched);
+
+ EXPECT_EQ(4U, results.size());
+ EXPECT_TRUE(first_time_searched <= times[4]);
+ EXPECT_TRUE(ResultsHaveURL(results, kURL3));
+ EXPECT_TRUE(ResultsHaveURL(results, kURL4));
+ EXPECT_TRUE(ResultsHaveURL(results, kURL5));
+ EXPECT_TRUE(ResultsHaveURL(results, kURL1));
+}
+
+// Tests querying backwards in time in chunks.
+TEST_F(TextDatabaseManagerTest, QueryBackwards) {
+ ASSERT_TRUE(Init());
+ InMemDB visit_db;
+ TextDatabaseManager manager(dir_, &visit_db, &visit_db);
+ ASSERT_TRUE(manager.Init(NULL));
+
+ std::vector<Time> times;
+ AddAllPages(manager, &visit_db, &times);
+
+ string16 foo = UTF8ToUTF16("FOO");
+
+ // First do a query for all time, but with a max of 2. This will give us the
+ // last two results and will tell us where to start searching when we want
+ // to go back in time.
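+ // (Paging works by feeding the returned first_time_searched back in as the
+ // next query's end_time, as done below.)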
+ QueryOptions options;
+ options.begin_time = times[0] - TimeDelta::FromDays(100);
+ options.end_time = times[times.size() - 1] + TimeDelta::FromDays(100);
+ options.max_count = 2;
+ std::vector<TextDatabase::Match> results;
+ Time first_time_searched;
+ manager.GetTextMatches(foo, options, &results, &first_time_searched);
+
+ // Check that we got the last two results.
+ EXPECT_EQ(2U, results.size());
+ EXPECT_TRUE(first_time_searched <= times[4]);
+ EXPECT_TRUE(ResultsHaveURL(results, kURL5));
+ EXPECT_TRUE(ResultsHaveURL(results, kURL1));
+
+ // Query the previous two URLs and make sure we got the correct ones.
+ options.end_time = first_time_searched;
+ manager.GetTextMatches(foo, options, &results, &first_time_searched);
+ EXPECT_EQ(2U, results.size());
+ EXPECT_TRUE(first_time_searched <= times[2]);
+ EXPECT_TRUE(ResultsHaveURL(results, kURL3));
+ EXPECT_TRUE(ResultsHaveURL(results, kURL4));
+
+ // Query the previous two URLs...
+ options.end_time = first_time_searched;
+ manager.GetTextMatches(foo, options, &results, &first_time_searched);
+ EXPECT_EQ(2U, results.size());
+ EXPECT_TRUE(first_time_searched <= times[0]);
+ EXPECT_TRUE(ResultsHaveURL(results, kURL2));
+ EXPECT_TRUE(ResultsHaveURL(results, kURL1));
+
+ // Try to query some more, there should be no results.
+ options.end_time = first_time_searched;
+ manager.GetTextMatches(foo, options, &results, &first_time_searched);
+ EXPECT_EQ(0U, results.size());
+}
+
+} // namespace history
diff --git a/chrome/browser/history/text_database_unittest.cc b/chrome/browser/history/text_database_unittest.cc
new file mode 100644
index 0000000..f604301
--- /dev/null
+++ b/chrome/browser/history/text_database_unittest.cc
@@ -0,0 +1,332 @@
+// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <string>
+
+#include "base/file_util.h"
+#include "base/path_service.h"
+#include "base/scoped_ptr.h"
+#include "base/string_util.h"
+#include "base/utf_string_conversions.h"
+#include "chrome/browser/history/text_database.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "testing/platform_test.h"
+
+using base::Time;
+
+namespace history {
+
+namespace {
+
+// Note that all pages have "COUNTTAG" which allows us to count the number of
+// pages in the database without adding any extra functions to the DB object.
+const char kURL1[] = "http://www.google.com/";
+const int kTime1 = 1000;
+const char kTitle1[] = "Google";
+const char kBody1[] =
+ "COUNTTAG Web Images Maps News Shopping Gmail more My Account | "
+ "Sign out Advanced Search Preferences Language Tools Advertising Programs "
+ "- Business Solutions - About Google, 2008 Google";
+
+const char kURL2[] = "http://images.google.com/";
+const int kTime2 = 2000;
+const char kTitle2[] = "Google Image Search";
+const char kBody2[] =
+ "COUNTTAG Web Images Maps News Shopping Gmail more My Account | "
+ "Sign out Advanced Image Search Preferences The most comprehensive image "
+ "search on the web. Want to help improve Google Image Search? Try Google "
+ "Image Labeler. Advertising Programs - Business Solutions - About Google "
+ "2008 Google";
+
+const char kURL3[] = "http://slashdot.org/";
+const int kTime3 = 3000;
+const char kTitle3[] = "Slashdot: News for nerds, stuff that matters";
+const char kBody3[] =
+ "COUNTTAG Slashdot Log In Create Account Subscribe Firehose Why "
+ "Log In? Why Subscribe? Nickname Password Public Terminal Sections "
+ "Main Apple AskSlashdot Backslash Books Developers Games Hardware "
+ "Interviews IT Linux Mobile Politics Science YRO";
+
+// Returns the number of rows currently in the database.
+int RowCount(TextDatabase* db) {
+ QueryOptions options;
+ options.begin_time = Time::FromInternalValue(0);
+ // Leave end_time at now.
+
+ std::vector<TextDatabase::Match> results;
+ Time first_time_searched;
+ TextDatabase::URLSet unique_urls;
+ db->GetTextMatches("COUNTTAG", options, &results, &unique_urls,
+ &first_time_searched);
+ return static_cast<int>(results.size());
+}
+
+// Adds each of the test pages to the database.
+void AddAllTestData(TextDatabase* db) {
+ EXPECT_TRUE(db->AddPageData(
+ Time::FromInternalValue(kTime1), kURL1, kTitle1, kBody1));
+ EXPECT_TRUE(db->AddPageData(
+ Time::FromInternalValue(kTime2), kURL2, kTitle2, kBody2));
+ EXPECT_TRUE(db->AddPageData(
+ Time::FromInternalValue(kTime3), kURL3, kTitle3, kBody3));
+ EXPECT_EQ(3, RowCount(db));
+}
+
+bool ResultsHaveURL(const std::vector<TextDatabase::Match>& results,
+ const char* url) {
+ GURL gurl(url);
+ for (size_t i = 0; i < results.size(); i++) {
+ if (results[i].url == gurl)
+ return true;
+ }
+ return false;
+}
+
+} // namespace
+
+class TextDatabaseTest : public PlatformTest {
+ public:
+ TextDatabaseTest() : db_(NULL) {
+ }
+
+ protected:
+ void SetUp() {
+ PlatformTest::SetUp();
+ PathService::Get(base::DIR_TEMP, &temp_path_);
+ }
+
+ void TearDown() {
+ for (size_t i = 0; i < opened_files_.size(); i++)
+ file_util::Delete(opened_files_[i], false);
+ file_util::Delete(file_name_, false);
+ PlatformTest::TearDown();
+ }
+
+ // Create databases with this function, which will ensure that the files are
+ // deleted on shutdown. Only open one database for each file. Returns NULL on
+ // failure.
+ //
+ // Set |delete_file| to delete any existing file. When creating a file for
+ // the first time, we don't want a file left over from a previous test in a
+ // weird state to affect us.
+ TextDatabase* CreateDB(TextDatabase::DBIdent id,
+ bool allow_create,
+ bool delete_file) {
+ TextDatabase* db = new TextDatabase(temp_path_, id, allow_create);
+
+ if (delete_file)
+ file_util::Delete(db->file_name(), false);
+
+ if (!db->Init()) {
+ delete db;
+ return NULL;
+ }
+ opened_files_.push_back(db->file_name());
+ return db;
+ }
+
+ // Directory containing the databases.
+ FilePath temp_path_;
+
+ // Name of the main database file.
+ FilePath file_name_;
+ sqlite3* db_;
+
+ std::vector<FilePath> opened_files_;
+};
+
+TEST_F(TextDatabaseTest, AttachDetach) {
+ // First database with one page.
+ const int kIdee1 = 200801;
+ scoped_ptr<TextDatabase> db1(CreateDB(kIdee1, true, true));
+ ASSERT_TRUE(!!db1.get());
+ EXPECT_TRUE(db1->AddPageData(
+ Time::FromInternalValue(kTime1), kURL1, kTitle1, kBody1));
+
+ // Second database with one page.
+ const int kIdee2 = 200802;
+ scoped_ptr<TextDatabase> db2(CreateDB(kIdee2, true, true));
+ ASSERT_TRUE(!!db2.get());
+ EXPECT_TRUE(db2->AddPageData(
+ Time::FromInternalValue(kTime2), kURL2, kTitle2, kBody2));
+
+ // Detach, then reattach database one. The file should exist, so we force
+ // opening an existing file.
+ db1.reset();
+ db1.reset(CreateDB(kIdee1, false, false));
+ ASSERT_TRUE(!!db1.get());
+
+ // We should not be able to attach this random database for which no file
+ // exists.
+ const int kIdeeNoExisto = 999999999;
+ scoped_ptr<TextDatabase> db3(CreateDB(kIdeeNoExisto, false, true));
+ EXPECT_FALSE(!!db3.get());
+}
+
+TEST_F(TextDatabaseTest, AddRemove) {
+ // Create a database and add some pages to it.
+ const int kIdee1 = 200801;
+ scoped_ptr<TextDatabase> db(CreateDB(kIdee1, true, true));
+ ASSERT_TRUE(!!db.get());
+ URLID id1 = db->AddPageData(
+ Time::FromInternalValue(kTime1), kURL1, kTitle1, kBody1);
+ EXPECT_NE(0, id1);
+ URLID id2 = db->AddPageData(
+ Time::FromInternalValue(kTime2), kURL2, kTitle2, kBody2);
+ EXPECT_NE(0, id2);
+ URLID id3 = db->AddPageData(
+ Time::FromInternalValue(kTime3), kURL3, kTitle3, kBody3);
+ EXPECT_NE(0, id3);
+ EXPECT_EQ(3, RowCount(db.get()));
+
+ // Make sure we can delete some of the data.
+ db->DeletePageData(Time::FromInternalValue(kTime1), kURL1);
+ EXPECT_EQ(2, RowCount(db.get()));
+
+ // Close and reopen.
+ db.reset(new TextDatabase(temp_path_, kIdee1, false));
+ EXPECT_TRUE(db->Init());
+
+ // Verify that the deleted ID is gone and try to delete another one.
+ EXPECT_EQ(2, RowCount(db.get()));
+ db->DeletePageData(Time::FromInternalValue(kTime2), kURL2);
+ EXPECT_EQ(1, RowCount(db.get()));
+}
+
+TEST_F(TextDatabaseTest, Query) {
+ // Make a database with some pages.
+ const int kIdee1 = 200801;
+ scoped_ptr<TextDatabase> db(CreateDB(kIdee1, true, true));
+ ASSERT_TRUE(!!db.get());
+ AddAllTestData(db.get());
+
+ // Get all the results.
+ QueryOptions options;
+ options.begin_time = Time::FromInternalValue(0);
+
+ std::vector<TextDatabase::Match> results;
+ Time first_time_searched;
+ TextDatabase::URLSet unique_urls;
+ db->GetTextMatches("COUNTTAG", options, &results, &unique_urls,
+ &first_time_searched);
+ EXPECT_TRUE(unique_urls.empty()) << "Didn't ask for unique URLs";
+
+ // All 3 sites should be returned in order.
+ ASSERT_EQ(3U, results.size());
+ EXPECT_EQ(GURL(kURL1), results[2].url);
+ EXPECT_EQ(GURL(kURL2), results[1].url);
+ EXPECT_EQ(GURL(kURL3), results[0].url);
+
+ // Verify the info on those results.
+ EXPECT_TRUE(Time::FromInternalValue(kTime1) == results[2].time);
+ EXPECT_TRUE(Time::FromInternalValue(kTime2) == results[1].time);
+ EXPECT_TRUE(Time::FromInternalValue(kTime3) == results[0].time);
+
+ EXPECT_EQ(std::string(kTitle1), UTF16ToUTF8(results[2].title));
+ EXPECT_EQ(std::string(kTitle2), UTF16ToUTF8(results[1].title));
+ EXPECT_EQ(std::string(kTitle3), UTF16ToUTF8(results[0].title));
+
+ // Should have no matches in the title.
+ EXPECT_EQ(0U, results[0].title_match_positions.size());
+ EXPECT_EQ(0U, results[1].title_match_positions.size());
+ EXPECT_EQ(0U, results[2].title_match_positions.size());
+
+ // We don't want to be dependent on the exact snippet algorithm, but since
+ // we searched for "COUNTTAG", which occurs at the beginning of each
+ // document, we know that each snippet should start with it.
+ EXPECT_TRUE(StartsWithASCII(UTF16ToUTF8(results[0].snippet.text()),
+ "COUNTTAG", false));
+ EXPECT_TRUE(StartsWithASCII(UTF16ToUTF8(results[1].snippet.text()),
+ "COUNTTAG", false));
+ EXPECT_TRUE(StartsWithASCII(UTF16ToUTF8(results[2].snippet.text()),
+ "COUNTTAG", false));
+}
+
+TEST_F(TextDatabaseTest, TimeRange) {
+ // Make a database with some pages.
+ const int kIdee1 = 200801;
+ scoped_ptr<TextDatabase> db(CreateDB(kIdee1, true, true));
+ ASSERT_TRUE(!!db.get());
+ AddAllTestData(db.get());
+
+ // Beginning should be inclusive, and the ending exclusive.
+ // Get all the results.
+ QueryOptions options;
+ options.begin_time = Time::FromInternalValue(kTime1);
+ options.end_time = Time::FromInternalValue(kTime3);
+
+ std::vector<TextDatabase::Match> results;
+ Time first_time_searched;
+ TextDatabase::URLSet unique_urls;
+ db->GetTextMatches("COUNTTAG", options, &results, &unique_urls,
+ &first_time_searched);
+ EXPECT_TRUE(unique_urls.empty()) << "Didn't ask for unique URLs";
+
+ // The first and second should have been returned.
+ EXPECT_EQ(2U, results.size());
+ EXPECT_TRUE(ResultsHaveURL(results, kURL1));
+ EXPECT_TRUE(ResultsHaveURL(results, kURL2));
+ EXPECT_FALSE(ResultsHaveURL(results, kURL3));
+ EXPECT_EQ(kTime1, first_time_searched.ToInternalValue());
+
+ // ---------------------------------------------------------------------------
+ // Do a query where there isn't a result on the begin boundary, so we can
+ // test that the first time searched is set to the minimum time considered
+ // instead of the min value.
+ options.begin_time = Time::FromInternalValue((kTime2 - kTime1) / 2 + kTime1);
+ options.end_time = Time::FromInternalValue(kTime3 + 1);
+ results.clear(); // GetTextMatches does *not* clear the results.
+ db->GetTextMatches("COUNTTAG", options, &results, &unique_urls,
+ &first_time_searched);
+ EXPECT_TRUE(unique_urls.empty()) << "Didn't ask for unique URLs";
+ EXPECT_EQ(options.begin_time.ToInternalValue(),
+ first_time_searched.ToInternalValue());
+
+ // Should have two results, the second and third.
+ EXPECT_EQ(2U, results.size());
+ EXPECT_FALSE(ResultsHaveURL(results, kURL1));
+ EXPECT_TRUE(ResultsHaveURL(results, kURL2));
+ EXPECT_TRUE(ResultsHaveURL(results, kURL3));
+
+ // A query with no results should also set first_time_searched.
+ options.begin_time = Time::FromInternalValue(kTime3 + 1);
+ options.end_time = Time::FromInternalValue(kTime3 * 100);
+ results.clear();
+ db->GetTextMatches("COUNTTAG", options, &results, &unique_urls,
+ &first_time_searched);
+ EXPECT_EQ(options.begin_time.ToInternalValue(),
+ first_time_searched.ToInternalValue());
+}
+
+// Make sure that max_count works.
+TEST_F(TextDatabaseTest, MaxCount) {
+ // Make a database with some pages.
+ const int kIdee1 = 200801;
+ scoped_ptr<TextDatabase> db(CreateDB(kIdee1, true, true));
+ ASSERT_TRUE(!!db.get());
+ AddAllTestData(db.get());
+
+ // Set up the query to return all the results with "Google" (should be 2), but
+ // with a maximum of 1.
+ QueryOptions options;
+ options.begin_time = Time::FromInternalValue(kTime1);
+ options.end_time = Time::FromInternalValue(kTime3 + 1);
+ options.max_count = 1;
+
+ std::vector<TextDatabase::Match> results;
+ Time first_time_searched;
+ TextDatabase::URLSet unique_urls;
+ db->GetTextMatches("google", options, &results, &unique_urls,
+ &first_time_searched);
+ EXPECT_TRUE(unique_urls.empty()) << "Didn't ask for unique URLs";
+
+ // There should be one result, the most recent one.
+ EXPECT_EQ(1U, results.size());
+ EXPECT_TRUE(ResultsHaveURL(results, kURL2));
+
+ // The first time searched should be the time of the returned item.
+ EXPECT_EQ(kTime2, first_time_searched.ToInternalValue());
+}
+
+} // namespace history
diff --git a/chrome/browser/history/thumbnail_database.cc b/chrome/browser/history/thumbnail_database.cc
new file mode 100644
index 0000000..8bf203d
--- /dev/null
+++ b/chrome/browser/history/thumbnail_database.cc
@@ -0,0 +1,551 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/history/thumbnail_database.h"
+
+#include "app/sql/statement.h"
+#include "app/sql/transaction.h"
+#include "base/command_line.h"
+#include "base/file_util.h"
+#if defined(OS_MACOSX)
+#include "base/mac_util.h"
+#endif
+#include "base/ref_counted_memory.h"
+#include "base/time.h"
+#include "base/string_util.h"
+#include "base/utf_string_conversions.h"
+#include "chrome/browser/diagnostics/sqlite_diagnostics.h"
+#include "chrome/browser/history/history_publisher.h"
+#include "chrome/browser/history/url_database.h"
+#include "chrome/common/chrome_switches.h"
+#include "chrome/common/thumbnail_score.h"
+#include "gfx/codec/jpeg_codec.h"
+#include "third_party/skia/include/core/SkBitmap.h"
+
+namespace history {
+
+// Version number of the database.
+static const int kCurrentVersionNumber = 3;
+static const int kCompatibleVersionNumber = 3;
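+// (The compatible number stored in the meta table is the oldest version of
+// the code that can still use the database; Init() treats a file whose
+// compatible version is above kCurrentVersionNumber as too new for us.)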
+
+ThumbnailDatabase::ThumbnailDatabase() : history_publisher_(NULL),
+ use_top_sites_(false) {
+}
+
+ThumbnailDatabase::~ThumbnailDatabase() {
+ // The DBCloseScoper will delete the DB and the cache.
+}
+
+sql::InitStatus ThumbnailDatabase::Init(
+ const FilePath& db_name,
+ const HistoryPublisher* history_publisher) {
+ history_publisher_ = history_publisher;
+ sql::InitStatus status = OpenDatabase(&db_, db_name);
+ if (status != sql::INIT_OK)
+ return status;
+
+ // Scope initialization in a transaction so we can't be partially initialized.
+ sql::Transaction transaction(&db_);
+ transaction.Begin();
+
+#if defined(OS_MACOSX)
+ // Exclude the thumbnails file and its journal from backups.
+ mac_util::SetFileBackupExclusion(db_name, true);
+ FilePath::StringType db_name_string(db_name.value());
+ db_name_string += "-journal";
+ FilePath db_journal_name(db_name_string);
+ mac_util::SetFileBackupExclusion(db_journal_name, true);
+#endif
+
+ // Create the tables.
+ if (!meta_table_.Init(&db_, kCurrentVersionNumber,
+ kCompatibleVersionNumber) ||
+ !InitThumbnailTable() ||
+ !InitFavIconsTable(&db_, false)) {
+ db_.Close();
+ return sql::INIT_FAILURE;
+ }
+ InitFavIconsIndex();
+
+ // Version check. We should not encounter a database too old for us to
+ // handle in the wild, so we just warn and try to continue in that case;
+ // a database that is too new, however, is an error.
+ if (meta_table_.GetCompatibleVersionNumber() > kCurrentVersionNumber) {
+ LOG(WARNING) << "Thumbnail database is too new.";
+ return sql::INIT_TOO_NEW;
+ }
+
+ int cur_version = meta_table_.GetVersionNumber();
+ if (cur_version == 2) {
+ if (!UpgradeToVersion3()) {
+ LOG(WARNING) << "Unable to update to thumbnail database to version 3.";
+ db_.Close();
+ return sql::INIT_FAILURE;
+ }
+ ++cur_version;
+ }
+
+ LOG_IF(WARNING, cur_version < kCurrentVersionNumber) <<
+ "Thumbnail database version " << cur_version << " is too old to handle.";
+
+ // Initialization is complete.
+ if (!transaction.Commit()) {
+ db_.Close();
+ return sql::INIT_FAILURE;
+ }
+
+ return sql::INIT_OK;
+}
+
+sql::InitStatus ThumbnailDatabase::OpenDatabase(sql::Connection* db,
+ const FilePath& db_name) {
+ // Set the exceptional sqlite error handler.
+ db->set_error_delegate(GetErrorHandlerForThumbnailDb());
+
+ // Set the database page size to something larger to give us
+ // better performance (we're typically seek rather than bandwidth limited).
+ // This only has an effect before any tables have been created, otherwise
+ // this is a NOP. Must be a power of 2 and a max of 8192. We use a bigger
+ // one because we're storing larger data (4-16K) in it, so we want a few
+ // blocks per element.
+ db->set_page_size(4096);
+
+ // The UI is generally designed to work well when the thumbnail database is
+ // slow, so we can tolerate much less caching. The file is also very large
+ // and so caching won't save a significant percentage of it for us,
+ // reducing the benefit of caching in the first place. With the default cache
+ // size of 2000 pages, it will take >8MB of memory, so reducing it can be a
+ // big savings.
+ db->set_cache_size(64);
+
+ // Run the database in exclusive mode. Nobody else should be accessing the
+ // database while we're running, and this will give somewhat improved perf.
+ db->set_exclusive_locking();
+
+ if (!db->Open(db_name))
+ return sql::INIT_FAILURE;
+
+ return sql::INIT_OK;
+}
+
+bool ThumbnailDatabase::InitThumbnailTable() {
+ if (!db_.DoesTableExist("thumbnails")) {
+ if (CommandLine::ForCurrentProcess()-> HasSwitch(switches::kTopSites)) {
+ use_top_sites_ = true;
+ return true;
+ }
+ if (!db_.Execute("CREATE TABLE thumbnails ("
+ "url_id INTEGER PRIMARY KEY,"
+ "boring_score DOUBLE DEFAULT 1.0,"
+ "good_clipping INTEGER DEFAULT 0,"
+ "at_top INTEGER DEFAULT 0,"
+ "last_updated INTEGER DEFAULT 0,"
+ "data BLOB)"))
+ return false;
+ }
+ return true;
+}
+
+bool ThumbnailDatabase::UpgradeToVersion3() {
+ if (use_top_sites_) {
+ meta_table_.SetVersionNumber(3);
+ meta_table_.SetCompatibleVersionNumber(
+ std::min(3, kCompatibleVersionNumber));
+ return true; // Not needed after migration to TopSites.
+ }
+
+ // sqlite doesn't like the "ALTER TABLE xxx ADD (column_one, two,
+ // three)" syntax, so list out the commands we need to execute:
+ const char* alterations[] = {
+ "ALTER TABLE thumbnails ADD boring_score DOUBLE DEFAULT 1.0",
+ "ALTER TABLE thumbnails ADD good_clipping INTEGER DEFAULT 0",
+ "ALTER TABLE thumbnails ADD at_top INTEGER DEFAULT 0",
+ "ALTER TABLE thumbnails ADD last_updated INTEGER DEFAULT 0",
+ NULL
+ };
+
+ for (int i = 0; alterations[i] != NULL; ++i) {
+ if (!db_.Execute(alterations[i])) {
+ NOTREACHED();
+ return false;
+ }
+ }
+
+ meta_table_.SetVersionNumber(3);
+ meta_table_.SetCompatibleVersionNumber(std::min(3, kCompatibleVersionNumber));
+ return true;
+}
+
+bool ThumbnailDatabase::RecreateThumbnailTable() {
+ if (use_top_sites_)
+ return true; // Not needed after migration to TopSites.
+
+ if (!db_.Execute("DROP TABLE thumbnails"))
+ return false;
+ return InitThumbnailTable();
+}
+
+bool ThumbnailDatabase::InitFavIconsTable(sql::Connection* db,
+ bool is_temporary) {
+ // Note: if you update the schema, don't forget to update
+ // CopyToTemporaryFavIconTable() as well.
+ const char* name = is_temporary ? "temp_favicons" : "favicons";
+ if (!db->DoesTableExist(name)) {
+ std::string sql;
+ sql.append("CREATE TABLE ");
+ sql.append(name);
+ sql.append("("
+ "id INTEGER PRIMARY KEY,"
+ "url LONGVARCHAR NOT NULL,"
+ "last_updated INTEGER DEFAULT 0,"
+ "image_data BLOB)");
+ if (!db->Execute(sql.c_str()))
+ return false;
+ }
+ return true;
+}
+
+void ThumbnailDatabase::InitFavIconsIndex() {
+ // Add an index on the url column. We ignore errors. Since this is always
+ // called during startup, the index will normally already exist.
+ db_.Execute("CREATE INDEX favicons_url ON favicons(url)");
+}
+
+void ThumbnailDatabase::BeginTransaction() {
+ db_.BeginTransaction();
+}
+
+void ThumbnailDatabase::CommitTransaction() {
+ db_.CommitTransaction();
+}
+
+void ThumbnailDatabase::Vacuum() {
+ DCHECK(db_.transaction_nesting() == 0) <<
+ "Can not have a transaction when vacuuming.";
+ db_.Execute("VACUUM");
+}
+
+void ThumbnailDatabase::SetPageThumbnail(
+ const GURL& url,
+ URLID id,
+ const SkBitmap& thumbnail,
+ const ThumbnailScore& score,
+ base::Time time) {
+ if (use_top_sites_)
+ return; // Not possible after migration to TopSites.
+
+ if (!thumbnail.isNull()) {
+ bool add_thumbnail = true;
+ ThumbnailScore current_score;
+ if (ThumbnailScoreForId(id, &current_score)) {
+ add_thumbnail = ShouldReplaceThumbnailWith(current_score, score);
+ }
+
+ if (add_thumbnail) {
+ sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE,
+ "INSERT OR REPLACE INTO thumbnails "
+ "(url_id, boring_score, good_clipping, at_top, last_updated, data) "
+ "VALUES (?,?,?,?,?,?)"));
+ if (!statement)
+ return;
+
+ // We use 90 quality (out of 100), which is pretty high, because
+ // we're very sensitive to artifacts in these small, highly
+ // detailed images.
+ std::vector<unsigned char> jpeg_data;
+ SkAutoLockPixels thumbnail_lock(thumbnail);
+ bool encoded = gfx::JPEGCodec::Encode(
+ reinterpret_cast<unsigned char*>(thumbnail.getAddr32(0, 0)),
+ gfx::JPEGCodec::FORMAT_BGRA, thumbnail.width(),
+ thumbnail.height(),
+ static_cast<int>(thumbnail.rowBytes()), 90,
+ &jpeg_data);
+
+ if (encoded) {
+ statement.BindInt64(0, id);
+ statement.BindDouble(1, score.boring_score);
+ statement.BindBool(2, score.good_clipping);
+ statement.BindBool(3, score.at_top);
+ statement.BindInt64(4, score.time_at_snapshot.ToTimeT());
+ statement.BindBlob(5, &jpeg_data[0],
+ static_cast<int>(jpeg_data.size()));
+ if (!statement.Run())
+ NOTREACHED() << db_.GetErrorMessage();
+ }
+
+ // Publish the thumbnail to any indexers listening to us.
+ // The tests may send an invalid URL, so avoid publishing those.
+ if (url.is_valid() && history_publisher_ != NULL)
+ history_publisher_->PublishPageThumbnail(jpeg_data, url, time);
+ }
+ } else {
+ if (!DeleteThumbnail(id))
+ DLOG(WARNING) << "Unable to delete thumbnail";
+ }
+}
+
+bool ThumbnailDatabase::GetPageThumbnail(URLID id,
+ std::vector<unsigned char>* data) {
+ if (use_top_sites_)
+ return false; // Not possible after migration to TopSites.
+
+ sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE,
+ "SELECT data FROM thumbnails WHERE url_id=?"));
+ if (!statement)
+ return false;
+
+ statement.BindInt64(0, id);
+ if (!statement.Step())
+ return false; // don't have a thumbnail for this ID
+
+ statement.ColumnBlobAsVector(0, data);
+ return true;
+}
+
+bool ThumbnailDatabase::DeleteThumbnail(URLID id) {
+ if (use_top_sites_)
+ return true; // Not possible after migration to TopSites.
+
+ sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE,
+ "DELETE FROM thumbnails WHERE url_id = ?"));
+ if (!statement)
+ return false;
+
+ statement.BindInt64(0, id);
+ return statement.Run();
+}
+
+bool ThumbnailDatabase::ThumbnailScoreForId(URLID id,
+ ThumbnailScore* score) {
+ if (use_top_sites_)
+ return false; // Not possible after migration to TopSites.
+
+ // Fetch the current thumbnail's information to make sure we
+ // aren't replacing a good thumbnail with one that's worse.
+ sql::Statement select_statement(db_.GetCachedStatement(SQL_FROM_HERE,
+ "SELECT boring_score, good_clipping, at_top, last_updated "
+ "FROM thumbnails WHERE url_id=?"));
+ if (!select_statement) {
+ NOTREACHED() << "Couldn't build select statement!";
+ } else {
+ select_statement.BindInt64(0, id);
+ if (select_statement.Step()) {
+ double current_boring_score = select_statement.ColumnDouble(0);
+ bool current_clipping = select_statement.ColumnBool(1);
+ bool current_at_top = select_statement.ColumnBool(2);
+ base::Time last_updated =
+ base::Time::FromTimeT(select_statement.ColumnInt64(3));
+ *score = ThumbnailScore(current_boring_score, current_clipping,
+ current_at_top, last_updated);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool ThumbnailDatabase::SetFavIcon(URLID icon_id,
+ scoped_refptr<RefCountedMemory> icon_data,
+ base::Time time) {
+ DCHECK(icon_id);
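+ // An empty |icon_data| clears the stored image but still updates the
+ // last_updated time.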
+ if (icon_data->size()) {
+ sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE,
+ "UPDATE favicons SET image_data=?, last_updated=? WHERE id=?"));
+ if (!statement)
+ return false;
+
+ statement.BindBlob(0, icon_data->front(),
+ static_cast<int>(icon_data->size()));
+ statement.BindInt64(1, time.ToTimeT());
+ statement.BindInt64(2, icon_id);
+ return statement.Run();
+ } else {
+ sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE,
+ "UPDATE favicons SET image_data=NULL, last_updated=? WHERE id=?"));
+ if (!statement)
+ return false;
+
+ statement.BindInt64(0, time.ToTimeT());
+ statement.BindInt64(1, icon_id);
+ return statement.Run();
+ }
+}
+
+bool ThumbnailDatabase::SetFavIconLastUpdateTime(FavIconID icon_id,
+ base::Time time) {
+ sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE,
+ "UPDATE favicons SET last_updated=? WHERE id=?"));
+ if (!statement)
+ return false;
+
+ statement.BindInt64(0, time.ToTimeT());
+ statement.BindInt64(1, icon_id);
+ return statement.Run();
+}
+
+FavIconID ThumbnailDatabase::GetFavIconIDForFavIconURL(const GURL& icon_url) {
+ sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE,
+ "SELECT id FROM favicons WHERE url=?"));
+ if (!statement)
+ return 0;
+
+ statement.BindString(0, URLDatabase::GURLToDatabaseURL(icon_url));
+ if (!statement.Step())
+ return 0; // not cached
+
+ return statement.ColumnInt64(0);
+}
+
+bool ThumbnailDatabase::GetFavIcon(
+ FavIconID icon_id,
+ base::Time* last_updated,
+ std::vector<unsigned char>* png_icon_data,
+ GURL* icon_url) {
+ DCHECK(icon_id);
+
+ sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE,
+ "SELECT last_updated, image_data, url FROM favicons WHERE id=?"));
+ if (!statement)
+ return false;
+
+ statement.BindInt64(0, icon_id);
+
+ if (!statement.Step())
+ return false; // No entry for the id.
+
+ *last_updated = base::Time::FromTimeT(statement.ColumnInt64(0));
+ if (statement.ColumnByteLength(1) > 0)
+ statement.ColumnBlobAsVector(1, png_icon_data);
+ if (icon_url)
+ *icon_url = GURL(statement.ColumnString(2));
+
+ return true;
+}
+
+FavIconID ThumbnailDatabase::AddFavIcon(const GURL& icon_url) {
+ sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE,
+ "INSERT INTO favicons (url) VALUES (?)"));
+ if (!statement)
+ return 0;
+
+ statement.BindString(0, URLDatabase::GURLToDatabaseURL(icon_url));
+ if (!statement.Run())
+ return 0;
+ return db_.GetLastInsertRowId();
+}
+
+bool ThumbnailDatabase::DeleteFavIcon(FavIconID id) {
+ sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE,
+ "DELETE FROM favicons WHERE id = ?"));
+ if (!statement)
+ return false;
+ statement.BindInt64(0, id);
+ return statement.Run();
+}
+
+FavIconID ThumbnailDatabase::CopyToTemporaryFavIconTable(FavIconID source) {
+ sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE,
+ "INSERT INTO temp_favicons (url, last_updated, image_data)"
+ "SELECT url, last_updated, image_data "
+ "FROM favicons WHERE id = ?"));
+ if (!statement)
+ return 0;
+ statement.BindInt64(0, source);
+ if (!statement.Run())
+ return 0;
+
+ // We return the ID of the newly inserted favicon.
+ return db_.GetLastInsertRowId();
+}
+
+bool ThumbnailDatabase::CommitTemporaryFavIconTable() {
+ // Delete the old favicons table.
+ if (!db_.Execute("DROP TABLE favicons"))
+ return false;
+
+ // Rename the temporary one.
+ if (!db_.Execute("ALTER TABLE temp_favicons RENAME TO favicons"))
+ return false;
+
+ // The renamed table needs the index (the temporary table doesn't have one).
+ InitFavIconsIndex();
+ return true;
+}
+
+bool ThumbnailDatabase::NeedsMigrationToTopSites() {
+ return !use_top_sites_;
+}
+
+bool ThumbnailDatabase::RenameAndDropThumbnails(const FilePath& old_db_file,
+ const FilePath& new_db_file) {
+ // Init the favicons table in the new file, using the same schema as in the
+ // thumbnails DB.
+ sql::Connection favicons;
+ if (OpenDatabase(&favicons, new_db_file) != sql::INIT_OK)
+ return false;
+
+ if (!InitFavIconsTable(&favicons, false)) {
+ NOTREACHED() << "Couldn't init favicons table.";
+ favicons.Close();
+ return false;
+ }
+ favicons.Close();
+
+ // Can't attach within a transaction.
+ CommitTransaction();
+
+ // Attach new DB.
+ {
+ // This block is needed because otherwise the attach statement is never
+ // cleared from the statement cache and we can't close the DB.
+ sql::Statement attach(db_.GetUniqueStatement("ATTACH ? AS new_favicons"));
+ if (!attach) {
+ NOTREACHED() << "Unable to attach database.";
+ // Keep the transaction open, even though we failed.
+ BeginTransaction();
+ return false;
+ }
+
+#if defined(OS_POSIX)
+ attach.BindString(0, new_db_file.value());
+#else
+ attach.BindString(0, WideToUTF8(new_db_file.value()));
+#endif
+
+ if (!attach.Run()) {
+ NOTREACHED() << db_.GetErrorMessage();
+ BeginTransaction();
+ return false;
+ }
+ }
+
+ // Move favicons to the new DB.
+ if (!db_.Execute("INSERT OR REPLACE INTO new_favicons.favicons "
+ "SELECT * FROM favicons")) {
+ NOTREACHED() << "Unable to copy favicons.";
+ BeginTransaction();
+ return false;
+ }
+
+ if (!db_.Execute("DETACH new_favicons")) {
+ NOTREACHED() << "Unable to detach database.";
+ BeginTransaction();
+ return false;
+ }
+
+ db_.Close();
+
+ // Reset the DB to point to new file.
+ if (OpenDatabase(&db_, new_db_file) != sql::INIT_OK)
+ return false;
+
+ file_util::Delete(old_db_file, false);
+
+ InitFavIconsIndex();
+
+ // Reopen the transaction.
+ BeginTransaction();
+ use_top_sites_ = true;
+ return true;
+}
+
+} // namespace history
diff --git a/chrome/browser/history/thumbnail_database.h b/chrome/browser/history/thumbnail_database.h
new file mode 100644
index 0000000..81d498c
--- /dev/null
+++ b/chrome/browser/history/thumbnail_database.h
@@ -0,0 +1,193 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_HISTORY_THUMBNAIL_DATABASE_H_
+#define CHROME_BROWSER_HISTORY_THUMBNAIL_DATABASE_H_
+
+#include <vector>
+
+#include "app/sql/connection.h"
+#include "app/sql/init_status.h"
+#include "app/sql/meta_table.h"
+#include "base/ref_counted.h"
+#include "chrome/browser/history/history_types.h"
+
+class FilePath;
+class RefCountedMemory;
+struct ThumbnailScore;
+class SkBitmap;
+
+namespace base {
+class Time;
+}
+
+namespace history {
+
+class ExpireHistoryBackend;
+class HistoryPublisher;
+
+// This database interface is owned by the history backend and runs on the
+// history thread. It is a totally separate component from history partially
+// because we may want to move it to its own thread in the future. The
+// operations we will do on this database will be slow, but we can tolerate
+// higher latency (it's OK for thumbnails to come in slower than the rest
+// of the data). Moving this to a separate thread would keep it from
+// blocking potentially higher-priority history operations.
+class ThumbnailDatabase {
+ public:
+ ThumbnailDatabase();
+ ~ThumbnailDatabase();
+
+ // Must be called after creation but before any other methods are called.
+ // When not INIT_OK, no other functions should be called.
+ sql::InitStatus Init(const FilePath& db_name,
+ const HistoryPublisher* history_publisher);
+
+ // Opens the database with the given filename. If the file does not exist,
+ // it is created.
+ // |db| is the database to open.
+ // |db_name| is a path to the database file.
+ static sql::InitStatus OpenDatabase(sql::Connection* db,
+ const FilePath& db_name);
+
+ // Transactions on the database.
+ void BeginTransaction();
+ void CommitTransaction();
+ int transaction_nesting() const {
+ return db_.transaction_nesting();
+ }
+
+ // Vacuums the database. This will cause sqlite to defragment and collect
+ // unused space in the file. It can be VERY SLOW.
+ void Vacuum();
+
+ // Thumbnails ----------------------------------------------------------------
+
+ // Sets the given data to be the thumbnail for the given URL,
+ // overwriting any previous data. If the SkBitmap contains no pixel
+ // data, the thumbnail will be deleted.
+ void SetPageThumbnail(const GURL& url,
+ URLID id,
+ const SkBitmap& thumbnail,
+ const ThumbnailScore& score,
+ base::Time time);
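+ //
+ // Usage sketch (|url|, |id|, |bitmap| and |score| are caller-supplied):
+ //   db.SetPageThumbnail(url, id, bitmap, score, base::Time::Now());
+ //   db.SetPageThumbnail(url, id, SkBitmap(), score, base::Time::Now());
+ // The second call deletes the stored thumbnail, since the bitmap is empty.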
+
+ // Retrieves thumbnail data for the given URL, returning true on success,
+ // false if there is no such thumbnail or there was some other error.
+ bool GetPageThumbnail(URLID id, std::vector<unsigned char>* data);
+
+ // Delete the thumbnail with the provided id. Returns false on failure.
+ bool DeleteThumbnail(URLID id);
+
+ // If there is a thumbnail score for the id provided, retrieves the
+ // current thumbnail score and places it in |score| and returns
+ // true. Returns false otherwise.
+ bool ThumbnailScoreForId(URLID id, ThumbnailScore* score);
+
+ // Called by the expirer to delete all old thumbnails and make a clean table.
+ // Returns true on success.
+ bool RecreateThumbnailTable();
+
+ // FavIcons ------------------------------------------------------------------
+
+ // Sets the bits for a favicon. This should be png encoded data.
+ // The time indicates the access time, and is used to detect when the favicon
+ // should be refreshed.
+ bool SetFavIcon(FavIconID icon_id,
+ scoped_refptr<RefCountedMemory> icon_data,
+ base::Time time);
+
+ // Sets the time the favicon was last updated.
+ bool SetFavIconLastUpdateTime(FavIconID icon_id, base::Time time);
+
+ // Returns the id of the entry in the favicon database with the specified url.
+ // Returns 0 if no entry exists for the specified url.
+ FavIconID GetFavIconIDForFavIconURL(const GURL& icon_url);
+
+ // Gets the png encoded favicon and last updated time for the specified
+ // favicon id.
+ bool GetFavIcon(FavIconID icon_id,
+ base::Time* last_updated,
+ std::vector<unsigned char>* png_icon_data,
+ GURL* icon_url);
+
+ // Adds the favicon URL to the favicon db, returning its id.
+ FavIconID AddFavIcon(const GURL& icon_url);
+
+ // Delete the favicon with the provided id. Returns false on failure.
+ bool DeleteFavIcon(FavIconID id);
+
+ // Temporary FavIcons --------------------------------------------------------
+
+ // Create a temporary table to store favicons. Favicons will be copied to
+ // this table by CopyToTemporaryFavIconTable() and then the original table
+ // will be dropped, leaving only those copied favicons remaining. This is
+ // used to quickly delete most of the favicons when clearing history.
+ bool InitTemporaryFavIconsTable() {
+ return InitFavIconsTable(&db_, true);
+ }
+
+ // Copies the given favicon from the "main" favicon table to the temporary
+ // one. This is only valid in between calls to InitTemporaryFavIconsTable()
+ // and CommitTemporaryFavIconTable().
+ //
+ // The ID of the favicon will change when this copy takes place. The new ID
+ // is returned, or 0 on failure.
+ FavIconID CopyToTemporaryFavIconTable(FavIconID source);
+
+ // Replaces the main favicons table with the temporary table created by
+ // InitTemporaryFavIconsTable(). All favicons that were not copied over
+ // will be deleted. Returns true on success.
+ bool CommitTemporaryFavIconTable();
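+ //
+ // A history-clearing pass might look like this (sketch; the expirer owns
+ // the real sequencing, and |kept_id| is illustrative):
+ //   db.InitTemporaryFavIconsTable();
+ //   FavIconID new_id = db.CopyToTemporaryFavIconTable(kept_id);
+ //   db.CommitTemporaryFavIconTable();  // Drops all favicons not copied.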
+
+ // Returns true iff the thumbnails table exists; migrating to TopSites
+ // drops the thumbnails table.
+ bool NeedsMigrationToTopSites();
+
+ // Renames the database file and drops the Thumbnails table.
+ bool RenameAndDropThumbnails(const FilePath& old_db_file,
+ const FilePath& new_db_file);
+
+ private:
+ friend class ExpireHistoryBackend;
+
+ // Creates the thumbnail table, returning true if the table already exists
+ // or was successfully created.
+ bool InitThumbnailTable();
+
+ // Creates the favicon table, returning true if the table already exists,
+ // or was successfully created. |is_temporary| will be false when generating
+ // the "regular" favicons table. The expirer sets this to true to generate the
+ // temporary table, which will have a different name but the same schema.
+ // |db| is the connection to use for initializing the table.
+ // A different connection is used in RenameAndDropThumbnails, when we
+ // need to copy the favicons between two database files.
+ bool InitFavIconsTable(sql::Connection* db, bool is_temporary);
+
+ // Adds support for the new metadata on web page thumbnails.
+ bool UpgradeToVersion3();
+
+ // Creates the index over the favicon table. This will be called during
+ // initialization after the table is created. This is a separate function
+ // because it is used by CommitTemporaryFavIconTable() to create an index
+ // over the newly-renamed favicons table (formerly the temporary table
+ // with no index).
+ void InitFavIconsIndex();
+
+ sql::Connection db_;
+ sql::MetaTable meta_table_;
+
+ // This object is created and managed by the history backend. We maintain an
+ // opaque pointer to the object for our use.
+ // This can be NULL if there are no indexers registered to receive indexing
+ // data from us.
+ const HistoryPublisher* history_publisher_;
+
+ // True if migration to TopSites has been done and the thumbnails
+ // table should not be used.
+ bool use_top_sites_;
+};
+
+} // namespace history
+
+#endif // CHROME_BROWSER_HISTORY_THUMBNAIL_DATABASE_H_
diff --git a/chrome/browser/history/thumbnail_database_unittest.cc b/chrome/browser/history/thumbnail_database_unittest.cc
new file mode 100644
index 0000000..4d2c2bf
--- /dev/null
+++ b/chrome/browser/history/thumbnail_database_unittest.cc
@@ -0,0 +1,371 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <algorithm>
+#include <vector>
+
+#include "base/basictypes.h"
+#include "base/file_path.h"
+#include "base/file_util.h"
+#include "base/path_service.h"
+#include "base/ref_counted_memory.h"
+#include "base/scoped_temp_dir.h"
+#include "chrome/browser/history/thumbnail_database.h"
+#include "chrome/common/chrome_paths.h"
+#include "chrome/common/thumbnail_score.h"
+#include "chrome/tools/profiles/thumbnail-inl.h"
+#include "gfx/codec/jpeg_codec.h"
+#include "googleurl/src/gurl.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "third_party/skia/include/core/SkBitmap.h"
+
+using base::Time;
+using base::TimeDelta;
+
+namespace history {
+
+namespace {
+
+// Data we'll put into the thumbnail database.
+static const unsigned char blob1[] =
+ "12346102356120394751634516591348710478123649165419234519234512349134";
+static const unsigned char blob2[] =
+ "goiwuegrqrcomizqyzkjalitbahxfjytrqvpqeroicxmnlkhlzunacxaneviawrtxcywhgef";
+static const unsigned char blob3[] =
+ "3716871354098370776510470746794707624107647054607467847164027";
+const double kBoringness = 0.25;
+const double kWorseBoringness = 0.50;
+const double kBetterBoringness = 0.10;
+const double kTotallyBoring = 1.0;
+
+const int64 kPage1 = 1234;
+
+} // namespace
+
+class ThumbnailDatabaseTest : public testing::Test {
+ public:
+ ThumbnailDatabaseTest() {
+ }
+ ~ThumbnailDatabaseTest() {
+ }
+
+ protected:
+ virtual void SetUp() {
+ // Get a temporary directory for the test DB files.
+ ASSERT_TRUE(temp_dir_.CreateUniqueTempDir());
+
+ file_name_ = temp_dir_.path().AppendASCII("TestThumbnails.db");
+ new_file_name_ = temp_dir_.path().AppendASCII("TestFavicons.db");
+
+ google_bitmap_.reset(
+ gfx::JPEGCodec::Decode(kGoogleThumbnail, sizeof(kGoogleThumbnail)));
+ }
+
+ scoped_ptr<SkBitmap> google_bitmap_;
+
+ ScopedTempDir temp_dir_;
+ FilePath file_name_;
+ FilePath new_file_name_;
+};
+
+TEST_F(ThumbnailDatabaseTest, AddDelete) {
+ ThumbnailDatabase db;
+ ASSERT_EQ(sql::INIT_OK, db.Init(file_name_, NULL));
+
+ // Add one page & verify it got added.
+ ThumbnailScore boring(kBoringness, true, true);
+ Time time;
+ GURL gurl;
+ db.SetPageThumbnail(gurl, kPage1, *google_bitmap_, boring, time);
+ ThumbnailScore score_output;
+ ASSERT_TRUE(db.ThumbnailScoreForId(kPage1, &score_output));
+ ASSERT_TRUE(boring.Equals(score_output));
+
+ // Verify a random page is not found.
+ int64 page2 = 5678;
+ std::vector<unsigned char> jpeg_data;
+ EXPECT_FALSE(db.GetPageThumbnail(page2, &jpeg_data));
+ EXPECT_FALSE(db.ThumbnailScoreForId(page2, &score_output));
+
+ // Add another page with a better boringness & verify it got added.
+ ThumbnailScore better_boringness(kBetterBoringness, true, true);
+
+ db.SetPageThumbnail(gurl, page2, *google_bitmap_, better_boringness, time);
+ ASSERT_TRUE(db.ThumbnailScoreForId(page2, &score_output));
+ ASSERT_TRUE(better_boringness.Equals(score_output));
+
+ // Delete the thumbnail for the second page.
+ ThumbnailScore worse_boringness(kWorseBoringness, true, true);
+ db.SetPageThumbnail(gurl, page2, SkBitmap(), worse_boringness, time);
+ ASSERT_FALSE(db.GetPageThumbnail(page2, &jpeg_data));
+ ASSERT_FALSE(db.ThumbnailScoreForId(page2, &score_output));
+
+ // Delete the first thumbnail using the explicit delete API.
+ ASSERT_TRUE(db.DeleteThumbnail(kPage1));
+
+ // Make sure it is gone
+ ASSERT_FALSE(db.ThumbnailScoreForId(kPage1, &score_output));
+ ASSERT_FALSE(db.GetPageThumbnail(kPage1, &jpeg_data));
+ ASSERT_FALSE(db.ThumbnailScoreForId(page2, &score_output));
+ ASSERT_FALSE(db.GetPageThumbnail(page2, &jpeg_data));
+}
+
+TEST_F(ThumbnailDatabaseTest, UseLessBoringThumbnails) {
+ ThumbnailDatabase db;
+ ASSERT_EQ(sql::INIT_OK, db.Init(file_name_, NULL));
+
+ // Add one page & verify it got added.
+ ThumbnailScore boring(kBoringness, true, true);
+
+ Time time;
+ GURL gurl;
+ db.SetPageThumbnail(gurl, kPage1, *google_bitmap_, boring, time);
+ std::vector<unsigned char> jpeg_data;
+ ThumbnailScore score_out;
+ ASSERT_TRUE(db.GetPageThumbnail(kPage1, &jpeg_data));
+ ASSERT_TRUE(db.ThumbnailScoreForId(kPage1, &score_out));
+ ASSERT_TRUE(boring.Equals(score_out));
+
+ // Attempt to update the first page entry with a thumbnail that
+ // is more boring and verify that it doesn't change.
+ ThumbnailScore more_boring(kWorseBoringness, true, true);
+ db.SetPageThumbnail(gurl, kPage1, *google_bitmap_, more_boring, time);
+ ASSERT_TRUE(db.GetPageThumbnail(kPage1, &jpeg_data));
+ ASSERT_TRUE(db.ThumbnailScoreForId(kPage1, &score_out));
+ ASSERT_TRUE(boring.Equals(score_out));
+
+ // Attempt to update the first page entry with a thumbnail that
+ // is less boring and verify that we update it.
+ ThumbnailScore less_boring(kBetterBoringness, true, true);
+ db.SetPageThumbnail(gurl, kPage1, *google_bitmap_, less_boring, time);
+ ASSERT_TRUE(db.GetPageThumbnail(kPage1, &jpeg_data));
+ ASSERT_TRUE(db.ThumbnailScoreForId(kPage1, &score_out));
+ ASSERT_TRUE(less_boring.Equals(score_out));
+}
+
+TEST_F(ThumbnailDatabaseTest, UseAtTopThumbnails) {
+ ThumbnailDatabase db;
+ Time now = Time::Now();
+ ASSERT_EQ(sql::INIT_OK, db.Init(file_name_, NULL));
+
+ // Add one page & verify it got added. Note that it doesn't have
+ // |good_clipping| and isn't |at_top|.
+ ThumbnailScore boring_and_bad(kBoringness, false, false);
+
+ Time time;
+ GURL gurl;
+ db.SetPageThumbnail(gurl, kPage1, *google_bitmap_, boring_and_bad, time);
+ std::vector<unsigned char> jpeg_data;
+ ThumbnailScore score_out;
+ ASSERT_TRUE(db.GetPageThumbnail(kPage1, &jpeg_data));
+ ASSERT_TRUE(db.ThumbnailScoreForId(kPage1, &score_out));
+ ASSERT_TRUE(boring_and_bad.Equals(score_out));
+
+ // A thumbnail that's at the top of the page should replace
+ // thumbnails that are in the middle, for the same boringness.
+ ThumbnailScore boring_but_better(kBoringness, false, true);
+ db.SetPageThumbnail(gurl, kPage1, *google_bitmap_, boring_but_better, time);
+ ASSERT_TRUE(db.GetPageThumbnail(kPage1, &jpeg_data));
+ ASSERT_TRUE(db.ThumbnailScoreForId(kPage1, &score_out));
+ ASSERT_TRUE(boring_but_better.Equals(score_out));
+
+ // The only case where we should replace a thumbnail at the top with
+ // a thumbnail in the middle/bottom is when the current thumbnail is
+ // weirdly stretched and the incoming thumbnail isn't.
+ ThumbnailScore better_boring_bad_framing(kBetterBoringness, false, false);
+ db.SetPageThumbnail(gurl, kPage1, *google_bitmap_, better_boring_bad_framing,
+ time);
+ ASSERT_TRUE(db.GetPageThumbnail(kPage1, &jpeg_data));
+ ASSERT_TRUE(db.ThumbnailScoreForId(kPage1, &score_out));
+ ASSERT_TRUE(boring_but_better.Equals(score_out));
+
+ ThumbnailScore boring_good_clipping(kBoringness, true, false);
+ db.SetPageThumbnail(gurl, kPage1, *google_bitmap_, boring_good_clipping,
+ time);
+ ASSERT_TRUE(db.GetPageThumbnail(kPage1, &jpeg_data));
+ ASSERT_TRUE(db.ThumbnailScoreForId(kPage1, &score_out));
+ ASSERT_TRUE(boring_good_clipping.Equals(score_out));
+
+ // Now that we have a non-stretched, middle of the page thumbnail,
+ // we shouldn't be able to replace it with:
+
+ // 1) A stretched thumbnail in the middle of the page
+ db.SetPageThumbnail(gurl, kPage1, *google_bitmap_,
+ ThumbnailScore(kBetterBoringness, false, false, now),
+ time);
+ ASSERT_TRUE(db.GetPageThumbnail(kPage1, &jpeg_data));
+ ASSERT_TRUE(db.ThumbnailScoreForId(kPage1, &score_out));
+ ASSERT_TRUE(boring_good_clipping.Equals(score_out));
+
+ // 2) A stretched thumbnail at the top of the page
+ db.SetPageThumbnail(gurl, kPage1, *google_bitmap_,
+ ThumbnailScore(kBetterBoringness, false, true, now),
+ time);
+ ASSERT_TRUE(db.GetPageThumbnail(kPage1, &jpeg_data));
+ ASSERT_TRUE(db.ThumbnailScoreForId(kPage1, &score_out));
+ ASSERT_TRUE(boring_good_clipping.Equals(score_out));
+
+ // But it should be replaced by a thumbnail that's clipped properly
+ // and is at the top
+ ThumbnailScore best_score(kBetterBoringness, true, true);
+ db.SetPageThumbnail(gurl, kPage1, *google_bitmap_, best_score, time);
+ ASSERT_TRUE(db.GetPageThumbnail(kPage1, &jpeg_data));
+ ASSERT_TRUE(db.ThumbnailScoreForId(kPage1, &score_out));
+ ASSERT_TRUE(best_score.Equals(score_out));
+}
+
+TEST_F(ThumbnailDatabaseTest, ThumbnailTimeDegradation) {
+ ThumbnailDatabase db;
+ const Time kNow = Time::Now();
+ const Time kFourHoursAgo = kNow - TimeDelta::FromHours(4);
+ const Time kSixHoursAgo = kNow - TimeDelta::FromHours(6);
+ const double kBaseBoringness = 0.305;
+ const double kWorseBoringness = 0.345;
+
+ ASSERT_EQ(sql::INIT_OK, db.Init(file_name_, NULL));
+
+ // Add one page & verify it got added.
+ ThumbnailScore base_boringness(kBaseBoringness, true, true, kSixHoursAgo);
+
+ Time time;
+ GURL gurl;
+ db.SetPageThumbnail(gurl, kPage1, *google_bitmap_, base_boringness, time);
+ std::vector<unsigned char> jpeg_data;
+ ThumbnailScore score_out;
+ ASSERT_TRUE(db.GetPageThumbnail(kPage1, &jpeg_data));
+ ASSERT_TRUE(db.ThumbnailScoreForId(kPage1, &score_out));
+ ASSERT_TRUE(base_boringness.Equals(score_out));
+
+ // Try to add a different thumbnail with a worse score two hours later
+ // (but not enough to trip the boringness degradation threshold).
+ ThumbnailScore two_hours_later(kWorseBoringness, true, true, kFourHoursAgo);
+ db.SetPageThumbnail(gurl, kPage1, *google_bitmap_, two_hours_later, time);
+ ASSERT_TRUE(db.GetPageThumbnail(kPage1, &jpeg_data));
+ ASSERT_TRUE(db.ThumbnailScoreForId(kPage1, &score_out));
+ ASSERT_TRUE(base_boringness.Equals(score_out));
+
+ // After a full six hours, things should have degraded enough that we'll
+ // allow the same thumbnail with the same (worse) boringness that we
+ // previously rejected.
+ ThumbnailScore six_hours_later(kWorseBoringness, true, true, kNow);
+ db.SetPageThumbnail(gurl, kPage1, *google_bitmap_, six_hours_later, time);
+ ASSERT_TRUE(db.GetPageThumbnail(kPage1, &jpeg_data));
+ ASSERT_TRUE(db.ThumbnailScoreForId(kPage1, &score_out));
+ ASSERT_TRUE(five_hours_later.Equals(score_out));
+}
+
+TEST_F(ThumbnailDatabaseTest, NeverAcceptTotallyBoringThumbnail) {
+ // We enforce a maximum boringness score: even in cases where we would
+ // otherwise replace a thumbnail for reasons other than the raw
+ // boringness score, we still reject one that is totally boring.
+ ThumbnailDatabase db;
+ ASSERT_EQ(sql::INIT_OK, db.Init(file_name_, NULL));
+
+ std::vector<unsigned char> jpeg_data;
+ ThumbnailScore score_out;
+ const double kBaseBoringness = 0.50;
+ const Time kNow = Time::Now();
+ const int kSizeOfTable = 4;
+ struct {
+ bool good_scaling;
+ bool at_top;
+ } const hierarchy_table[] = {
+ {false, false},
+ {false, true},
+ {true, false},
+ {true, true}
+ };
+
+ Time time;
+ GURL gurl;
+
+ // Test that, for each entry type, all equal-or-better entry types still
+ // reject thumbnails which are totally boring.
+ for (int i = 0; i < kSizeOfTable; ++i) {
+ ThumbnailScore base(kBaseBoringness,
+ hierarchy_table[i].good_scaling,
+ hierarchy_table[i].at_top,
+ kNow);
+
+ db.SetPageThumbnail(gurl, kPage1, *google_bitmap_, base, time);
+ ASSERT_TRUE(db.GetPageThumbnail(kPage1, &jpeg_data));
+ ASSERT_TRUE(db.ThumbnailScoreForId(kPage1, &score_out));
+ ASSERT_TRUE(base.Equals(score_out));
+
+ for (int j = i; j < kSizeOfTable; ++j) {
+ ThumbnailScore shouldnt_replace(
+ kTotallyBoring, hierarchy_table[j].good_scaling,
+ hierarchy_table[j].at_top, kNow);
+
+ db.SetPageThumbnail(gurl, kPage1, *google_bitmap_, shouldnt_replace,
+ time);
+ ASSERT_TRUE(db.GetPageThumbnail(kPage1, &jpeg_data));
+ ASSERT_TRUE(db.ThumbnailScoreForId(kPage1, &score_out));
+ ASSERT_TRUE(base.Equals(score_out));
+ }
+
+ // Clean up for the next iteration
+ ASSERT_TRUE(db.DeleteThumbnail(kPage1));
+ ASSERT_FALSE(db.GetPageThumbnail(kPage1, &jpeg_data));
+ ASSERT_FALSE(db.ThumbnailScoreForId(kPage1, &score_out));
+ }
+
+ // We should never accept a totally boring thumbnail no matter how
+ // old the current thumbnail is.
+ ThumbnailScore base_boring(kBaseBoringness, true, true, kNow);
+ db.SetPageThumbnail(gurl, kPage1, *google_bitmap_, base_boring, time);
+ ASSERT_TRUE(db.GetPageThumbnail(kPage1, &jpeg_data));
+ ASSERT_TRUE(db.ThumbnailScoreForId(kPage1, &score_out));
+ ASSERT_TRUE(base_boring.Equals(score_out));
+
+ ThumbnailScore totally_boring_in_the_future(
+ kTotallyBoring, true, true, kNow + TimeDelta::FromDays(365));
+ db.SetPageThumbnail(gurl, kPage1, *google_bitmap_,
+ totally_boring_in_the_future, time);
+ ASSERT_TRUE(db.GetPageThumbnail(kPage1, &jpeg_data));
+ ASSERT_TRUE(db.ThumbnailScoreForId(kPage1, &score_out));
+ ASSERT_TRUE(base_boring.Equals(score_out));
+}
+
+TEST_F(ThumbnailDatabaseTest, NeedsMigrationToTopSites) {
+ ThumbnailDatabase db;
+ ASSERT_EQ(sql::INIT_OK, db.Init(file_name_, NULL));
+ db.BeginTransaction();
+ EXPECT_TRUE(db.NeedsMigrationToTopSites());
+ EXPECT_TRUE(db.RenameAndDropThumbnails(file_name_, new_file_name_));
+ EXPECT_FALSE(db.NeedsMigrationToTopSites());
+ EXPECT_FALSE(file_util::PathExists(file_name_));
+ EXPECT_TRUE(file_util::PathExists(new_file_name_));
+}
+
+TEST_F(ThumbnailDatabaseTest, GetFaviconAfterMigrationToTopSites) {
+ ThumbnailDatabase db;
+ ASSERT_EQ(sql::INIT_OK, db.Init(file_name_, NULL));
+ db.BeginTransaction();
+
+ std::vector<unsigned char> data(blob1, blob1 + sizeof(blob1));
+ scoped_refptr<RefCountedBytes> favicon(new RefCountedBytes(data));
+
+ GURL url("http://google.com");
+ FavIconID id = db.AddFavIcon(url);
+ base::Time time = base::Time::Now();
+ db.SetFavIcon(id, favicon, time);
+ EXPECT_TRUE(db.RenameAndDropThumbnails(file_name_, new_file_name_));
+
+ base::Time time_out;
+ std::vector<unsigned char> favicon_out;
+ GURL url_out;
+ EXPECT_TRUE(db.GetFavIcon(id, &time_out, &favicon_out, &url_out));
+ EXPECT_EQ(url, url_out);
+ EXPECT_EQ(time.ToTimeT(), time_out.ToTimeT());
+ ASSERT_EQ(data.size(), favicon_out.size());
+ EXPECT_TRUE(std::equal(data.begin(),
+ data.end(),
+ favicon_out.begin()));
+}
+
+} // namespace history
diff --git a/chrome/browser/history/top_sites.cc b/chrome/browser/history/top_sites.cc
new file mode 100644
index 0000000..1ef4dec
--- /dev/null
+++ b/chrome/browser/history/top_sites.cc
@@ -0,0 +1,572 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/history/top_sites.h"
+
+#include <algorithm>
+
+#include "base/file_util.h"
+#include "base/logging.h"
+#include "chrome/browser/chrome_thread.h"
+#include "chrome/browser/profile.h"
+#include "chrome/browser/history/top_sites_database.h"
+#include "chrome/browser/history/history_notifications.h"
+#include "chrome/browser/history/page_usage_data.h"
+#include "chrome/browser/tab_contents/navigation_controller.h"
+#include "chrome/browser/tab_contents/navigation_entry.h"
+#include "gfx/codec/jpeg_codec.h"
+#include "third_party/skia/include/core/SkBitmap.h"
+
+namespace history {
+
+// How many top sites to store in the cache.
+static const size_t kTopSitesNumber = 20;
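+// How many days of history to search over when querying for top sites.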
+static const int kDaysOfHistory = 90;
+// Time from startup to first HistoryService query.
+static const int64 kUpdateIntervalSecs = 15;
+// Intervals between requests to HistoryService.
+static const int64 kMinUpdateIntervalMinutes = 1;
+static const int64 kMaxUpdateIntervalMinutes = 60;
+
+TopSites::TopSites(Profile* profile) : profile_(profile),
+ mock_history_service_(NULL),
+ last_num_urls_changed_(0),
+ migration_in_progress_(false),
+ waiting_for_results_(true) {
+ registrar_.Add(this, NotificationType::HISTORY_URLS_DELETED,
+ Source<Profile>(profile_));
+ registrar_.Add(this, NotificationType::NAV_ENTRY_COMMITTED,
+ NotificationService::AllSources());
+}
+
+TopSites::~TopSites() {
+ timer_.Stop();
+}
+
+void TopSites::Init(const FilePath& db_name) {
+ db_path_ = db_name;
+ db_.reset(new TopSitesDatabaseImpl());
+ if (!db_->Init(db_name)) {
+ NOTREACHED() << "Failed to initialize database.";
+ return;
+ }
+
+ ChromeThread::PostTask(ChromeThread::DB, FROM_HERE, NewRunnableMethod(
+ this, &TopSites::ReadDatabase));
+
+ // Start the one-shot timer.
+ timer_.Start(base::TimeDelta::FromSeconds(kUpdateIntervalSecs), this,
+ &TopSites::StartQueryForMostVisited);
+}
+
+void TopSites::ReadDatabase() {
+ DCHECK(ChromeThread::CurrentlyOn(ChromeThread::DB));
+ std::map<GURL, Images> thumbnails;
+
+ DCHECK(db_.get());
+ {
+ AutoLock lock(lock_);
+ MostVisitedURLList top_urls;
+ db_->GetPageThumbnails(&top_urls, &thumbnails);
+ StoreMostVisited(&top_urls);
+ } // Lock is released here.
+
+ for (size_t i = 0; i < top_sites_.size(); i++) {
+ GURL url = top_sites_[i].url;
+ Images thumbnail = thumbnails[url];
+ if (!thumbnail.thumbnail.get() || !thumbnail.thumbnail->size()) {
+ LOG(INFO) << "No thumbnail for " << url.spec();
+ } else {
+ SetPageThumbnailNoDB(url, thumbnail.thumbnail,
+ thumbnail.thumbnail_score);
+ }
+ }
+}
+
+// Public function that encodes the bitmap into RefCountedBytes and
+// updates the database.
+bool TopSites::SetPageThumbnail(const GURL& url,
+ const SkBitmap& thumbnail,
+ const ThumbnailScore& score) {
+ bool add_temp_thumbnail = false;
+ if (canonical_urls_.find(url) == canonical_urls_.end()) {
+ if (top_sites_.size() < kTopSitesNumber) {
+ add_temp_thumbnail = true;
+ } else {
+ return false; // This URL is not known to us.
+ }
+ }
+
+ if (!HistoryService::CanAddURL(url))
+ return false; // It's not a real webpage.
+
+ scoped_refptr<RefCountedBytes> thumbnail_data = new RefCountedBytes;
+ SkAutoLockPixels thumbnail_lock(thumbnail);
+ bool encoded = gfx::JPEGCodec::Encode(
+ reinterpret_cast<unsigned char*>(thumbnail.getAddr32(0, 0)),
+ gfx::JPEGCodec::FORMAT_BGRA, thumbnail.width(),
+ thumbnail.height(),
+ static_cast<int>(thumbnail.rowBytes()), 90,
+ &thumbnail_data->data);
+ if (!encoded)
+ return false;
+
+ if (add_temp_thumbnail) {
+ AddTemporaryThumbnail(url, thumbnail_data, score);
+ return true;
+ }
+
+ return SetPageThumbnail(url, thumbnail_data, score);
+}
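+
+// A minimal usage sketch for the SkBitmap variant above. Illustrative only:
+// |profile|, |db_path| and the score values are hypothetical.
+//
+//   scoped_refptr<TopSites> top_sites(new TopSites(profile));
+//   top_sites->Init(db_path);  // |db_path| is the profile's TopSites DB.
+//   SkBitmap bitmap;  // Assume a valid capture of the tab contents.
+//   top_sites->SetPageThumbnail(GURL("http://example.com/"), bitmap,
+//                               ThumbnailScore(0.3, true, true,
+//                                              base::Time::Now()));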
+
+bool TopSites::SetPageThumbnail(const GURL& url,
+ const RefCountedBytes* thumbnail,
+ const ThumbnailScore& score) {
+ if (!SetPageThumbnailNoDB(url, thumbnail, score))
+ return false;
+
+ // Update the database.
+ if (!db_.get())
+ return true;
+ std::map<GURL, size_t>::iterator found = canonical_urls_.find(url);
+ if (found == canonical_urls_.end())
+ return false;
+ size_t index = found->second;
+
+ MostVisitedURL& most_visited = top_sites_[index];
+ ChromeThread::PostTask(ChromeThread::DB, FROM_HERE, NewRunnableMethod(
+ this, &TopSites::WriteThumbnailToDB,
+ most_visited, index, top_images_[most_visited.url]));
+ return true;
+}
+
+void TopSites::WriteThumbnailToDB(const MostVisitedURL& url,
+ int url_rank,
+ const TopSites::Images& thumbnail) {
+ DCHECK(db_.get());
+ DCHECK(ChromeThread::CurrentlyOn(ChromeThread::DB));
+ db_->SetPageThumbnail(url, url_rank, thumbnail);
+}
+
+// private
+bool TopSites::SetPageThumbnailNoDB(const GURL& url,
+ const RefCountedBytes* thumbnail_data,
+ const ThumbnailScore& score) {
+ AutoLock lock(lock_);
+
+ std::map<GURL, size_t>::iterator found = canonical_urls_.find(url);
+ if (found == canonical_urls_.end()) {
+ if (top_sites_.size() >= kTopSitesNumber)
+ return false; // This URL is not known to us.
+
+ // We don't have enough Top Sites - add this one too.
+ MostVisitedURL mv;
+ mv.url = url;
+ mv.redirects.push_back(url);
+ top_sites_.push_back(mv);
+ size_t index = top_sites_.size() - 1;
+ StoreRedirectChain(top_sites_[index].redirects, index);
+ found = canonical_urls_.find(url);
+ }
+
+ MostVisitedURL& most_visited = top_sites_[found->second];
+ Images& image = top_images_[most_visited.url];
+
+ // When comparing the thumbnail scores, we need to take into account the
+ // redirect hops, which are not generated when the thumbnail is because the
+ // redirects weren't known. We fill that in here since we know the redirects.
+ ThumbnailScore new_score_with_redirects(score);
+ new_score_with_redirects.redirect_hops_from_dest =
+ GetRedirectDistanceForURL(most_visited, url);
+
+ if (!ShouldReplaceThumbnailWith(image.thumbnail_score,
+ new_score_with_redirects) &&
+ image.thumbnail.get())
+ return false; // The one we already have is better.
+
+ // Take ownership of the thumbnail data.
+ image.thumbnail = const_cast<RefCountedBytes*>(thumbnail_data);
+ image.thumbnail_score = new_score_with_redirects;
+
+ return true;
+}
+
+void TopSites::GetMostVisitedURLs(CancelableRequestConsumer* consumer,
+ GetTopSitesCallback* callback) {
+ scoped_refptr<CancelableRequest<GetTopSitesCallback> > request(
+ new CancelableRequest<GetTopSitesCallback>(callback));
+  // This ensures cancellation of requests when either the consumer or the
+  // provider is deleted. Deletion of requests is also guaranteed.
+ AddRequest(request, consumer);
+ if (waiting_for_results_) {
+ // A request came in before we have any top sites.
+ // We have to keep track of the requests ourselves.
+ pending_callbacks_.insert(request);
+ return;
+ }
+ if (request->canceled())
+ return;
+ request->ForwardResult(GetTopSitesCallback::TupleType(top_sites_));
+}
+
+bool TopSites::GetPageThumbnail(const GURL& url, RefCountedBytes** data) const {
+ std::map<GURL, Images>::const_iterator found = top_images_.find(url);
+ if (found == top_images_.end())
+ return false; // No thumbnail for this URL.
+
+  *data = found->second.thumbnail.get();
+ return true;
+}
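+
+// A minimal lookup sketch for GetPageThumbnail() above; ServeThumbnail is a
+// hypothetical consumer:
+//
+//   RefCountedBytes* jpeg = NULL;
+//   if (top_sites->GetPageThumbnail(GURL("http://example.com/"), &jpeg))
+//     ServeThumbnail(jpeg);  // |jpeg| remains owned by TopSites.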
+
+void TopSites::UpdateMostVisited(MostVisitedURLList most_visited) {
+ DCHECK(ChromeThread::CurrentlyOn(ChromeThread::DB));
+ // TODO(brettw) filter for blacklist!
+
+ if (!top_sites_.empty()) {
+ std::vector<size_t> added; // Indices into most_visited.
+ std::vector<size_t> deleted; // Indices into top_sites_.
+ std::vector<size_t> moved; // Indices into most_visited.
+ DiffMostVisited(top_sites_, most_visited, &added, &deleted, &moved);
+
+    // When the lists are the same size, #added == #deleted, and
+    // #added + #moved is the number of URLs that changed.
+ last_num_urls_changed_ = added.size() + moved.size();
+
+ // Process the diff: delete from images and disk, add to disk.
+ // Delete all the thumbnails associated with URLs that were deleted.
+ for (size_t i = 0; i < deleted.size(); i++) {
+ const MostVisitedURL& deleted_url = top_sites_[deleted[i]];
+ std::map<GURL, Images>::iterator found =
+ top_images_.find(deleted_url.url);
+ if (found != top_images_.end())
+ top_images_.erase(found);
+
+ // Delete from disk.
+ if (db_.get())
+ db_->RemoveURL(deleted_url);
+ }
+
+ if (db_.get()) {
+ // Write both added and moved urls.
+ for (size_t i = 0; i < added.size(); i++) {
+ MostVisitedURL& added_url = most_visited[added[i]];
+ db_->SetPageThumbnail(added_url, added[i], Images());
+ }
+ for (size_t i = 0; i < moved.size(); i++) {
+      const MostVisitedURL& moved_url = most_visited[moved[i]];
+ db_->UpdatePageRank(moved_url, moved[i]);
+ }
+ }
+ }
+
+ StoreMostVisited(&most_visited);
+ if (migration_in_progress_) {
+    // Copy all thumbnails from the history service.
+ for (size_t i = 0; i < top_sites_.size(); i++) {
+ GURL& url = top_sites_[i].url;
+ Images& img = top_images_[url];
+ if (!img.thumbnail.get() || !img.thumbnail->size()) {
+ StartQueryForThumbnail(i);
+ }
+ }
+ }
+
+ // If we are not expecting any thumbnails, migration is done.
+ if (migration_in_progress_ && migration_pending_urls_.empty())
+ OnMigrationDone();
+
+ timer_.Stop();
+ timer_.Start(GetUpdateDelay(), this,
+ &TopSites::StartQueryForMostVisited);
+}
+
+void TopSites::OnMigrationDone() {
+ migration_in_progress_ = false;
+ HistoryService* hs = profile_->GetHistoryService(Profile::EXPLICIT_ACCESS);
+ // |hs| may be null during unit tests.
+ if (!hs)
+ return;
+ hs->OnTopSitesReady();
+}
+
+void TopSites::AddTemporaryThumbnail(const GURL& url,
+ const RefCountedBytes* thumbnail,
+ const ThumbnailScore& score) {
+ Images& img = temp_thumbnails_map_[url];
+ img.thumbnail = const_cast<RefCountedBytes*>(thumbnail);
+ img.thumbnail_score = score;
+}
+
+void TopSites::StartQueryForThumbnail(size_t index) {
+ DCHECK(migration_in_progress_);
+ migration_pending_urls_.insert(top_sites_[index].url);
+
+ if (mock_history_service_) {
+ // Testing with a mockup.
+ // QueryMostVisitedURLs is not virtual, so we have to duplicate the code.
+ // This calls SetClientData.
+ mock_history_service_->GetPageThumbnail(
+ top_sites_[index].url,
+ &cancelable_consumer_,
+ NewCallback(this, &TopSites::OnThumbnailAvailable),
+ index);
+ return;
+ }
+
+ HistoryService* hs = profile_->GetHistoryService(Profile::EXPLICIT_ACCESS);
+ // |hs| may be null during unit tests.
+ if (!hs)
+ return;
+ HistoryService::Handle handle =
+ hs->GetPageThumbnail(top_sites_[index].url,
+ &cancelable_consumer_,
+ NewCallback(this, &TopSites::OnThumbnailAvailable));
+ cancelable_consumer_.SetClientData(hs, handle, index);
+}
+
+void TopSites::StoreMostVisited(MostVisitedURLList* most_visited) {
+ DCHECK(ChromeThread::CurrentlyOn(ChromeThread::DB));
+ // Take ownership of the most visited data.
+ top_sites_.clear();
+ top_sites_.swap(*most_visited);
+ waiting_for_results_ = false;
+
+ // Save the redirect information for quickly mapping to the canonical URLs.
+ canonical_urls_.clear();
+ for (size_t i = 0; i < top_sites_.size(); i++) {
+ const MostVisitedURL& mv = top_sites_[i];
+ StoreRedirectChain(mv.redirects, i);
+
+ std::map<GURL, Images>::iterator it = temp_thumbnails_map_.begin();
+ GURL canonical_url = GetCanonicalURL(mv.url);
+ for (; it != temp_thumbnails_map_.end(); it++) {
+ // Must map all temp URLs to canonical ones.
+      // temp_thumbnails_map_ contains non-canonical URLs, because
+      // when we add a temp thumbnail, the redirect chain is not yet known.
+ // This is slow, but temp_thumbnails_map_ should have very few URLs.
+ if (canonical_url == GetCanonicalURL(it->first)) {
+ SetPageThumbnail(mv.url, it->second.thumbnail,
+ it->second.thumbnail_score);
+ temp_thumbnails_map_.erase(it);
+ break;
+ }
+ }
+ }
+ if (top_sites_.size() >= kTopSitesNumber)
+ temp_thumbnails_map_.clear();
+}
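+
+// Example of the temp-thumbnail promotion above: if a temporary thumbnail
+// was stored for http://google.com/ and a later history result lists
+// http://www.google.com/ with a redirect from http://google.com/, both URLs
+// canonicalize to the same top site, so the temporary thumbnail is promoted
+// into top_images_ and erased from temp_thumbnails_map_.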
+
+void TopSites::StoreRedirectChain(const RedirectList& redirects,
+ size_t destination) {
+ if (redirects.empty()) {
+ NOTREACHED();
+ return;
+ }
+
+ // Map all the redirected URLs to the destination.
+ for (size_t i = 0; i < redirects.size(); i++)
+ canonical_urls_[redirects[i]] = destination;
+}
+
+GURL TopSites::GetCanonicalURL(const GURL& url) const {
+ std::map<GURL, size_t>::const_iterator found = canonical_urls_.find(url);
+ if (found == canonical_urls_.end())
+ return GURL(); // Don't know anything about this URL.
+ return top_sites_[found->second].url;
+}
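+
+// Example for GetCanonicalURL(): given the chain
+// http://google.com/ -> http://www.google.com/ stored at index 3,
+// canonical_urls_ maps both URLs to 3, so GetCanonicalURL() returns
+// http://www.google.com/ for either of them.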
+
+// static
+int TopSites::GetRedirectDistanceForURL(const MostVisitedURL& most_visited,
+ const GURL& url) {
+ for (size_t i = 0; i < most_visited.redirects.size(); i++) {
+ if (most_visited.redirects[i] == url)
+ return static_cast<int>(most_visited.redirects.size() - i - 1);
+ }
+ NOTREACHED() << "URL should always be found.";
+ return 0;
+}
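+
+// Example for GetRedirectDistanceForURL(): for the chain
+// http://a.com/ -> http://b.com/ -> http://c.com/, the distance is 2 hops
+// for a.com, 1 for b.com, and 0 for the destination c.com.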
+
+// static
+void TopSites::DiffMostVisited(const MostVisitedURLList& old_list,
+ const MostVisitedURLList& new_list,
+ std::vector<size_t>* added_urls,
+ std::vector<size_t>* deleted_urls,
+ std::vector<size_t>* moved_urls) {
+ added_urls->clear();
+ deleted_urls->clear();
+ moved_urls->clear();
+
+ // Add all the old URLs for quick lookup. This maps URLs to the corresponding
+ // index in the input.
+ std::map<GURL, size_t> all_old_urls;
+ for (size_t i = 0; i < old_list.size(); i++)
+ all_old_urls[old_list[i].url] = i;
+
+ // Check all the URLs in the new set to see which ones are new or just moved.
+ // When we find a match in the old set, we'll reset its index to our special
+ // marker. This allows us to quickly identify the deleted ones in a later
+ // pass.
+ const size_t kAlreadyFoundMarker = static_cast<size_t>(-1);
+ for (size_t i = 0; i < new_list.size(); i++) {
+ std::map<GURL, size_t>::iterator found = all_old_urls.find(new_list[i].url);
+ if (found == all_old_urls.end()) {
+ added_urls->push_back(i);
+ } else {
+ if (found->second != i)
+ moved_urls->push_back(i);
+ found->second = kAlreadyFoundMarker;
+ }
+ }
+
+ // Any member without the special marker in the all_old_urls list means that
+ // there wasn't a "new" URL that mapped to it, so it was deleted.
+ for (std::map<GURL, size_t>::const_iterator i = all_old_urls.begin();
+ i != all_old_urls.end(); ++i) {
+ if (i->second != kAlreadyFoundMarker)
+ deleted_urls->push_back(i->second);
+ }
+}
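+
+// Worked example for DiffMostVisited(): old = [A, B, C], new = [A, C, D].
+// D is absent from the old list, so added = {2}; C moves from old index 2
+// to new index 1, so moved = {1}; B is never matched, so deleted = {1}.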
+
+void TopSites::StartQueryForMostVisited() {
+ if (mock_history_service_) {
+ // Testing with a mockup.
+ // QueryMostVisitedURLs is not virtual, so we have to duplicate the code.
+ mock_history_service_->QueryMostVisitedURLs(
+ kTopSitesNumber,
+ kDaysOfHistory,
+ &cancelable_consumer_,
+ NewCallback(this, &TopSites::OnTopSitesAvailable));
+ } else {
+ HistoryService* hs = profile_->GetHistoryService(Profile::EXPLICIT_ACCESS);
+ // |hs| may be null during unit tests.
+ if (hs) {
+ hs->QueryMostVisitedURLs(
+ kTopSitesNumber,
+ kDaysOfHistory,
+ &cancelable_consumer_,
+ NewCallback(this, &TopSites::OnTopSitesAvailable));
+ } else {
+ LOG(INFO) << "History Service not available.";
+ }
+ }
+}
+
+void TopSites::StartMigration() {
+ migration_in_progress_ = true;
+ StartQueryForMostVisited();
+}
+
+base::TimeDelta TopSites::GetUpdateDelay() {
+  if (top_sites_.empty())
+ return base::TimeDelta::FromSeconds(30);
+
+ int64 range = kMaxUpdateIntervalMinutes - kMinUpdateIntervalMinutes;
+ int64 minutes = kMaxUpdateIntervalMinutes -
+ last_num_urls_changed_ * range / top_sites_.size();
+ return base::TimeDelta::FromMinutes(minutes);
+}
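+
+// Example for GetUpdateDelay(): with kMinUpdateIntervalMinutes = 1,
+// kMaxUpdateIntervalMinutes = 60 and 20 cached sites, an update where all
+// 20 URLs changed yields 60 - 20 * 59 / 20 = 1 minute, while an update that
+// changed nothing waits the full 60 minutes.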
+
+void TopSites::OnTopSitesAvailable(
+ CancelableRequestProvider::Handle handle,
+ MostVisitedURLList pages) {
+ if (!pending_callbacks_.empty()) {
+    // Iterate over a copy: a callback may re-enter GetMostVisitedURLs and
+    // mutate pending_callbacks_, which would invalidate our iterators.
+    PendingCallbackSet copy(pending_callbacks_);
+    PendingCallbackSet::iterator i;
+    for (i = copy.begin(); i != copy.end(); ++i) {
+ scoped_refptr<CancelableRequest<GetTopSitesCallback> > request = *i;
+ if (!request->canceled())
+ request->ForwardResult(GetTopSitesCallback::TupleType(pages));
+ }
+ pending_callbacks_.clear();
+ }
+
+ ChromeThread::PostTask(ChromeThread::DB, FROM_HERE, NewRunnableMethod(
+ this, &TopSites::UpdateMostVisited, pages));
+}
+
+void TopSites::OnThumbnailAvailable(CancelableRequestProvider::Handle handle,
+ scoped_refptr<RefCountedBytes> thumbnail) {
+ size_t index;
+ if (mock_history_service_) {
+ index = handle;
+ } else {
+    HistoryService* hs = profile_->GetHistoryService(Profile::EXPLICIT_ACCESS);
+ index = cancelable_consumer_.GetClientData(hs, handle);
+ }
+  DCHECK(index < top_sites_.size());
+
+ if (migration_in_progress_)
+ migration_pending_urls_.erase(top_sites_[index].url);
+
+ if (thumbnail.get() && thumbnail->size()) {
+ const MostVisitedURL& url = top_sites_[index];
+ SetPageThumbnail(url.url, thumbnail, ThumbnailScore());
+ }
+
+ if (migration_in_progress_ && migration_pending_urls_.empty() &&
+ !mock_history_service_)
+ OnMigrationDone();
+}
+
+void TopSites::SetMockHistoryService(MockHistoryService* mhs) {
+ mock_history_service_ = mhs;
+}
+
+void TopSites::Observe(NotificationType type,
+ const NotificationSource& source,
+ const NotificationDetails& details) {
+ if (type == NotificationType::HISTORY_URLS_DELETED) {
+ Details<history::URLsDeletedDetails> deleted_details(details);
+ if (deleted_details->all_history) {
+ top_sites_.clear();
+ ChromeThread::PostTask(ChromeThread::DB, FROM_HERE,
+ NewRunnableMethod(this, &TopSites::ResetDatabase));
+ } else {
+ std::set<GURL>::iterator it;
+ for (it = deleted_details->urls.begin();
+ it != deleted_details->urls.end(); ++it) {
+ for (size_t i = 0; i < top_sites_.size(); i++) {
+ if (top_sites_[i].url == *it) {
+ top_sites_.erase(top_sites_.begin() + i);
+ break;
+ }
+ }
+ }
+ }
+ StartQueryForMostVisited();
+ } else if (type == NotificationType::NAV_ENTRY_COMMITTED) {
+ if (top_sites_.size() < kTopSitesNumber) {
+ const NavigationController::LoadCommittedDetails& load_details =
+ *Details<NavigationController::LoadCommittedDetails>(details).ptr();
+ GURL url = load_details.entry->url();
+ if (canonical_urls_.find(url) == canonical_urls_.end() &&
+ HistoryService::CanAddURL(url)) {
+ // Add this page to the known pages in case the thumbnail comes
+ // in before we get the results.
+ MostVisitedURL mv;
+ mv.url = url;
+ mv.redirects.push_back(url);
+ top_sites_.push_back(mv);
+ size_t index = top_sites_.size() - 1;
+ StoreRedirectChain(top_sites_[index].redirects, index);
+ }
+ StartQueryForMostVisited();
+ }
+ }
+}
+
+void TopSites::ResetDatabase() {
+ DCHECK(ChromeThread::CurrentlyOn(ChromeThread::DB));
+ db_.reset(new TopSitesDatabaseImpl());
+ file_util::Delete(db_path_, false);
+ if (!db_->Init(db_path_)) {
+ NOTREACHED() << "Failed to initialize database.";
+ return;
+ }
+}
+
+} // namespace history
diff --git a/chrome/browser/history/top_sites.h b/chrome/browser/history/top_sites.h
new file mode 100644
index 0000000..4cc6059
--- /dev/null
+++ b/chrome/browser/history/top_sites.h
@@ -0,0 +1,281 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_HISTORY_TOP_SITES_H_
+#define CHROME_BROWSER_HISTORY_TOP_SITES_H_
+
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "base/basictypes.h"
+#include "base/gtest_prod_util.h"
+#include "base/lock.h"
+#include "base/timer.h"
+#include "base/ref_counted.h"
+#include "base/ref_counted_memory.h"
+#include "chrome/browser/cancelable_request.h"
+#include "chrome/browser/history/history_types.h"
+#include "chrome/browser/history/history.h"
+#include "chrome/browser/history/page_usage_data.h"
+#include "chrome/common/notification_service.h"
+#include "chrome/common/thumbnail_score.h"
+#include "googleurl/src/gurl.h"
+
+class SkBitmap;
+class Profile;
+
+namespace history {
+
+class TopSitesBackend;
+class TopSitesDatabase;
+class TopSitesTest;
+
+typedef std::vector<MostVisitedURL> MostVisitedURLList;
+
+// Stores the data for the top "most visited" sites. This includes a cache of
+// the most visited data from history, as well as the corresponding thumbnails
+// of those sites.
+//
+// This class IS threadsafe. It is designed to be used from the UI thread of
+// the browser (where history requests must be kicked off and received from)
+// and from the I/O thread (where new tab page requests come in). Handling the
+// new tab page requests on the I/O thread without proxying to the UI thread is
+// a nontrivial performance win, especially when the browser is starting and
+// the UI thread is busy.
+class TopSites : public NotificationObserver,
+ public base::RefCountedThreadSafe<TopSites>,
+ public CancelableRequestProvider {
+ public:
+ explicit TopSites(Profile* profile);
+
+  // A mockup of a HistoryService used for testing TopSites.
+  class MockHistoryService {
+   public:
+ virtual HistoryService::Handle QueryMostVisitedURLs(
+ int result_count, int days_back,
+ CancelableRequestConsumerBase* consumer,
+ HistoryService::QueryMostVisitedURLsCallback* callback) = 0;
+ virtual ~MockHistoryService() {}
+ virtual void GetPageThumbnail(
+ const GURL& page_url,
+ CancelableRequestConsumerTSimple<size_t>* consumer,
+ HistoryService::ThumbnailDataCallback* callback,
+ size_t index) = 0;
+ };
+
+ struct Images {
+ scoped_refptr<RefCountedBytes> thumbnail;
+ ThumbnailScore thumbnail_score;
+
+ // TODO(brettw): this will eventually store the favicon.
+ // scoped_refptr<RefCountedBytes> favicon;
+ };
+
+ // Initializes TopSites.
+ void Init(const FilePath& db_name);
+
+ // Sets the given thumbnail for the given URL. Returns true if the thumbnail
+ // was updated. False means either the URL wasn't known to us, or we felt
+ // that our current thumbnail was superior to the given one.
+ bool SetPageThumbnail(const GURL& url,
+ const SkBitmap& thumbnail,
+ const ThumbnailScore& score);
+
+ // Callback for GetMostVisitedURLs.
+ typedef Callback1<const MostVisitedURLList&>::Type GetTopSitesCallback;
+
+ // Returns a list of most visited URLs via a callback.
+ // NOTE: the callback may be called immediately if we have the data cached.
+ void GetMostVisitedURLs(CancelableRequestConsumer* consumer,
+ GetTopSitesCallback* callback);
+
+ // Get a thumbnail for a given page. Returns true iff we have the thumbnail.
+ bool GetPageThumbnail(const GURL& url, RefCountedBytes** data) const;
+
+ // For testing with a HistoryService mock.
+ void SetMockHistoryService(MockHistoryService* mhs);
+
+ // Start reading thumbnails from the ThumbnailDatabase.
+ void StartMigration();
+
+ private:
+ friend class base::RefCountedThreadSafe<TopSites>;
+ friend class TopSitesTest;
+ FRIEND_TEST_ALL_PREFIXES(TopSitesTest, GetMostVisited);
+ FRIEND_TEST_ALL_PREFIXES(TopSitesTest, RealDatabase);
+ FRIEND_TEST_ALL_PREFIXES(TopSitesTest, MockDatabase);
+ FRIEND_TEST_ALL_PREFIXES(TopSitesTest, DeleteNotifications);
+ FRIEND_TEST_ALL_PREFIXES(TopSitesTest, GetUpdateDelay);
+ FRIEND_TEST_ALL_PREFIXES(TopSitesTest, Migration);
+ FRIEND_TEST_ALL_PREFIXES(TopSitesTest, QueueingRequestsForTopSites);
+ FRIEND_TEST_ALL_PREFIXES(TopSitesTest, CancelingRequestsForTopSites);
+ FRIEND_TEST_ALL_PREFIXES(TopSitesTest, AddTemporaryThumbnail);
+
+ ~TopSites();
+
+ // Sets the thumbnail without writing to the database. Useful when
+ // reading last known top sites from the DB.
+ // Returns true if the thumbnail was set, false if the existing one is better.
+ bool SetPageThumbnailNoDB(const GURL& url,
+ const RefCountedBytes* thumbnail_data,
+ const ThumbnailScore& score);
+
+ // A version of SetPageThumbnail that takes RefCountedBytes as
+ // returned by HistoryService.
+ bool SetPageThumbnail(const GURL& url,
+ const RefCountedBytes* thumbnail,
+ const ThumbnailScore& score);
+
+ // Query history service for the list of available thumbnails.
+ void StartQueryForMostVisited();
+
+ // Query history service for the thumbnail for a given url. |index|
+ // is the index into top_sites_.
+ void StartQueryForThumbnail(size_t index);
+
+ // Called when history service returns a list of top URLs.
+ void OnTopSitesAvailable(CancelableRequestProvider::Handle handle,
+ MostVisitedURLList data);
+
+ // Called when history service returns a thumbnail.
+ void OnThumbnailAvailable(CancelableRequestProvider::Handle handle,
+ scoped_refptr<RefCountedBytes> thumbnail);
+
+ // Saves the set of the top URLs visited by this user. The 0th item is the
+ // most popular.
+ // DANGER! This will clear all data from the input argument.
+ void StoreMostVisited(MostVisitedURLList* most_visited);
+
+ // Saves the given set of redirects. The redirects are in order of the
+ // given vector, so [0] -> [1] -> [2].
+ void StoreRedirectChain(const RedirectList& redirects,
+ size_t destination);
+
+ // Each item in the most visited view can redirect elsewhere. This returns
+  // the canonical URL identifying the site if the given URL does appear
+ // in the "top sites" list.
+ //
+ // If the given URL is not in the top sites, this will return an empty GURL.
+ GURL GetCanonicalURL(const GURL& url) const;
+
+ // Finds the given URL in the redirect chain for the given TopSite, and
+ // returns the distance from the destination in hops that the given URL is.
+ // The URL is assumed to be in the list. The destination is 0.
+ static int GetRedirectDistanceForURL(const MostVisitedURL& most_visited,
+ const GURL& url);
+
+ // Generates the diff of things that happened between "old" and "new."
+ //
+  // The URLs that are in "new" but not "old" will have their index into
+ // "new" put in |added_urls|. The URLs that are in "old" but not "new" will
+ // have their index into "old" put into |deleted_urls|.
+ //
+ // URLs appearing in both old and new lists but having different indices will
+ // have their index into "new" be put into |moved_urls|.
+ static void DiffMostVisited(const MostVisitedURLList& old_list,
+ const MostVisitedURLList& new_list,
+ std::vector<size_t>* added_urls,
+ std::vector<size_t>* deleted_urls,
+ std::vector<size_t>* moved_urls);
+
+ // Implementation of NotificationObserver.
+ virtual void Observe(NotificationType type,
+ const NotificationSource& source,
+ const NotificationDetails& details);
+
+ // Returns the delay until the next update of history is needed.
+  // Uses last_num_urls_changed_ to scale the interval.
+ base::TimeDelta GetUpdateDelay();
+
+ // The following methods must be run on the DB thread since they
+ // access the database.
+
+ // Reads the database from disk. Called on startup to get the last
+ // known top sites.
+ void ReadDatabase();
+
+ // Write a thumbnail to database.
+ void WriteThumbnailToDB(const MostVisitedURL& url,
+ int url_rank,
+ const TopSites::Images& thumbnail);
+
+ // Updates the top sites list and writes the difference to disk.
+ void UpdateMostVisited(MostVisitedURLList most_visited);
+
+ // Deletes the database file, then reinitializes the database.
+ void ResetDatabase();
+
+ // Called after TopSites completes migration.
+ void OnMigrationDone();
+
+ // Add a thumbnail for an unknown url. See temp_thumbnails_map_.
+ void AddTemporaryThumbnail(const GURL& url,
+ const RefCountedBytes* thumbnail,
+ const ThumbnailScore& score);
+
+ Profile* profile_;
+ // A mockup to use for testing. If NULL, use the real HistoryService
+ // from the profile_. See SetMockHistoryService.
+ MockHistoryService* mock_history_service_;
+ CancelableRequestConsumerTSimple<size_t> cancelable_consumer_;
+ mutable Lock lock_;
+
+ // The cached version of the top sites. The 0th item in this vector is the
+ // #1 site.
+ MostVisitedURLList top_sites_;
+
+ // The images corresponding to the top_sites. This is indexed by the URL of
+  // the top site, so this doesn't have to be shuffled around when the
+  // ordering of the top sites changes. Some top_sites_ entries may not have
+  // images.
+ std::map<GURL, Images> top_images_;
+
+ // Generated from the redirects to and from the most visited pages, this
+ // maps the redirects to the index into top_sites_ that contains it.
+ std::map<GURL, size_t> canonical_urls_;
+
+ // Timer for updating TopSites data.
+ base::OneShotTimer<TopSites> timer_;
+
+ scoped_ptr<TopSitesDatabase> db_;
+ FilePath db_path_;
+
+ NotificationRegistrar registrar_;
+
+ // The number of URLs changed on the last update.
+ size_t last_num_urls_changed_;
+
+ // Are we in the middle of migration from ThumbnailsDatabase to
+ // TopSites?
+ bool migration_in_progress_;
+
+ // URLs for which we are expecting thumbnails.
+ std::set<GURL> migration_pending_urls_;
+
+ // The map of requests for the top sites list. Can only be
+ // non-empty at startup. After we read the top sites from the DB, we'll
+ // always have a cached list.
+ typedef std::set<scoped_refptr<CancelableRequest<GetTopSitesCallback> > >
+ PendingCallbackSet;
+ PendingCallbackSet pending_callbacks_;
+
+ // Are we waiting for the top sites from HistoryService?
+ bool waiting_for_results_;
+
+ // Stores thumbnails for unknown pages. When SetPageThumbnail is
+ // called, if we don't know about that URL yet and we don't have
+ // enough Top Sites (new profile), we store it until the next
+ // UpdateMostVisitedURLs call.
+ std::map<GURL, Images> temp_thumbnails_map_;
+
+ // TODO(brettw): use the blacklist.
+ // std::set<GURL> blacklist_;
+
+ DISALLOW_COPY_AND_ASSIGN(TopSites);
+};
+
+} // namespace history
+
+#endif // CHROME_BROWSER_HISTORY_TOP_SITES_H_
diff --git a/chrome/browser/history/top_sites_database.cc b/chrome/browser/history/top_sites_database.cc
new file mode 100644
index 0000000..99f0bb4
--- /dev/null
+++ b/chrome/browser/history/top_sites_database.cc
@@ -0,0 +1,329 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "app/sql/transaction.h"
+#include "base/string_util.h"
+#include "chrome/browser/diagnostics/sqlite_diagnostics.h"
+#include "chrome/browser/history/top_sites.h"
+#include "chrome/browser/history/top_sites_database.h"
+
+namespace history {
+
+TopSitesDatabaseImpl::TopSitesDatabaseImpl() {
+}
+
+bool TopSitesDatabaseImpl::Init(const FilePath& db_name) {
+ // Settings copied from ThumbnailDatabase.
+ db_.set_error_delegate(GetErrorHandlerForThumbnailDb());
+ db_.set_page_size(4096);
+ db_.set_cache_size(64);
+
+ if (!db_.Open(db_name)) {
+ LOG(WARNING) << db_.GetErrorMessage();
+ return false;
+ }
+
+ return InitThumbnailTable();
+}
+
+bool TopSitesDatabaseImpl::InitThumbnailTable() {
+ if (!db_.DoesTableExist("thumbnails")) {
+ if (!db_.Execute("CREATE TABLE thumbnails ("
+ "url LONGVARCHAR PRIMARY KEY,"
+ "url_rank INTEGER ,"
+ "title LONGVARCHAR,"
+ "thumbnail BLOB,"
+ "redirects LONGVARCHAR,"
+ "boring_score DOUBLE DEFAULT 1.0, "
+ "good_clipping INTEGER DEFAULT 0, "
+ "at_top INTEGER DEFAULT 0, "
+ "last_updated INTEGER DEFAULT 0) ")) {
+ LOG(WARNING) << db_.GetErrorMessage();
+ return false;
+ }
+ }
+ return true;
+}
+
+void TopSitesDatabaseImpl::GetPageThumbnails(
+    MostVisitedURLList* urls,
+    std::map<GURL, TopSites::Images>* thumbnails) {
+ sql::Statement statement(db_.GetCachedStatement(
+ SQL_FROM_HERE,
+ "SELECT url, url_rank, title, thumbnail, redirects, "
+ "boring_score, good_clipping, at_top, last_updated "
+ "FROM thumbnails ORDER BY url_rank "));
+
+ if (!statement) {
+ LOG(WARNING) << db_.GetErrorMessage();
+ return;
+ }
+
+ urls->clear();
+ thumbnails->clear();
+
+ while (statement.Step()) {
+ // Results are sorted by url_rank.
+ MostVisitedURL url;
+ GURL gurl(statement.ColumnString(0));
+ url.url = gurl;
+ url.title = statement.ColumnString16(2);
+ std::string redirects = statement.ColumnString(4);
+ SetRedirects(redirects, &url);
+ urls->push_back(url);
+
+ std::vector<unsigned char> data;
+ statement.ColumnBlobAsVector(3, &data);
+ TopSites::Images thumbnail;
+ thumbnail.thumbnail = RefCountedBytes::TakeVector(&data);
+ thumbnail.thumbnail_score.boring_score = statement.ColumnDouble(5);
+ thumbnail.thumbnail_score.good_clipping = statement.ColumnBool(6);
+ thumbnail.thumbnail_score.at_top = statement.ColumnBool(7);
+ thumbnail.thumbnail_score.time_at_snapshot =
+ base::Time::FromInternalValue(statement.ColumnInt64(8));
+
+ (*thumbnails)[gurl] = thumbnail;
+ }
+}
+
+// static
+std::string TopSitesDatabaseImpl::GetRedirects(const MostVisitedURL& url) {
+ std::vector<std::string> redirects;
+ for (size_t i = 0; i < url.redirects.size(); i++)
+ redirects.push_back(url.redirects[i].spec());
+ return JoinString(redirects, ' ');
+}
+
+// static
+void TopSitesDatabaseImpl::SetRedirects(const std::string& redirects,
+ MostVisitedURL* url) {
+ std::vector<std::string> redirects_vector;
+ SplitStringAlongWhitespace(redirects, &redirects_vector);
+ for (size_t i = 0; i < redirects_vector.size(); i++)
+ url->redirects.push_back(GURL(redirects_vector[i]));
+}
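+
+// Example for GetRedirects()/SetRedirects(): GetRedirects() flattens the
+// chain {http://a.com/, http://b.com/} into the single string
+// "http://a.com/ http://b.com/"; SetRedirects() splits that string on
+// whitespace and restores the same GURL list.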
+
+void TopSitesDatabaseImpl::SetPageThumbnail(const MostVisitedURL& url,
+ int new_rank,
+ const TopSites::Images& thumbnail) {
+ sql::Transaction transaction(&db_);
+ transaction.Begin();
+
+ int rank = GetURLRank(url);
+ if (rank == -1) {
+ AddPageThumbnail(url, new_rank, thumbnail);
+ } else {
+ UpdatePageRankNoTransaction(url, new_rank);
+ UpdatePageThumbnail(url, thumbnail);
+ }
+
+ transaction.Commit();
+}
+
+void TopSitesDatabaseImpl::UpdatePageThumbnail(
+ const MostVisitedURL& url, const TopSites::Images& thumbnail) {
+ sql::Statement statement(db_.GetCachedStatement(
+ SQL_FROM_HERE,
+ "UPDATE thumbnails SET "
+ "title = ?, thumbnail = ?, redirects = ?, "
+ "boring_score = ?, good_clipping = ?, at_top = ?, last_updated = ? "
+ "WHERE url = ? "));
+ if (!statement)
+ return;
+
+ statement.BindString16(0, url.title);
+ if (thumbnail.thumbnail.get()) {
+ statement.BindBlob(1, &thumbnail.thumbnail->data.front(),
+ static_cast<int>(thumbnail.thumbnail->data.size()));
+ }
+ statement.BindString(2, GetRedirects(url));
+ const ThumbnailScore& score = thumbnail.thumbnail_score;
+ statement.BindDouble(3, score.boring_score);
+ statement.BindBool(4, score.good_clipping);
+ statement.BindBool(5, score.at_top);
+ statement.BindInt64(6, score.time_at_snapshot.ToInternalValue());
+ statement.BindString(7, url.url.spec());
+ if (!statement.Run())
+ NOTREACHED() << db_.GetErrorMessage();
+}
+
+void TopSitesDatabaseImpl::AddPageThumbnail(const MostVisitedURL& url,
+ int new_rank,
+ const TopSites::Images& thumbnail) {
+ int count = GetRowCount();
+
+ sql::Statement statement(db_.GetCachedStatement(
+ SQL_FROM_HERE,
+ "INSERT OR REPLACE INTO thumbnails "
+ "(url, url_rank, title, thumbnail, redirects, "
+ "boring_score, good_clipping, at_top, last_updated) "
+ "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)"));
+ if (!statement)
+ return;
+
+ statement.BindString(0, url.url.spec());
+ statement.BindInt(1, count); // Make it the last url.
+ statement.BindString16(2, url.title);
+ if (thumbnail.thumbnail.get()) {
+ statement.BindBlob(3, &thumbnail.thumbnail->data.front(),
+ static_cast<int>(thumbnail.thumbnail->data.size()));
+ }
+ statement.BindString(4, GetRedirects(url));
+ const ThumbnailScore& score = thumbnail.thumbnail_score;
+ statement.BindDouble(5, score.boring_score);
+ statement.BindBool(6, score.good_clipping);
+ statement.BindBool(7, score.at_top);
+ statement.BindInt64(8, score.time_at_snapshot.ToInternalValue());
+ if (!statement.Run())
+ NOTREACHED() << db_.GetErrorMessage();
+
+ UpdatePageRankNoTransaction(url, new_rank);
+}
+
+void TopSitesDatabaseImpl::UpdatePageRank(const MostVisitedURL& url,
+ int new_rank) {
+ sql::Transaction transaction(&db_);
+ transaction.Begin();
+ UpdatePageRankNoTransaction(url, new_rank);
+ transaction.Commit();
+}
+
+// Caller should have a transaction open.
+void TopSitesDatabaseImpl::UpdatePageRankNoTransaction(
+ const MostVisitedURL& url, int new_rank) {
+ int prev_rank = GetURLRank(url);
+ if (prev_rank == -1) {
+ NOTREACHED() << "Updating rank of an unknown URL: " << url.url.spec();
+ return;
+ }
+
+ // Shift the ranks.
+ if (prev_rank > new_rank) {
+ // Shift up
+ sql::Statement shift_statement(db_.GetCachedStatement(
+ SQL_FROM_HERE,
+ "UPDATE thumbnails "
+ "SET url_rank = url_rank + 1 "
+ "WHERE url_rank >= ? AND url_rank < ?"));
+ shift_statement.BindInt(0, new_rank);
+ shift_statement.BindInt(1, prev_rank);
+ if (shift_statement)
+ shift_statement.Run();
+ } else if (prev_rank < new_rank) {
+ // Shift down
+ sql::Statement shift_statement(db_.GetCachedStatement(
+ SQL_FROM_HERE,
+ "UPDATE thumbnails "
+ "SET url_rank = url_rank - 1 "
+ "WHERE url_rank > ? AND url_rank <= ?"));
+ shift_statement.BindInt(0, prev_rank);
+ shift_statement.BindInt(1, new_rank);
+ if (shift_statement)
+ shift_statement.Run();
+ }
+
+ // Set the url's rank.
+ sql::Statement set_statement(db_.GetCachedStatement(
+ SQL_FROM_HERE,
+ "UPDATE thumbnails "
+ "SET url_rank = ? "
+ "WHERE url == ?"));
+ set_statement.BindInt(0, new_rank);
+ set_statement.BindString(1, url.url.spec());
+ if (set_statement)
+ set_statement.Run();
+}
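+
+// Example for UpdatePageRankNoTransaction(): moving a URL from rank 4 to
+// rank 1 increments the rows currently at ranks 1, 2 and 3, then writes
+// rank 1 for the URL itself, all inside the caller's open transaction.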
+
+bool TopSitesDatabaseImpl::GetPageThumbnail(const GURL& url,
+ TopSites::Images* thumbnail) {
+ sql::Statement statement(db_.GetCachedStatement(
+ SQL_FROM_HERE,
+ "SELECT thumbnail, boring_score, good_clipping, at_top, last_updated "
+ "FROM thumbnails WHERE url=?"));
+
+ if (!statement) {
+ LOG(WARNING) << db_.GetErrorMessage();
+ return false;
+ }
+
+ statement.BindString(0, url.spec());
+ if (!statement.Step())
+ return false;
+
+ std::vector<unsigned char> data;
+ statement.ColumnBlobAsVector(0, &data);
+ thumbnail->thumbnail = RefCountedBytes::TakeVector(&data);
+ thumbnail->thumbnail_score.boring_score = statement.ColumnDouble(1);
+ thumbnail->thumbnail_score.good_clipping = statement.ColumnBool(2);
+ thumbnail->thumbnail_score.at_top = statement.ColumnBool(3);
+ thumbnail->thumbnail_score.time_at_snapshot =
+ base::Time::FromInternalValue(statement.ColumnInt64(4));
+ return true;
+}
+
+int TopSitesDatabaseImpl::GetRowCount() {
+ int result = 0;
+ sql::Statement select_statement(db_.GetCachedStatement(
+ SQL_FROM_HERE,
+ "SELECT COUNT (url) FROM thumbnails"));
+ if (!select_statement) {
+ LOG(WARNING) << db_.GetErrorMessage();
+ return result;
+ }
+
+ if (select_statement.Step())
+ result = select_statement.ColumnInt(0);
+
+ return result;
+}
+
+int TopSitesDatabaseImpl::GetURLRank(const MostVisitedURL& url) {
+ int result = -1;
+ sql::Statement select_statement(db_.GetCachedStatement(
+ SQL_FROM_HERE,
+ "SELECT url_rank "
+ "FROM thumbnails WHERE url=?"));
+ if (!select_statement) {
+ LOG(WARNING) << db_.GetErrorMessage();
+ return result;
+ }
+
+ select_statement.BindString(0, url.url.spec());
+ if (select_statement.Step())
+ result = select_statement.ColumnInt(0);
+
+ return result;
+}
+
+// Remove the record for this URL. Returns true iff removed successfully.
+bool TopSitesDatabaseImpl::RemoveURL(const MostVisitedURL& url) {
+ int old_rank = GetURLRank(url);
+ if (old_rank < 0)
+ return false;
+
+ sql::Transaction transaction(&db_);
+ transaction.Begin();
+ // Decrement all following ranks.
+ sql::Statement shift_statement(db_.GetCachedStatement(
+ SQL_FROM_HERE,
+ "UPDATE thumbnails "
+ "SET url_rank = url_rank - 1 "
+ "WHERE url_rank > ?"));
+ if (!shift_statement)
+ return false;
+ shift_statement.BindInt(0, old_rank);
+ shift_statement.Run();
+
+ sql::Statement delete_statement(
+ db_.GetCachedStatement(SQL_FROM_HERE,
+ "DELETE FROM thumbnails WHERE url = ?"));
+ if (!delete_statement)
+ return false;
+ delete_statement.BindString(0, url.url.spec());
+ delete_statement.Run();
+
+ return transaction.Commit();
+}
+
+} // namespace history
diff --git a/chrome/browser/history/top_sites_database.h b/chrome/browser/history/top_sites_database.h
new file mode 100644
index 0000000..cfb362c
--- /dev/null
+++ b/chrome/browser/history/top_sites_database.h
@@ -0,0 +1,136 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_HISTORY_TOP_SITES_DATABASE_H_
+#define CHROME_BROWSER_HISTORY_TOP_SITES_DATABASE_H_
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "app/sql/connection.h"
+#include "base/ref_counted.h"
+#include "chrome/browser/history/history_types.h"
+#include "chrome/browser/history/url_database.h" // For DBCloseScoper.
+
+class FilePath;
+class RefCountedMemory;
+class SkBitmap;
+class TopSites;
+
+namespace base {
+class Time;
+}
+
+namespace history {
+
+// Interface to be implemented by the real storage layer as well as
+// the mockup database for testing.
+class TopSitesDatabase {
+ public:
+ virtual ~TopSitesDatabase() {}
+ virtual bool Init(const FilePath& filename) {
+ return true;
+ }
+
+ // Returns a list of all URLs currently in the table.
+  virtual void GetPageThumbnails(
+      MostVisitedURLList* urls,
+      std::map<GURL, TopSites::Images>* thumbnails) = 0;
+
+ // Set a thumbnail for a URL. |url_rank| is the position of the URL
+ // in the list of TopURLs, zero-based.
+ // If the URL is not in the table, add it. If it is, replace its
+ // thumbnail.
+ virtual void SetPageThumbnail(const MostVisitedURL& url,
+ int url_rank,
+ const TopSites::Images& thumbnail) = 0;
+
+ // Update rank of a URL that's already in the database.
+ virtual void UpdatePageRank(const MostVisitedURL& url, int new_rank) = 0;
+
+ // Convenience wrapper.
+ bool GetPageThumbnail(const MostVisitedURL& url,
+ TopSites::Images* thumbnail) {
+ return GetPageThumbnail(url.url, thumbnail);
+ }
+
+ // Get a thumbnail for a given page. Returns true iff we have the thumbnail.
+ virtual bool GetPageThumbnail(const GURL& url,
+ TopSites::Images* thumbnail) = 0;
+
+ // Remove the record for this URL. Returns true iff removed successfully.
+ virtual bool RemoveURL(const MostVisitedURL& url) = 0;
+};
+
+class TopSitesDatabaseImpl : public TopSitesDatabase {
+ public:
+ TopSitesDatabaseImpl();
+ ~TopSitesDatabaseImpl() {}
+
+ // Must be called after creation but before any other methods are called.
+ // Returns true on success. If false, no other functions should be called.
+ virtual bool Init(const FilePath& db_name);
+
+  // Thumbnails ---------------------------------------------------------------
+
+ // Returns a list of all URLs currently in the table.
+ // WARNING: clears both input arguments.
+ virtual void GetPageThumbnails(MostVisitedURLList* urls,
+ std::map<GURL, TopSites::Images>* thumbnails);
+
+ // Set a thumbnail for a URL. |url_rank| is the position of the URL
+ // in the list of TopURLs, zero-based.
+ // If the URL is not in the table, add it. If it is, replace its
+ // thumbnail and rank. Shift the ranks of other URLs if necessary.
+ virtual void SetPageThumbnail(const MostVisitedURL& url,
+ int new_rank,
+ const TopSites::Images& thumbnail);
+
+ // Sets the rank for a given URL. The URL must be in the database.
+ // Use SetPageThumbnail if it's not.
+ virtual void UpdatePageRank(const MostVisitedURL& url, int new_rank);
+
+ // Get a thumbnail for a given page. Returns true iff we have the thumbnail.
+ virtual bool GetPageThumbnail(const GURL& url,
+ TopSites::Images* thumbnail);
+
+ // Remove the record for this URL. Returns true iff removed successfully.
+ virtual bool RemoveURL(const MostVisitedURL& url);
+
+ private:
+ // Creates the thumbnail table, returning true if the table already exists
+ // or was successfully created.
+ bool InitThumbnailTable();
+
+ // Adds a new URL to the database.
+ void AddPageThumbnail(const MostVisitedURL& url,
+ int new_rank,
+ const TopSites::Images& thumbnail);
+
+ // Sets the page rank. Should be called within an open transaction.
+ void UpdatePageRankNoTransaction(const MostVisitedURL& url, int new_rank);
+
+ // Updates thumbnail of a URL that's already in the database.
+ void UpdatePageThumbnail(const MostVisitedURL& url,
+ const TopSites::Images& thumbnail);
+
+ // Returns the URL's current rank or -1 if it is not present.
+ int GetURLRank(const MostVisitedURL& url);
+
+ // Returns the number of URLs (rows) in the database.
+ int GetRowCount();
+
+ // Encodes redirects into a string.
+ static std::string GetRedirects(const MostVisitedURL& url);
+
+ // Decodes redirects from a string and sets them for the url.
+ static void SetRedirects(const std::string& redirects, MostVisitedURL* url);
+
+ sql::Connection db_;
+};
+
+} // namespace history
+
+#endif // CHROME_BROWSER_HISTORY_TOP_SITES_DATABASE_H_
diff --git a/chrome/browser/history/top_sites_unittest.cc b/chrome/browser/history/top_sites_unittest.cc
new file mode 100644
index 0000000..a6b7e7b
--- /dev/null
+++ b/chrome/browser/history/top_sites_unittest.cc
@@ -0,0 +1,950 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/scoped_temp_dir.h"
+#include "base/string_util.h"
+#include "chrome/browser/history/top_sites.h"
+#include "chrome/common/chrome_paths.h"
+#include "chrome/browser/history/history_marshaling.h"
+#include "chrome/browser/history/top_sites_database.h"
+#include "chrome/browser/history/history_notifications.h"
+#include "chrome/test/testing_profile.h"
+#include "chrome/tools/profiles/thumbnail-inl.h"
+#include "gfx/codec/jpeg_codec.h"
+#include "googleurl/src/gurl.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "third_party/skia/include/core/SkBitmap.h"
+
+namespace history {
+
+static const unsigned char kBlob[] =
+ "12346102356120394751634516591348710478123649165419234519234512349134";
+
+class TopSitesTest : public testing::Test {
+ public:
+ TopSitesTest() : number_of_callbacks_(0) {
+ }
+ ~TopSitesTest() {
+ }
+
+ TopSites& top_sites() { return *top_sites_; }
+ MostVisitedURLList& urls() { return urls_; }
+  Profile& profile() { return *profile_; }
+ FilePath& file_name() { return file_name_; }
+ RefCountedBytes* google_thumbnail() { return google_thumbnail_; }
+ RefCountedBytes* random_thumbnail() { return random_thumbnail_; }
+ RefCountedBytes* weewar_thumbnail() { return weewar_thumbnail_; }
+ CancelableRequestConsumer* consumer() { return &consumer_; }
+  size_t number_of_callbacks() { return number_of_callbacks_; }
+
+ virtual void SetUp() {
+ profile_.reset(new TestingProfile);
+ top_sites_ = new TopSites(profile_.get());
+
+ ASSERT_TRUE(temp_dir_.CreateUniqueTempDir());
+ file_name_ = temp_dir_.path().AppendASCII("TopSites.db");
+ EXPECT_TRUE(file_util::Delete(file_name_, false));
+
+ std::vector<unsigned char> random_data(kBlob, kBlob + sizeof(kBlob));
+ std::vector<unsigned char> google_data(kGoogleThumbnail,
+ kGoogleThumbnail +
+ sizeof(kGoogleThumbnail));
+ std::vector<unsigned char> weewar_data(kWeewarThumbnail,
+ kWeewarThumbnail +
+ sizeof(kWeewarThumbnail));
+ random_thumbnail_ = new RefCountedBytes(random_data);
+ google_thumbnail_ = new RefCountedBytes(google_data);
+ weewar_thumbnail_ = new RefCountedBytes(weewar_data);
+ }
+
+ virtual void TearDown() {
+ profile_.reset();
+ top_sites_ = NULL;
+ EXPECT_TRUE(file_util::Delete(file_name_, false));
+ }
+
+ // Callback for TopSites::GetMostVisitedURLs.
+ void OnTopSitesAvailable(const history::MostVisitedURLList& data) {
+ urls_ = data;
+ number_of_callbacks_++;
+ }
+
+ // Wrappers that allow private TopSites functions to be called from the
+ // individual tests without making them all be friends.
+ GURL GetCanonicalURL(const GURL& url) const {
+    // The wrapped function expects the lock to be held.
+    AutoLock lock(top_sites_->lock_);
+ return top_sites_->GetCanonicalURL(url);
+ }
+
+ void StoreMostVisited(std::vector<MostVisitedURL>* urls) {
+    // The wrapped function expects the lock to be held.
+    AutoLock lock(top_sites_->lock_);
+ top_sites_->StoreMostVisited(urls);
+ }
+
+ static void DiffMostVisited(const std::vector<MostVisitedURL>& old_list,
+ const std::vector<MostVisitedURL>& new_list,
+ std::vector<size_t>* added_urls,
+ std::vector<size_t>* deleted_urls,
+ std::vector<size_t>* moved_urls) {
+ TopSites::DiffMostVisited(old_list, new_list,
+ added_urls, deleted_urls, moved_urls);
+ }
+
+ private:
+ scoped_refptr<TopSites> top_sites_;
+ MostVisitedURLList urls_;
+ size_t number_of_callbacks_;
+ scoped_ptr<TestingProfile> profile_;
+ ScopedTempDir temp_dir_;
+ FilePath file_name_; // Database filename.
+ scoped_refptr<RefCountedBytes> google_thumbnail_;
+ scoped_refptr<RefCountedBytes> random_thumbnail_;
+ scoped_refptr<RefCountedBytes> weewar_thumbnail_;
+ MessageLoop message_loop_;
+ CancelableRequestConsumer consumer_;
+
+ DISALLOW_COPY_AND_ASSIGN(TopSitesTest);
+};
+
+// A mockup of a HistoryService used for testing TopSites.
+class MockHistoryServiceImpl : public TopSites::MockHistoryService {
+ public:
+ MockHistoryServiceImpl() : num_thumbnail_requests_(0) {}
+
+ // Calls the callback directly with the results.
+ HistoryService::Handle QueryMostVisitedURLs(
+ int result_count, int days_back,
+ CancelableRequestConsumerBase* consumer,
+ HistoryService::QueryMostVisitedURLsCallback* callback) {
+ callback->Run(CancelableRequestProvider::Handle(0), // Handle is unused.
+ most_visited_urls_);
+ delete callback;
+ return 0;
+ }
+
+ // Add a page to the end of the pages list.
+ void AppendMockPage(const GURL& url,
+ const string16& title) {
+ MostVisitedURL page;
+ page.url = url;
+ page.title = title;
+ page.redirects = RedirectList();
+ page.redirects.push_back(url);
+ most_visited_urls_.push_back(page);
+ }
+
+ // Removes the last URL in the list.
+ void RemoveMostVisitedURL() {
+ most_visited_urls_.pop_back();
+ }
+
+ virtual void GetPageThumbnail(
+ const GURL& url,
+ CancelableRequestConsumerTSimple<size_t>* consumer,
+ HistoryService::ThumbnailDataCallback* callback,
+ size_t index) {
+ num_thumbnail_requests_++;
+ MostVisitedURL mvu;
+ mvu.url = url;
+ MostVisitedURLList::iterator pos = std::find(most_visited_urls_.begin(),
+ most_visited_urls_.end(),
+ mvu);
+ EXPECT_TRUE(pos != most_visited_urls_.end());
+ scoped_refptr<RefCountedBytes> thumbnail;
+ callback->Run(index, thumbnail);
+ delete callback;
+ }
+
+ void ResetNumberOfThumbnailRequests() {
+ num_thumbnail_requests_ = 0;
+ }
+
+ int GetNumberOfThumbnailRequests() {
+ return num_thumbnail_requests_;
+ }
+
+ private:
+ MostVisitedURLList most_visited_urls_;
+ int num_thumbnail_requests_; // Number of calls to GetPageThumbnail.
+};
+
+// A mockup of a TopSitesDatabase used for testing TopSites.
+class MockTopSitesDatabaseImpl : public TopSitesDatabase {
+ public:
+ virtual void GetPageThumbnails(MostVisitedURLList* urls,
+ std::map<GURL, TopSites::Images>* thumbnails) {
+ // Return a copy of the vector.
+ *urls = top_sites_list_;
+ *thumbnails = thumbnails_map_;
+ }
+
+ virtual void SetPageThumbnail(const MostVisitedURL& url, int url_rank,
+ const TopSites::Images& thumbnail) {
+ SetPageRank(url, url_rank);
+    // Update the thumbnail.
+ thumbnails_map_[url.url] = thumbnail;
+ }
+
+ virtual void UpdatePageRank(const MostVisitedURL& url, int new_rank) {
+ MostVisitedURLList::iterator pos = std::find(top_sites_list_.begin(),
+ top_sites_list_.end(),
+ url);
+ // Is it in the right position?
+ int rank = pos - top_sites_list_.begin();
+ if (rank != new_rank) {
+ // Move the URL to a new position.
+ top_sites_list_.erase(pos);
+ top_sites_list_.insert(top_sites_list_.begin() + new_rank, url);
+ }
+ }
+
+ virtual void SetPageRank(const MostVisitedURL& url, int url_rank) {
+ // Check if this url is in the list, and at which position.
+ MostVisitedURLList::iterator pos = std::find(top_sites_list_.begin(),
+ top_sites_list_.end(),
+ url);
+ if (pos == top_sites_list_.end()) {
+ // Add it to the list.
+ top_sites_list_.insert(top_sites_list_.begin() + url_rank, url);
+ } else {
+ UpdatePageRank(url, url_rank);
+ }
+ }
+
+ // Get a thumbnail for a given page. Returns true iff we have the thumbnail.
+ virtual bool GetPageThumbnail(const GURL& url,
+ TopSites::Images* thumbnail) {
+ std::map<GURL, TopSites::Images>::const_iterator found =
+ thumbnails_map_.find(url);
+ if (found == thumbnails_map_.end())
+ return false; // No thumbnail for this URL.
+
+ thumbnail->thumbnail = found->second.thumbnail;
+ thumbnail->thumbnail_score = found->second.thumbnail_score;
+ return true;
+ }
+
+ virtual bool RemoveURL(const MostVisitedURL& url) {
+ // Comparison by url.
+ MostVisitedURLList::iterator pos = std::find(top_sites_list_.begin(),
+ top_sites_list_.end(),
+ url);
+ if (pos == top_sites_list_.end()) {
+ return false;
+ }
+ top_sites_list_.erase(pos);
+ thumbnails_map_.erase(url.url);
+ return true;
+ }
+
+ private:
+ MostVisitedURLList top_sites_list_; // Keeps the URLs sorted by score (rank).
+ std::map<GURL, TopSites::Images> thumbnails_map_;
+};
+
+// Helper function for appending a URL to a vector of "most visited" URLs,
+// using the default values for everything but the URL.
+static void AppendMostVisitedURL(std::vector<MostVisitedURL>* list,
+ const GURL& url) {
+ MostVisitedURL mv;
+ mv.url = url;
+ mv.redirects.push_back(url);
+ list->push_back(mv);
+}
+
+// Returns true if t1 and t2 contain the same data.
+static bool ThumbnailsAreEqual(RefCountedBytes* t1,
+ RefCountedBytes* t2) {
+ if (!t1 || !t2)
+ return false;
+ if (t1->data.size() != t2->data.size())
+ return false;
+ return std::equal(t1->data.begin(),
+ t1->data.end(),
+ t2->data.begin());
+}
+
+// Same as AppendMostVisitedURL except that it adds a redirect from the first
+// URL to the second.
+static void AppendMostVisitedURLWithRedirect(
+ std::vector<MostVisitedURL>* list,
+ const GURL& redirect_source, const GURL& redirect_dest) {
+ MostVisitedURL mv;
+ mv.url = redirect_dest;
+ mv.redirects.push_back(redirect_source);
+ mv.redirects.push_back(redirect_dest);
+ list->push_back(mv);
+}
+
+TEST_F(TopSitesTest, GetCanonicalURL) {
+ ChromeThread db_loop(ChromeThread::DB, MessageLoop::current());
+ // Have two chains:
+ // google.com -> www.google.com
+ // news.google.com (no redirects)
+ GURL news("http://news.google.com/");
+ GURL source("http://google.com/");
+ GURL dest("http://www.google.com/");
+
+ std::vector<MostVisitedURL> most_visited;
+ AppendMostVisitedURLWithRedirect(&most_visited, source, dest);
+ AppendMostVisitedURL(&most_visited, news);
+ StoreMostVisited(&most_visited);
+
+ // Random URLs not in the database shouldn't be reported as being in there.
+ GURL result = GetCanonicalURL(GURL("http://fark.com/"));
+ EXPECT_TRUE(result.is_empty());
+
+ // Easy case, there are no redirects and the exact URL is stored.
+ result = GetCanonicalURL(news);
+ EXPECT_EQ(news, result);
+
+ // The URL in question is the source URL in a redirect list.
+ result = GetCanonicalURL(source);
+ EXPECT_EQ(dest, result);
+
+ // The URL in question is the destination of a redirect.
+ result = GetCanonicalURL(dest);
+ EXPECT_EQ(dest, result);
+}
+
+TEST_F(TopSitesTest, DiffMostVisited) {
+ GURL stays_the_same("http://staysthesame/");
+ GURL gets_added_1("http://getsadded1/");
+ GURL gets_added_2("http://getsadded2/");
+ GURL gets_deleted_1("http://getsdeleted2/");
+ GURL gets_moved_1("http://getsmoved1/");
+
+ std::vector<MostVisitedURL> old_list;
+ AppendMostVisitedURL(&old_list, stays_the_same); // 0 (unchanged)
+ AppendMostVisitedURL(&old_list, gets_deleted_1); // 1 (deleted)
+ AppendMostVisitedURL(&old_list, gets_moved_1); // 2 (moved to 3)
+
+ std::vector<MostVisitedURL> new_list;
+ AppendMostVisitedURL(&new_list, stays_the_same); // 0 (unchanged)
+ AppendMostVisitedURL(&new_list, gets_added_1); // 1 (added)
+ AppendMostVisitedURL(&new_list, gets_added_2); // 2 (added)
+ AppendMostVisitedURL(&new_list, gets_moved_1); // 3 (moved from 2)
+
+ std::vector<size_t> added;
+ std::vector<size_t> deleted;
+ std::vector<size_t> moved;
+ DiffMostVisited(old_list, new_list, &added, &deleted, &moved);
+
+ ASSERT_EQ(2u, added.size());
+ ASSERT_EQ(1u, deleted.size());
+ ASSERT_EQ(1u, moved.size());
+
+ // There should be 2 URLs added, we don't assume what order they're in inside
+ // the result vector.
+ EXPECT_TRUE(added[0] == 1 || added[1] == 1);
+ EXPECT_TRUE(added[0] == 2 || added[1] == 2);
+
+ EXPECT_EQ(1u, deleted[0]);
+ EXPECT_EQ(3u, moved[0]);
+}
+
+TEST_F(TopSitesTest, SetPageThumbnail) {
+ ChromeThread db_loop(ChromeThread::DB, MessageLoop::current());
+ GURL url1a("http://google.com/");
+ GURL url1b("http://www.google.com/");
+ GURL url2("http://images.google.com/");
+ GURL invalid_url("chrome://favicon/http://google.com/");
+
+ std::vector<MostVisitedURL> list;
+ AppendMostVisitedURL(&list, url2);
+
+ MostVisitedURL mv;
+ mv.url = url1b;
+ mv.redirects.push_back(url1a);
+ mv.redirects.push_back(url1b);
+ list.push_back(mv);
+
+ // Save our most visited data containing that one site.
+ StoreMostVisited(&list);
+
+ // Create a dummy thumbnail.
+ SkBitmap thumbnail;
+ thumbnail.setConfig(SkBitmap::kARGB_8888_Config, 4, 4);
+ thumbnail.allocPixels();
+ thumbnail.eraseRGB(0x00, 0x00, 0x00);
+
+ base::Time now = base::Time::Now();
+ ThumbnailScore low_score(1.0, true, true, now);
+ ThumbnailScore medium_score(0.5, true, true, now);
+ ThumbnailScore high_score(0.0, true, true, now);
+
+ // Setting the thumbnail for invalid pages should fail.
+ EXPECT_FALSE(top_sites().SetPageThumbnail(invalid_url,
+ thumbnail, medium_score));
+
+ // Setting the thumbnail for url2 should succeed, lower scores shouldn't
+ // replace it, higher scores should.
+ EXPECT_TRUE(top_sites().SetPageThumbnail(url2, thumbnail, medium_score));
+ EXPECT_FALSE(top_sites().SetPageThumbnail(url2, thumbnail, low_score));
+ EXPECT_TRUE(top_sites().SetPageThumbnail(url2, thumbnail, high_score));
+
+ // Set on the redirect source should succeed. It should be replacable by
+ // the same score on the redirect destination, which in turn should not
+ // be replaced by the source again.
+ EXPECT_TRUE(top_sites().SetPageThumbnail(url1a, thumbnail, medium_score));
+ EXPECT_TRUE(top_sites().SetPageThumbnail(url1b, thumbnail, medium_score));
+ EXPECT_FALSE(top_sites().SetPageThumbnail(url1a, thumbnail, medium_score));
+}
+
+TEST_F(TopSitesTest, GetMostVisited) {
+ ChromeThread db_loop(ChromeThread::DB, MessageLoop::current());
+ GURL news("http://news.google.com/");
+ GURL google("http://google.com/");
+
+ MockHistoryServiceImpl hs;
+ hs.AppendMockPage(news, ASCIIToUTF16("Google News"));
+ hs.AppendMockPage(google, ASCIIToUTF16("Google"));
+ top_sites().SetMockHistoryService(&hs);
+
+ top_sites().StartQueryForMostVisited();
+ MessageLoop::current()->RunAllPending();
+ top_sites().GetMostVisitedURLs(
+ consumer(),
+ NewCallback(static_cast<TopSitesTest*>(this),
+ &TopSitesTest::OnTopSitesAvailable));
+ ASSERT_EQ(2u, urls().size());
+ EXPECT_EQ(news, urls()[0].url);
+ EXPECT_EQ(google, urls()[1].url);
+}
+
+TEST_F(TopSitesTest, MockDatabase) {
+ ChromeThread db_loop(ChromeThread::DB, MessageLoop::current());
+ MockTopSitesDatabaseImpl* db = new MockTopSitesDatabaseImpl;
+  // |db| is destroyed when top_sites() is destroyed in TearDown.
+ top_sites().db_.reset(db);
+ MostVisitedURL url;
+ GURL asdf_url("http://asdf.com");
+ string16 asdf_title(ASCIIToUTF16("ASDF"));
+ GURL google_url("http://google.com");
+ string16 google_title(ASCIIToUTF16("Google"));
+ GURL news_url("http://news.google.com");
+ string16 news_title(ASCIIToUTF16("Google News"));
+
+ url.url = asdf_url;
+ url.title = asdf_title;
+ url.redirects.push_back(url.url);
+ TopSites::Images thumbnail;
+ db->SetPageThumbnail(url, 0, thumbnail);
+
+ top_sites().ReadDatabase();
+
+ top_sites().GetMostVisitedURLs(
+ consumer(),
+ NewCallback(static_cast<TopSitesTest*>(this),
+ &TopSitesTest::OnTopSitesAvailable));
+ ASSERT_EQ(1u, urls().size());
+ EXPECT_EQ(asdf_url, urls()[0].url);
+ EXPECT_EQ(asdf_title, urls()[0].title);
+
+ MostVisitedURL url2;
+ url2.url = google_url;
+ url2.title = google_title;
+ url2.redirects.push_back(url2.url);
+
+ // Add new thumbnail at rank 0 and shift the other result to 1.
+ db->SetPageThumbnail(url2, 0, thumbnail);
+
+ top_sites().ReadDatabase();
+
+ top_sites().GetMostVisitedURLs(
+ consumer(),
+ NewCallback(static_cast<TopSitesTest*>(this),
+ &TopSitesTest::OnTopSitesAvailable));
+ ASSERT_EQ(2u, urls().size());
+ EXPECT_EQ(google_url, urls()[0].url);
+ EXPECT_EQ(google_title, urls()[0].title);
+ EXPECT_EQ(asdf_url, urls()[1].url);
+ EXPECT_EQ(asdf_title, urls()[1].title);
+
+ MockHistoryServiceImpl hs;
+ // Add one old, one new URL to the history.
+ hs.AppendMockPage(google_url, google_title);
+ hs.AppendMockPage(news_url, news_title);
+ top_sites().SetMockHistoryService(&hs);
+
+ // This writes the new data to the DB.
+ top_sites().StartQueryForMostVisited();
+ MessageLoop::current()->RunAllPending();
+
+ std::map<GURL, TopSites::Images> thumbnails;
+ MostVisitedURLList result;
+ db->GetPageThumbnails(&result, &thumbnails);
+ ASSERT_EQ(2u, result.size());
+ EXPECT_EQ(google_title, result[0].title);
+ EXPECT_EQ(news_title, result[1].title);
+}
+
+// Test TopSitesDatabaseImpl.
+TEST_F(TopSitesTest, TopSitesDB) {
+ TopSitesDatabaseImpl db;
+
+ ASSERT_TRUE(db.Init(file_name()));
+
+ MostVisitedURL url;
+ GURL asdf_url("http://asdf.com");
+ string16 asdf_title(ASCIIToUTF16("ASDF"));
+ GURL google_url("http://google.com");
+ string16 google_title(ASCIIToUTF16("Google"));
+ GURL news_url("http://news.google.com");
+ string16 news_title(ASCIIToUTF16("Google News"));
+
+ url.url = asdf_url;
+ url.title = asdf_title;
+ url.redirects.push_back(url.url);
+ TopSites::Images thumbnail;
+ thumbnail.thumbnail = random_thumbnail();
+ // Add asdf at rank 0.
+ db.SetPageThumbnail(url, 0, thumbnail);
+
+ TopSites::Images result;
+ EXPECT_TRUE(db.GetPageThumbnail(url.url, &result));
+ EXPECT_EQ(thumbnail.thumbnail->data.size(), result.thumbnail->data.size());
+ EXPECT_TRUE(ThumbnailsAreEqual(thumbnail.thumbnail, result.thumbnail));
+
+ MostVisitedURLList urls;
+ std::map<GURL, TopSites::Images> thumbnails;
+ db.GetPageThumbnails(&urls, &thumbnails);
+ ASSERT_EQ(1u, urls.size());
+ EXPECT_EQ(asdf_url, urls[0].url);
+ EXPECT_EQ(asdf_title, urls[0].title);
+
+ url.url = google_url;
+ url.title = google_title;
+
+ // Add google at rank 1 - no rank shifting.
+ db.SetPageThumbnail(url, 1, thumbnail);
+ db.GetPageThumbnails(&urls, &thumbnails);
+ ASSERT_EQ(2u, urls.size());
+ EXPECT_EQ(asdf_url, urls[0].url);
+ EXPECT_EQ(asdf_title, urls[0].title);
+ EXPECT_EQ(google_url, urls[1].url);
+ EXPECT_EQ(google_title, urls[1].title);
+
+ url.url = news_url;
+ url.title = news_title;
+
+ // Add news at rank 1 - shift google to rank 2.
+ db.SetPageThumbnail(url, 1, thumbnail);
+ db.GetPageThumbnails(&urls, &thumbnails);
+ ASSERT_EQ(3u, urls.size());
+ EXPECT_EQ(asdf_url, urls[0].url);
+ EXPECT_EQ(news_url, urls[1].url);
+ EXPECT_EQ(google_url, urls[2].url);
+
+  // Move news to rank 0 - asdf shifts down to rank 1.
+ db.SetPageThumbnail(url, 0, thumbnail);
+ db.GetPageThumbnails(&urls, &thumbnails);
+ ASSERT_EQ(3u, urls.size());
+ EXPECT_EQ(news_url, urls[0].url);
+ EXPECT_EQ(asdf_url, urls[1].url);
+ EXPECT_EQ(google_url, urls[2].url);
+
+  // Move news to rank 2 - asdf and google each shift up one rank.
+ db.SetPageThumbnail(url, 2, thumbnail);
+ db.GetPageThumbnails(&urls, &thumbnails);
+ ASSERT_EQ(3u, urls.size());
+ EXPECT_EQ(asdf_url, urls[0].url);
+ EXPECT_EQ(google_url, urls[1].url);
+ EXPECT_EQ(news_url, urls[2].url);
+
+ // Delete asdf.
+ url.url = asdf_url;
+ db.RemoveURL(url);
+
+ db.GetPageThumbnails(&urls, &thumbnails);
+ ASSERT_EQ(2u, urls.size());
+ EXPECT_EQ(google_url, urls[0].url);
+ EXPECT_EQ(news_url, urls[1].url);
+}
+
+// Test TopSites with a real database.
+TEST_F(TopSitesTest, RealDatabase) {
+ ChromeThread db_loop(ChromeThread::DB, MessageLoop::current());
+ TopSitesDatabaseImpl* db = new TopSitesDatabaseImpl;
+
+ ASSERT_TRUE(db->Init(file_name()));
+  // |db| is destroyed when top_sites() is destroyed in TearDown.
+ top_sites().db_.reset(db);
+ MostVisitedURL url;
+ GURL asdf_url("http://asdf.com");
+ string16 asdf_title(ASCIIToUTF16("ASDF"));
+ GURL google1_url("http://google.com");
+ GURL google2_url("http://google.com/redirect");
+ GURL google3_url("http://www.google.com");
+ string16 google_title(ASCIIToUTF16("Google"));
+ GURL news_url("http://news.google.com");
+ string16 news_title(ASCIIToUTF16("Google News"));
+
+ url.url = asdf_url;
+ url.title = asdf_title;
+ url.redirects.push_back(url.url);
+ TopSites::Images thumbnail;
+ thumbnail.thumbnail = random_thumbnail();
+ db->SetPageThumbnail(url, 0, thumbnail);
+
+ top_sites().ReadDatabase();
+
+ top_sites().GetMostVisitedURLs(
+ consumer(),
+ NewCallback(static_cast<TopSitesTest*>(this),
+ &TopSitesTest::OnTopSitesAvailable));
+ ASSERT_EQ(1u, urls().size());
+ EXPECT_EQ(asdf_url, urls()[0].url);
+ EXPECT_EQ(asdf_title, urls()[0].title);
+
+ TopSites::Images img_result;
+ db->GetPageThumbnail(asdf_url, &img_result);
+ EXPECT_TRUE(img_result.thumbnail != NULL);
+ EXPECT_TRUE(ThumbnailsAreEqual(random_thumbnail(), img_result.thumbnail));
+
+ RefCountedBytes* thumbnail_result;
+ EXPECT_TRUE(top_sites().GetPageThumbnail(asdf_url, &thumbnail_result));
+ EXPECT_TRUE(thumbnail_result != NULL);
+ EXPECT_TRUE(ThumbnailsAreEqual(random_thumbnail(), thumbnail_result));
+
+ MostVisitedURL url2;
+ url2.url = google1_url;
+ url2.title = google_title;
+ url2.redirects.push_back(google1_url);
+ url2.redirects.push_back(google2_url);
+ url2.redirects.push_back(google3_url);
+
+ // Add new thumbnail at rank 0 and shift the other result to 1.
+ TopSites::Images g_thumbnail;
+ g_thumbnail.thumbnail = google_thumbnail();
+ db->SetPageThumbnail(url2, 0, g_thumbnail);
+
+ top_sites().ReadDatabase();
+
+ top_sites().GetMostVisitedURLs(
+ consumer(),
+ NewCallback(static_cast<TopSitesTest*>(this),
+ &TopSitesTest::OnTopSitesAvailable));
+ ASSERT_EQ(2u, urls().size());
+ EXPECT_EQ(google1_url, urls()[0].url);
+ EXPECT_EQ(google_title, urls()[0].title);
+ EXPECT_TRUE(top_sites().GetPageThumbnail(google1_url, &thumbnail_result));
+ EXPECT_TRUE(ThumbnailsAreEqual(google_thumbnail(), thumbnail_result));
+ ASSERT_EQ(3u, urls()[0].redirects.size());
+ EXPECT_EQ(google1_url, urls()[0].redirects[0]);
+ EXPECT_EQ(google2_url, urls()[0].redirects[1]);
+ EXPECT_EQ(google3_url, urls()[0].redirects[2]);
+
+ EXPECT_EQ(asdf_url, urls()[1].url);
+ EXPECT_EQ(asdf_title, urls()[1].title);
+
+ MockHistoryServiceImpl hs;
+ // Add one old, one new URL to the history.
+ hs.AppendMockPage(google1_url, google_title);
+ hs.AppendMockPage(news_url, news_title);
+ top_sites().SetMockHistoryService(&hs);
+
+ // This requests data from History Service and writes it to the DB.
+ top_sites().StartQueryForMostVisited();
+ MessageLoop::current()->RunAllPending();
+
+ std::map<GURL, TopSites::Images> thumbnails;
+ MostVisitedURLList results;
+ db->GetPageThumbnails(&results, &thumbnails);
+ ASSERT_EQ(2u, results.size());
+ EXPECT_EQ(google_title, results[0].title);
+ EXPECT_EQ(news_title, results[1].title);
+
+ scoped_ptr<SkBitmap> weewar_bitmap(
+ gfx::JPEGCodec::Decode(weewar_thumbnail()->front(),
+ weewar_thumbnail()->size()));
+
+ base::Time now = base::Time::Now();
+ ThumbnailScore low_score(1.0, true, true, now);
+ ThumbnailScore medium_score(0.5, true, true, now);
+ ThumbnailScore high_score(0.0, true, true, now);
+
+ // 1. Set to weewar. (Writes the thumbnail to the DB.)
+ EXPECT_TRUE(top_sites().SetPageThumbnail(google1_url,
+ *weewar_bitmap,
+ medium_score));
+ RefCountedBytes* out_1;
+ TopSites::Images out_2;
+ EXPECT_TRUE(top_sites().GetPageThumbnail(google1_url, &out_1));
+
+ MessageLoop::current()->RunAllPending();
+
+ db->GetPageThumbnail(url2.url, &out_2);
+ EXPECT_TRUE(ThumbnailsAreEqual(out_1, out_2.thumbnail));
+
+ scoped_ptr<SkBitmap> google_bitmap(
+ gfx::JPEGCodec::Decode(google_thumbnail()->front(),
+ google_thumbnail()->size()));
+
+ // 2. Set to google - low score.
+ EXPECT_FALSE(top_sites().SetPageThumbnail(google1_url,
+ *google_bitmap,
+ low_score));
+
+ // 3. Set to google - high score.
+ EXPECT_TRUE(top_sites().SetPageThumbnail(google1_url,
+ *google_bitmap,
+ high_score));
+ // Check that the thumbnail was updated.
+ EXPECT_TRUE(top_sites().GetPageThumbnail(google1_url, &out_1));
+ EXPECT_FALSE(ThumbnailsAreEqual(out_1, out_2.thumbnail));
+ MessageLoop::current()->RunAllPending();
+
+ // Read the new thumbnail from the DB - should match what's in TopSites.
+ db->GetPageThumbnail(url2.url, &out_2);
+ EXPECT_TRUE(ThumbnailsAreEqual(out_1, out_2.thumbnail));
+ EXPECT_TRUE(high_score.Equals(out_2.thumbnail_score));
+}
+
+// This test has been crashing unit_tests on Mac 10.6.
+// See http://crbug.com/49799
+TEST_F(TopSitesTest, DISABLED_DeleteNotifications) {
+ ChromeThread db_loop(ChromeThread::DB, MessageLoop::current());
+ GURL google1_url("http://google.com");
+ GURL google2_url("http://google.com/redirect");
+ GURL google3_url("http://www.google.com");
+ string16 google_title(ASCIIToUTF16("Google"));
+ GURL news_url("http://news.google.com");
+ string16 news_title(ASCIIToUTF16("Google News"));
+
+ MockHistoryServiceImpl hs;
+
+ top_sites().Init(file_name());
+
+ hs.AppendMockPage(google1_url, google_title);
+ hs.AppendMockPage(news_url, news_title);
+ top_sites().SetMockHistoryService(&hs);
+
+ top_sites().StartQueryForMostVisited();
+ MessageLoop::current()->RunAllPending();
+
+ top_sites().GetMostVisitedURLs(
+ consumer(),
+ NewCallback(static_cast<TopSitesTest*>(this),
+ &TopSitesTest::OnTopSitesAvailable));
+ ASSERT_EQ(2u, urls().size());
+
+ hs.RemoveMostVisitedURL();
+
+ history::URLsDeletedDetails details;
+ details.all_history = false;
+ top_sites().Observe(NotificationType::HISTORY_URLS_DELETED,
+ Source<Profile> (&profile()),
+ (const NotificationDetails&)details);
+ MessageLoop::current()->RunAllPending();
+
+ top_sites().GetMostVisitedURLs(
+ consumer(),
+ NewCallback(static_cast<TopSitesTest*>(this),
+ &TopSitesTest::OnTopSitesAvailable));
+ ASSERT_EQ(1u, urls().size());
+ EXPECT_EQ(google_title, urls()[0].title);
+
+ hs.RemoveMostVisitedURL();
+ details.all_history = true;
+ top_sites().Observe(NotificationType::HISTORY_URLS_DELETED,
+ Source<Profile> (&profile()),
+ (const NotificationDetails&)details);
+ MessageLoop::current()->RunAllPending();
+ top_sites().GetMostVisitedURLs(
+ consumer(),
+ NewCallback(static_cast<TopSitesTest*>(this),
+ &TopSitesTest::OnTopSitesAvailable));
+ ASSERT_EQ(0u, urls().size());
+}
+
+TEST_F(TopSitesTest, GetUpdateDelay) {
+ top_sites().last_num_urls_changed_ = 0;
+ EXPECT_EQ(30, top_sites().GetUpdateDelay().InSeconds());
+
+ top_sites().top_sites_.resize(20);
+ top_sites().last_num_urls_changed_ = 0;
+ EXPECT_EQ(60, top_sites().GetUpdateDelay().InMinutes());
+
+ top_sites().last_num_urls_changed_ = 3;
+ EXPECT_EQ(52, top_sites().GetUpdateDelay().InMinutes());
+
+ top_sites().last_num_urls_changed_ = 20;
+ EXPECT_EQ(1, top_sites().GetUpdateDelay().InMinutes());
+}
+
+TEST_F(TopSitesTest, Migration) {
+ ChromeThread db_loop(ChromeThread::DB, MessageLoop::current());
+ GURL google1_url("http://google.com");
+ GURL google2_url("http://google.com/redirect");
+ GURL google3_url("http://www.google.com");
+ string16 google_title(ASCIIToUTF16("Google"));
+ GURL news_url("http://news.google.com");
+ string16 news_title(ASCIIToUTF16("Google News"));
+
+ MockHistoryServiceImpl hs;
+
+ top_sites().Init(file_name());
+
+ hs.AppendMockPage(google1_url, google_title);
+ hs.AppendMockPage(news_url, news_title);
+ top_sites().SetMockHistoryService(&hs);
+
+ top_sites().StartMigration();
+ EXPECT_TRUE(top_sites().migration_in_progress_);
+ MessageLoop::current()->RunAllPending();
+ EXPECT_EQ(2, hs.GetNumberOfThumbnailRequests());
+ EXPECT_FALSE(top_sites().migration_in_progress_);
+}
+
+TEST_F(TopSitesTest, QueueingRequestsForTopSites) {
+ ChromeThread db_loop(ChromeThread::DB, MessageLoop::current());
+ CancelableRequestConsumer c1;
+ CancelableRequestConsumer c2;
+ CancelableRequestConsumer c3;
+ top_sites().GetMostVisitedURLs(
+ &c1,
+ NewCallback(static_cast<TopSitesTest*>(this),
+ &TopSitesTest::OnTopSitesAvailable));
+
+ top_sites().GetMostVisitedURLs(
+ &c2,
+ NewCallback(static_cast<TopSitesTest*>(this),
+ &TopSitesTest::OnTopSitesAvailable));
+
+ top_sites().GetMostVisitedURLs(
+ &c3,
+ NewCallback(static_cast<TopSitesTest*>(this),
+ &TopSitesTest::OnTopSitesAvailable));
+
+ EXPECT_EQ(0u, number_of_callbacks());
+ EXPECT_EQ(0u, urls().size());
+
+ MostVisitedURLList pages;
+ MostVisitedURL url;
+ url.url = GURL("http://1.com/");
+ url.redirects.push_back(url.url);
+ pages.push_back(url);
+ url.url = GURL("http://2.com/");
+ url.redirects.push_back(url.url);
+ pages.push_back(url);
+ top_sites().OnTopSitesAvailable(0, pages);
+ MessageLoop::current()->RunAllPending();
+
+ EXPECT_EQ(3u, number_of_callbacks());
+
+ ASSERT_EQ(2u, urls().size());
+ EXPECT_EQ("http://1.com/", urls()[0].url.spec());
+ EXPECT_EQ("http://2.com/", urls()[1].url.spec());
+
+ url.url = GURL("http://3.com/");
+ url.redirects.push_back(url.url);
+ pages.push_back(url);
+ top_sites().OnTopSitesAvailable(0, pages);
+ MessageLoop::current()->RunAllPending();
+
+ top_sites().GetMostVisitedURLs(
+ &c3,
+ NewCallback(static_cast<TopSitesTest*>(this),
+ &TopSitesTest::OnTopSitesAvailable));
+
+ EXPECT_EQ(4u, number_of_callbacks());
+
+ ASSERT_EQ(3u, urls().size());
+ EXPECT_EQ("http://1.com/", urls()[0].url.spec());
+ EXPECT_EQ("http://2.com/", urls()[1].url.spec());
+ EXPECT_EQ("http://3.com/", urls()[2].url.spec());
+}
+
+TEST_F(TopSitesTest, CancelingRequestsForTopSites) {
+ CancelableRequestConsumer c1;
+ CancelableRequestConsumer c2;
+ top_sites().GetMostVisitedURLs(
+ &c1,
+ NewCallback(static_cast<TopSitesTest*>(this),
+ &TopSitesTest::OnTopSitesAvailable));
+
+ top_sites().GetMostVisitedURLs(
+ &c2,
+ NewCallback(static_cast<TopSitesTest*>(this),
+ &TopSitesTest::OnTopSitesAvailable));
+
+ {
+ CancelableRequestConsumer c3;
+ top_sites().GetMostVisitedURLs(
+ &c3,
+ NewCallback(static_cast<TopSitesTest*>(this),
+ &TopSitesTest::OnTopSitesAvailable));
+    // When c3 goes out of scope here, its pending request is canceled.
+ }
+
+  // No callbacks should run until OnTopSitesAvailable is called.
+ EXPECT_EQ(0u, number_of_callbacks());
+ EXPECT_EQ(0u, urls().size());
+
+ MostVisitedURLList pages;
+ MostVisitedURL url;
+ url.url = GURL("http://1.com/");
+ url.redirects.push_back(url.url);
+ pages.push_back(url);
+ url.url = GURL("http://2.com/");
+ pages.push_back(url);
+
+ top_sites().OnTopSitesAvailable(0, pages);
+
+  // One request was canceled, so only two callbacks fire.
+ EXPECT_EQ(2u, number_of_callbacks());
+
+ ASSERT_EQ(2u, urls().size());
+ EXPECT_EQ("http://1.com/", urls()[0].url.spec());
+ EXPECT_EQ("http://2.com/", urls()[1].url.spec());
+}
+
+TEST_F(TopSitesTest, AddTemporaryThumbnail) {
+ ChromeThread db_loop(ChromeThread::DB, MessageLoop::current());
+ GURL unknown_url("http://news.google.com/");
+ GURL invalid_url("chrome://thumb/http://google.com/");
+ GURL url1a("http://google.com/");
+ GURL url1b("http://www.google.com/");
+
+ // Create a dummy thumbnail.
+ SkBitmap thumbnail;
+ thumbnail.setConfig(SkBitmap::kARGB_8888_Config, 4, 4);
+ thumbnail.allocPixels();
+ thumbnail.eraseRGB(0x00, 0x00, 0x00);
+
+ ThumbnailScore medium_score(0.5, true, true, base::Time::Now());
+
+  // Thumbnails for invalid (e.g. chrome://) URLs should be rejected.
+ EXPECT_FALSE(top_sites().SetPageThumbnail(invalid_url,
+ thumbnail, medium_score));
+ // Store thumbnails for unknown (but valid) URLs temporarily - calls
+ // AddTemporaryThumbnail.
+ EXPECT_TRUE(top_sites().SetPageThumbnail(unknown_url,
+ thumbnail, medium_score));
+
+ std::vector<MostVisitedURL> list;
+
+ MostVisitedURL mv;
+ mv.url = unknown_url;
+ mv.redirects.push_back(mv.url);
+ mv.redirects.push_back(url1a);
+ mv.redirects.push_back(url1b);
+ list.push_back(mv);
+
+ // Update URLs - use temporary thumbnails.
+ top_sites().UpdateMostVisited(list);
+
+ RefCountedBytes* out = NULL;
+ ASSERT_TRUE(top_sites().GetPageThumbnail(unknown_url, &out));
+ scoped_ptr<SkBitmap> out_bitmap(gfx::JPEGCodec::Decode(out->front(),
+ out->size()));
+ EXPECT_EQ(0, memcmp(thumbnail.getPixels(), out_bitmap->getPixels(),
+ thumbnail.getSize()));
+}
+
+} // namespace history
diff --git a/chrome/browser/history/url_database.cc b/chrome/browser/history/url_database.cc
new file mode 100644
index 0000000..07f8881
--- /dev/null
+++ b/chrome/browser/history/url_database.cc
@@ -0,0 +1,498 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/history/url_database.h"
+
+#include <algorithm>
+#include <limits>
+#include <string>
+#include <vector>
+
+#include "app/l10n_util.h"
+#include "app/sql/connection.h"
+#include "app/sql/statement.h"
+#include "base/utf_string_conversions.h"
+#include "chrome/common/url_constants.h"
+#include "googleurl/src/gurl.h"
+
+namespace history {
+
+const char URLDatabase::kURLRowFields[] = HISTORY_URL_ROW_FIELDS;
+const int URLDatabase::kNumURLRowFields = 8;
+
+bool URLDatabase::URLEnumerator::GetNextURL(URLRow* r) {
+ if (statement_.Step()) {
+ FillURLRow(statement_, r);
+ return true;
+ }
+ return false;
+}
+
+URLDatabase::URLDatabase() : has_keyword_search_terms_(false) {
+}
+
+URLDatabase::~URLDatabase() {
+}
+
+// static
+std::string URLDatabase::GURLToDatabaseURL(const GURL& gurl) {
+ // TODO(brettw): do something fancy here with encoding, etc.
+
+ // Strip username and password from URL before sending to DB.
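+  // For example, "http://user:pass@google.com/" is stored as
+  // "http://google.com/".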
+ GURL::Replacements replacements;
+ replacements.ClearUsername();
+ replacements.ClearPassword();
+
+ return (gurl.ReplaceComponents(replacements)).spec();
+}
+
+// Convenience to fill a history::URLRow. Must be in sync with the fields in
+// kURLRowFields.
+void URLDatabase::FillURLRow(sql::Statement& s, history::URLRow* i) {
+ DCHECK(i);
+ i->id_ = s.ColumnInt64(0);
+ i->url_ = GURL(s.ColumnString(1));
+ i->title_ = s.ColumnString16(2);
+ i->visit_count_ = s.ColumnInt(3);
+ i->typed_count_ = s.ColumnInt(4);
+ i->last_visit_ = base::Time::FromInternalValue(s.ColumnInt64(5));
+ i->hidden_ = s.ColumnInt(6) != 0;
+ i->favicon_id_ = s.ColumnInt64(7);
+}
+
+bool URLDatabase::GetURLRow(URLID url_id, URLRow* info) {
+  // TODO(brettw) We need to check for empty URLs to handle old empty URLs
+  // that got into the database before we added any checks. We should
+  // eventually be able to remove this check once all inputs use GURL
+  // (which prohibits empty input).
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls WHERE id=?"));
+ if (!statement)
+ return false;
+
+ statement.BindInt64(0, url_id);
+ if (statement.Step()) {
+ FillURLRow(statement, info);
+ return true;
+ }
+ return false;
+}
+
+bool URLDatabase::GetAllTypedUrls(std::vector<history::URLRow>* urls) {
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls WHERE typed_count > 0"));
+ if (!statement)
+ return false;
+
+ while (statement.Step()) {
+ URLRow info;
+ FillURLRow(statement, &info);
+ urls->push_back(info);
+ }
+ return true;
+}
+
+URLID URLDatabase::GetRowForURL(const GURL& url, history::URLRow* info) {
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls WHERE url=?"));
+ if (!statement)
+ return 0;
+
+ std::string url_string = GURLToDatabaseURL(url);
+ statement.BindString(0, url_string);
+ if (!statement.Step())
+ return 0; // no data
+
+ if (info)
+ FillURLRow(statement, info);
+ return statement.ColumnInt64(0);
+}
+
+bool URLDatabase::UpdateURLRow(URLID url_id,
+ const history::URLRow& info) {
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "UPDATE urls SET title=?,visit_count=?,typed_count=?,last_visit_time=?,"
+ "hidden=?,favicon_id=?"
+ "WHERE id=?"));
+ if (!statement)
+ return false;
+
+ statement.BindString16(0, info.title());
+ statement.BindInt(1, info.visit_count());
+ statement.BindInt(2, info.typed_count());
+ statement.BindInt64(3, info.last_visit().ToInternalValue());
+ statement.BindInt(4, info.hidden() ? 1 : 0);
+ statement.BindInt64(5, info.favicon_id());
+ statement.BindInt64(6, url_id);
+ return statement.Run();
+}
+
+URLID URLDatabase::AddURLInternal(const history::URLRow& info,
+ bool is_temporary) {
+  // This function is used to insert into two different tables, so we have to
+  // do some shuffling. Unfortunately, we can't use the macro
+  // HISTORY_URL_ROW_FIELDS because it specifies the table name, which is
+  // invalid in the insert syntax.
+ #define ADDURL_COMMON_SUFFIX \
+ " (url, title, visit_count, typed_count, "\
+ "last_visit_time, hidden, favicon_id) "\
+ "VALUES (?,?,?,?,?,?,?)"
+ const char* statement_name;
+ const char* statement_sql;
+ if (is_temporary) {
+ statement_name = "AddURLTemporary";
+ statement_sql = "INSERT INTO temp_urls" ADDURL_COMMON_SUFFIX;
+ } else {
+ statement_name = "AddURL";
+ statement_sql = "INSERT INTO urls" ADDURL_COMMON_SUFFIX;
+ }
+ #undef ADDURL_COMMON_SUFFIX
+
+ sql::Statement statement(GetDB().GetCachedStatement(
+ sql::StatementID(statement_name), statement_sql));
+ if (!statement) {
+ NOTREACHED() << GetDB().GetErrorMessage();
+ return 0;
+ }
+
+ statement.BindString(0, GURLToDatabaseURL(info.url()));
+ statement.BindString16(1, info.title());
+ statement.BindInt(2, info.visit_count());
+ statement.BindInt(3, info.typed_count());
+ statement.BindInt64(4, info.last_visit().ToInternalValue());
+ statement.BindInt(5, info.hidden() ? 1 : 0);
+ statement.BindInt64(6, info.favicon_id());
+
+ if (!statement.Run())
+ return 0;
+ return GetDB().GetLastInsertRowId();
+}
+
+bool URLDatabase::DeleteURLRow(URLID id) {
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "DELETE FROM urls WHERE id = ?"));
+ if (!statement)
+ return false;
+
+ statement.BindInt64(0, id);
+ if (!statement.Run())
+ return false;
+
+ // And delete any keyword visits.
+ if (!has_keyword_search_terms_)
+ return true;
+
+ sql::Statement del_keyword_visit(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "DELETE FROM keyword_search_terms WHERE url_id=?"));
+ if (!del_keyword_visit)
+ return false;
+ del_keyword_visit.BindInt64(0, id);
+ return del_keyword_visit.Run();
+}
+
+bool URLDatabase::CreateTemporaryURLTable() {
+ return CreateURLTable(true);
+}
+
+bool URLDatabase::CommitTemporaryURLTable() {
+ // See the comments in the header file as well as
+ // HistoryBackend::DeleteAllHistory() for more information on how this works
+ // and why it does what it does.
+ //
+ // Note that the main database overrides this to additionally create the
+  // supplementary indices that the archived database doesn't need.
+
+ // Swap the url table out and replace it with the temporary one.
+ if (!GetDB().Execute("DROP TABLE urls")) {
+ NOTREACHED() << GetDB().GetErrorMessage();
+ return false;
+ }
+ if (!GetDB().Execute("ALTER TABLE temp_urls RENAME TO urls")) {
+ NOTREACHED() << GetDB().GetErrorMessage();
+ return false;
+ }
+
+ // Create the index over URLs. This is needed for the main, in-memory, and
+  // archived databases, so we always do it. The supplementary indices used by
+ // the main database are not created here. When deleting all history, they
+ // are created by HistoryDatabase::RecreateAllButStarAndURLTables().
+ CreateMainURLIndex();
+
+ return true;
+}
+
+bool URLDatabase::InitURLEnumeratorForEverything(URLEnumerator* enumerator) {
+ DCHECK(!enumerator->initialized_);
+ std::string sql("SELECT ");
+ sql.append(kURLRowFields);
+ sql.append(" FROM urls");
+ enumerator->statement_.Assign(GetDB().GetUniqueStatement(sql.c_str()));
+ if (!enumerator->statement_) {
+ NOTREACHED() << GetDB().GetErrorMessage();
+ return false;
+ }
+ enumerator->initialized_ = true;
+ return true;
+}
+
+bool URLDatabase::IsFavIconUsed(FavIconID favicon_id) {
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "SELECT id FROM urls WHERE favicon_id=? LIMIT 1"));
+ if (!statement)
+ return false;
+
+ statement.BindInt64(0, favicon_id);
+ return statement.Step();
+}
+
+void URLDatabase::AutocompleteForPrefix(const string16& prefix,
+ size_t max_results,
+ std::vector<history::URLRow>* results) {
+  // NOTE: this query originally sorted by starred as the second parameter.
+  // But as bookmarks are no longer part of this database, we no longer
+  // include that ORDER BY clause.
+ results->clear();
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls "
+ "WHERE url >= ? AND url < ? AND hidden = 0 "
+ "ORDER BY typed_count DESC, visit_count DESC, last_visit_time DESC "
+ "LIMIT ?"));
+ if (!statement)
+ return;
+
+  // We will find all strings between |prefix| and this string, which is the
+  // prefix followed by the maximum character value. Use 8-bit strings for
+  // everything so we can be sure sqlite is comparing everything in 8-bit
+  // mode. Otherwise, it will have to convert strings either to UTF-8 or
+  // UTF-16 for comparison.
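+  // For example, for prefix "go" the bounds are ["go", "go\xFF"), which
+  // covers effectively every URL beginning with "go".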
+ std::string prefix_utf8(UTF16ToUTF8(prefix));
+ std::string end_query(prefix_utf8);
+ end_query.push_back(std::numeric_limits<unsigned char>::max());
+
+ statement.BindString(0, prefix_utf8);
+ statement.BindString(1, end_query);
+ statement.BindInt(2, static_cast<int>(max_results));
+
+ while (statement.Step()) {
+ history::URLRow info;
+ FillURLRow(statement, &info);
+ if (info.url().is_valid())
+ results->push_back(info);
+ }
+}
+
+bool URLDatabase::FindShortestURLFromBase(const std::string& base,
+ const std::string& url,
+ int min_visits,
+ int min_typed,
+ bool allow_base,
+ history::URLRow* info) {
+ // Select URLs that start with |base| and are prefixes of |url|. All parts
+ // of this query except the substr() call can be done using the index. We
+ // could do this query with a couple of LIKE or GLOB statements as well, but
+ // those wouldn't use the index, and would run into problems with "wildcard"
+ // characters that appear in URLs (% for LIKE, or *, ? for GLOB).
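+  // For example (illustrative values): with base "http://www.google.com/"
+  // and url "http://www.google.com/test/index.html", this returns the
+  // shortest stored row, if any, that starts with the base and is a prefix
+  // of the url, such as "http://www.google.com/test".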
+ std::string sql("SELECT ");
+ sql.append(kURLRowFields);
+ sql.append(" FROM urls WHERE url ");
+ sql.append(allow_base ? ">=" : ">");
+ sql.append(" ? AND url < :end AND url = substr(:end, 1, length(url)) "
+ "AND hidden = 0 AND visit_count >= ? AND typed_count >= ? "
+ "ORDER BY url LIMIT 1");
+ sql::Statement statement(GetDB().GetUniqueStatement(sql.c_str()));
+ if (!statement) {
+ NOTREACHED() << GetDB().GetErrorMessage();
+ return false;
+ }
+
+ statement.BindString(0, base);
+ statement.BindString(1, url); // :end
+ statement.BindInt(2, min_visits);
+ statement.BindInt(3, min_typed);
+
+ if (!statement.Step())
+ return false;
+
+ DCHECK(info);
+ FillURLRow(statement, info);
+ return true;
+}
+
+bool URLDatabase::InitKeywordSearchTermsTable() {
+ has_keyword_search_terms_ = true;
+ if (!GetDB().DoesTableExist("keyword_search_terms")) {
+ if (!GetDB().Execute("CREATE TABLE keyword_search_terms ("
+ "keyword_id INTEGER NOT NULL," // ID of the TemplateURL.
+ "url_id INTEGER NOT NULL," // ID of the url.
+ "lower_term LONGVARCHAR NOT NULL," // The search term, in lower case.
+ "term LONGVARCHAR NOT NULL)")) // The actual search term.
+ return false;
+ }
+
+ // For searching.
+ GetDB().Execute("CREATE INDEX keyword_search_terms_index1 ON "
+ "keyword_search_terms (keyword_id, lower_term)");
+
+ // For deletion.
+ GetDB().Execute("CREATE INDEX keyword_search_terms_index2 ON "
+ "keyword_search_terms (url_id)");
+
+ return true;
+}
+
+bool URLDatabase::DropKeywordSearchTermsTable() {
+ // This will implicitly delete the indices over the table.
+ return GetDB().Execute("DROP TABLE keyword_search_terms");
+}
+
+bool URLDatabase::SetKeywordSearchTermsForURL(URLID url_id,
+ TemplateURL::IDType keyword_id,
+ const string16& term) {
+ DCHECK(url_id && keyword_id && !term.empty());
+
+ sql::Statement exist_statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "SELECT term FROM keyword_search_terms "
+ "WHERE keyword_id = ? AND url_id = ?"));
+ if (!exist_statement)
+ return false;
+ exist_statement.BindInt64(0, keyword_id);
+ exist_statement.BindInt64(1, url_id);
+ if (exist_statement.Step())
+ return true; // Term already exists, no need to add it.
+
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "INSERT INTO keyword_search_terms (keyword_id, url_id, lower_term, term) "
+ "VALUES (?,?,?,?)"));
+ if (!statement)
+ return false;
+
+ statement.BindInt64(0, keyword_id);
+ statement.BindInt64(1, url_id);
+ statement.BindString16(2, l10n_util::ToLower(term));
+ statement.BindString16(3, term);
+ return statement.Run();
+}
+
+void URLDatabase::DeleteAllSearchTermsForKeyword(
+ TemplateURL::IDType keyword_id) {
+ DCHECK(keyword_id);
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "DELETE FROM keyword_search_terms WHERE keyword_id=?"));
+ if (!statement)
+ return;
+
+ statement.BindInt64(0, keyword_id);
+ statement.Run();
+}
+
+void URLDatabase::GetMostRecentKeywordSearchTerms(
+ TemplateURL::IDType keyword_id,
+ const string16& prefix,
+ int max_count,
+ std::vector<KeywordSearchTermVisit>* matches) {
+  // NOTE: the keyword_id can be zero if, on first run, the user issues a
+  // query before the TemplateURLModel has finished loading. As the chances
+  // of this occurring are small, we ignore it.
+ if (!keyword_id)
+ return;
+
+ DCHECK(!prefix.empty());
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "SELECT DISTINCT kv.term, u.last_visit_time "
+ "FROM keyword_search_terms kv "
+ "JOIN urls u ON kv.url_id = u.id "
+ "WHERE kv.keyword_id = ? AND kv.lower_term >= ? AND kv.lower_term < ? "
+ "ORDER BY u.last_visit_time DESC LIMIT ?"));
+ if (!statement)
+ return;
+
+ // NOTE: Keep this ToLower() call in sync with search_provider.cc.
+ string16 lower_prefix = l10n_util::ToLower(prefix);
+ // This magic gives us a prefix search.
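+  // For example, for lower_prefix "go" the computed next_prefix is "gp",
+  // so the half-open range [lower_prefix, next_prefix) matches every term
+  // beginning with "go".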
+ string16 next_prefix = lower_prefix;
+ next_prefix[next_prefix.size() - 1] =
+ next_prefix[next_prefix.size() - 1] + 1;
+ statement.BindInt64(0, keyword_id);
+ statement.BindString16(1, lower_prefix);
+ statement.BindString16(2, next_prefix);
+ statement.BindInt(3, max_count);
+
+ KeywordSearchTermVisit visit;
+ while (statement.Step()) {
+ visit.term = statement.ColumnString16(0);
+ visit.time = base::Time::FromInternalValue(statement.ColumnInt64(1));
+ matches->push_back(visit);
+ }
+}
+
+bool URLDatabase::MigrateFromVersion11ToVersion12() {
+ URLRow about_row;
+ if (GetRowForURL(GURL(chrome::kAboutBlankURL), &about_row)) {
+ about_row.set_favicon_id(0);
+ return UpdateURLRow(about_row.id(), about_row);
+ }
+ return true;
+}
+
+bool URLDatabase::DropStarredIDFromURLs() {
+ if (!GetDB().DoesColumnExist("urls", "starred_id"))
+    return true;  // The urls table is already updated, no need to continue.
+
+ // Create a temporary table to contain the new URLs table.
+ if (!CreateTemporaryURLTable()) {
+ NOTREACHED();
+ return false;
+ }
+
+ // Copy the contents.
+ if (!GetDB().Execute(
+ "INSERT INTO temp_urls (id, url, title, visit_count, typed_count, "
+ "last_visit_time, hidden, favicon_id) "
+ "SELECT id, url, title, visit_count, typed_count, last_visit_time, "
+ "hidden, favicon_id FROM urls")) {
+ NOTREACHED() << GetDB().GetErrorMessage();
+ return false;
+ }
+
+ // Rename/commit the tmp table.
+ CommitTemporaryURLTable();
+
+  // These supplementary indices aren't created by CommitTemporaryURLTable.
+ CreateSupplimentaryURLIndices();
+
+ return true;
+}
+
+bool URLDatabase::CreateURLTable(bool is_temporary) {
+ const char* name = is_temporary ? "temp_urls" : "urls";
+ if (GetDB().DoesTableExist(name))
+ return true;
+
+ std::string sql;
+ sql.append("CREATE TABLE ");
+ sql.append(name);
+ sql.append("("
+ "id INTEGER PRIMARY KEY,"
+ "url LONGVARCHAR,"
+ "title LONGVARCHAR,"
+ "visit_count INTEGER DEFAULT 0 NOT NULL,"
+ "typed_count INTEGER DEFAULT 0 NOT NULL,"
+ "last_visit_time INTEGER NOT NULL,"
+ "hidden INTEGER DEFAULT 0 NOT NULL,"
+ "favicon_id INTEGER DEFAULT 0 NOT NULL)");
+
+ return GetDB().Execute(sql.c_str());
+}
+
+void URLDatabase::CreateMainURLIndex() {
+ // Index over URLs so we can quickly look up based on URL. Ignore errors as
+ // this likely already exists (and the same below).
+ GetDB().Execute("CREATE INDEX urls_url_index ON urls (url)");
+}
+
+void URLDatabase::CreateSupplimentaryURLIndices() {
+ // Add a favicon index. This is useful when we delete urls.
+ GetDB().Execute("CREATE INDEX urls_favicon_id_INDEX ON urls (favicon_id)");
+}
+
+} // namespace history
diff --git a/chrome/browser/history/url_database.h b/chrome/browser/history/url_database.h
new file mode 100644
index 0000000..84c8dde
--- /dev/null
+++ b/chrome/browser/history/url_database.h
@@ -0,0 +1,258 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_HISTORY_URL_DATABASE_H_
+#define CHROME_BROWSER_HISTORY_URL_DATABASE_H_
+
+#include "app/sql/statement.h"
+#include "base/basictypes.h"
+#include "chrome/browser/history/history_types.h"
+#include "chrome/browser/search_engines/template_url.h"
+
+class GURL;
+
+namespace sql {
+class Connection;
+}
+
+namespace history {
+
+class VisitDatabase; // For friend statement.
+
+// Encapsulates an SQL database that holds URL info. This is a subset of the
+// full history data. We split this class' functionality out from the larger
+// HistoryDatabase class to support maintaining separate databases of URLs with
+// different capabilities (for example, in-memory, or archived).
+class URLDatabase {
+ public:
+  // Must call CreateURLTable() and CreateMainURLIndex() before using to make
+  // sure the database is initialized.
+ URLDatabase();
+
+ // This object must be destroyed on the thread where all accesses are
+ // happening to avoid thread-safety problems.
+ virtual ~URLDatabase();
+
+  // Converts a GURL to a string used in the history database. We plan to do
+  // more complex operations involving punycode than just getting the spec
+  // out, so this function should be used instead of url.spec() when
+  // interacting with the database.
+ //
+ // TODO(brettw) this should be moved out of the public section and the
+ // entire public HistoryDatabase interface should use GURL. This should
+ // also probably return a string instead since that is what the DB uses
+ // internally and we can avoid the extra conversion.
+ static std::string GURLToDatabaseURL(const GURL& url);
+
+ // URL table functions -------------------------------------------------------
+
+ // Looks up a url given an id. Fills info with the data. Returns true on
+ // success and false otherwise.
+ bool GetURLRow(URLID url_id, URLRow* info);
+
+ // Looks up all urls that were typed in manually. Fills info with the data.
+ // Returns true on success and false otherwise.
+ bool GetAllTypedUrls(std::vector<history::URLRow>* urls);
+
+ // Looks up the given URL and if it exists, fills the given pointers with the
+ // associated info and returns the ID of that URL. If the info pointer is
+ // NULL, no information about the URL will be filled in, only the ID will be
+ // returned. Returns 0 if the URL was not found.
+ URLID GetRowForURL(const GURL& url, URLRow* info);
+
+ // Given an already-existing row in the URL table, updates that URL's stats.
+  // This cannot change the URL. Returns true on success.
+ //
+ // This will NOT update the title used for full text indexing. If you are
+ // setting the title, call SetPageIndexedData with the new title.
+ bool UpdateURLRow(URLID url_id, const URLRow& info);
+
+  // Adds a row to the URL database with the given information and returns the
+ // row ID. A row with the given URL must not exist. Returns 0 on error.
+ //
+ // This does NOT add a row to the full text search database. Use
+ // HistoryDatabase::SetPageIndexedData to do this.
+ URLID AddURL(const URLRow& info) {
+ return AddURLInternal(info, false);
+ }
+
+  // Deletes the row for the given URL ID. Only the row in the URL table
+  // will be deleted, not any other data that may refer to it. Returns true
+  // if the row existed and was deleted.
+ bool DeleteURLRow(URLID id);
+
+ // URL mass-deleting ---------------------------------------------------------
+
+  // Begins the mass-deleting operation by creating a temporary URL table.
+  // The caller then adds the URLs it wants to preserve to the temporary
+  // table, and deletes everything else by calling CommitTemporaryURLTable()
+  // (see the usage sketch in its comment). Returns true on success.
+ bool CreateTemporaryURLTable();
+
+ // Adds a row to the temporary URL table. This must be called between
+ // CreateTemporaryURLTable() and CommitTemporaryURLTable() (see those for more
+ // info). The ID of the URL will change in the temporary table, so the new ID
+ // is returned. Returns 0 on failure.
+ URLID AddTemporaryURL(const URLRow& row) {
+ return AddURLInternal(row, true);
+ }
+
+ // Ends the mass-deleting by replacing the original URL table with the
+ // temporary one created in CreateTemporaryURLTable. Returns true on success.
+ //
+  // This function does not create the supplementary indices. It is virtual so
+ // that the main history database can provide this additional behavior.
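+  //
+  // A minimal usage sketch of the whole sequence, where |rows_to_keep| is a
+  // hypothetical caller-owned std::vector<URLRow>:
+  //
+  //   db.CreateTemporaryURLTable();
+  //   for (size_t i = 0; i < rows_to_keep.size(); ++i)
+  //     db.AddTemporaryURL(rows_to_keep[i]);
+  //   db.CommitTemporaryURLTable();  // Swaps temp_urls in as urls.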
+ virtual bool CommitTemporaryURLTable();
+
+ // Enumeration ---------------------------------------------------------------
+
+  // A basic enumerator to enumerate URLs in the URL table.
+ class URLEnumerator {
+ public:
+ URLEnumerator() : initialized_(false) {
+ }
+
+    // Retrieves the next URL. Returns false if no more URLs are available.
+ bool GetNextURL(history::URLRow* r);
+
+ private:
+ friend class URLDatabase;
+
+ bool initialized_;
+ sql::Statement statement_;
+ };
+
+  // Initializes the given enumerator to enumerate all URLs in the database.
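+  //
+  // Illustrative use (ProcessRow is a hypothetical caller function):
+  //   URLEnumerator e;
+  //   if (db.InitURLEnumeratorForEverything(&e)) {
+  //     URLRow row;
+  //     while (e.GetNextURL(&row))
+  //       ProcessRow(row);
+  //   }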
+ bool InitURLEnumeratorForEverything(URLEnumerator* enumerator);
+
+ // Favicons ------------------------------------------------------------------
+
+ // Check whether a favicon is used by any URLs in the database.
+ bool IsFavIconUsed(FavIconID favicon_id);
+
+ // Autocomplete --------------------------------------------------------------
+
+ // Fills the given array with URLs matching the given prefix. They will be
+ // sorted by typed count, then by visit count, then by visit date (most
+ // recent first) up to the given maximum number. Called by HistoryURLProvider.
+ void AutocompleteForPrefix(const string16& prefix,
+ size_t max_results,
+ std::vector<URLRow>* results);
+
+ // Tries to find the shortest URL beginning with |base| that strictly
+  // prefixes |url|, and has at least the given minimum visit and typed counts.
+ // If found, fills in |info| and returns true; otherwise returns false,
+ // leaving |info| unchanged.
+ // We allow matches of exactly |base| iff |allow_base| is true.
+ bool FindShortestURLFromBase(const std::string& base,
+ const std::string& url,
+ int min_visits,
+ int min_typed,
+ bool allow_base,
+ history::URLRow* info);
+
+ // Keyword Search Terms ------------------------------------------------------
+
+ // Sets the search terms for the specified url/keyword pair.
+ bool SetKeywordSearchTermsForURL(URLID url_id,
+ TemplateURL::IDType keyword_id,
+ const string16& term);
+
+ // Deletes all search terms for the specified keyword that have been added by
+ // way of SetKeywordSearchTermsForURL.
+ void DeleteAllSearchTermsForKeyword(TemplateURL::IDType keyword_id);
+
+ // Returns up to max_count of the most recent search terms for the specified
+ // keyword.
+ void GetMostRecentKeywordSearchTerms(
+ TemplateURL::IDType keyword_id,
+ const string16& prefix,
+ int max_count,
+ std::vector<KeywordSearchTermVisit>* matches);
+
+ // Migration -----------------------------------------------------------------
+
+  // Due to a bug, we were setting the favicon of about:blank. This forces
+  // about:blank to have no icon or title. Returns true on success, false if
+  // the favicon couldn't be updated.
+ bool MigrateFromVersion11ToVersion12();
+
+ protected:
+ friend class VisitDatabase;
+
+ // See HISTORY_URL_ROW_FIELDS below.
+ static const char kURLRowFields[];
+
+  // The number of fields in kURLRowFields. If callers need additional
+ // fields, they can add their 0-based index to this value to get the index of
+ // fields following kURLRowFields.
+ static const int kNumURLRowFields;
+
+ // Drops the starred_id column from urls, returning true on success. This does
+  // nothing (and returns true) if the urls table doesn't contain the starred_id
+ // column.
+ bool DropStarredIDFromURLs();
+
+ // Initialization functions. The indexing functions are separate from the
+ // table creation functions so the in-memory database and the temporary tables
+ // used when clearing history can populate the table and then create the
+ // index, which is faster than the reverse.
+ //
+ // is_temporary is false when generating the "regular" URLs table. The expirer
+ // sets this to true to generate the temporary table, which will have a
+ // different name but the same schema.
+ bool CreateURLTable(bool is_temporary);
+ // We have two tiers of indices for the URL table. The main tier is used by
+ // all URL databases, and is an index over the URL itself. The main history
+ // DB also creates indices over the favicons and bookmark IDs. The archived
+  // and in-memory databases don't need these supplementary indices, so we can
+ // save space by not creating them.
+ void CreateMainURLIndex();
+ void CreateSupplimentaryURLIndices();
+
+ // Ensures the keyword search terms table exists.
+ bool InitKeywordSearchTermsTable();
+
+ // Deletes the keyword search terms table.
+ bool DropKeywordSearchTermsTable();
+
+  // Inserts the given URL row into the URLs table, using the regular table
+  // if is_temporary is false, or the temporary URL table if is_temporary is
+  // true. The temporary table may only be used in between
+  // CreateTemporaryURLTable() and CommitTemporaryURLTable().
+ URLID AddURLInternal(const URLRow& info, bool is_temporary);
+
+ // Convenience to fill a history::URLRow. Must be in sync with the fields in
+  // HISTORY_URL_ROW_FIELDS.
+ static void FillURLRow(sql::Statement& s, URLRow* i);
+
+  // Returns the database for the functions in this interface. The descendant
+  // of this class implements this function to return its database object.
+ virtual sql::Connection& GetDB() = 0;
+
+ private:
+ // True if InitKeywordSearchTermsTable() has been invoked. Not all subclasses
+ // have keyword search terms.
+ bool has_keyword_search_terms_;
+
+ DISALLOW_COPY_AND_ASSIGN(URLDatabase);
+};
+
+// The fields and order expected by FillURLRow(). ID is guaranteed to be first
+// so that DISTINCT can be prepended to get distinct URLs.
+//
+// This is available BOTH as a macro and a static string (kURLRowFields). Use
+// the macro if you want to put this in the middle of an otherwise constant
+// string, it will save time doing string appends. If you have to build a SQL
+// string dynamically anyway, use the constant, it will save space.
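+//
+// For example (the pattern used throughout url_database.cc):
+//   sql::Statement s(db.GetCachedStatement(SQL_FROM_HERE,
+//       "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls WHERE id=?"));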
+#define HISTORY_URL_ROW_FIELDS \
+ " urls.id, urls.url, urls.title, urls.visit_count, urls.typed_count, " \
+ "urls.last_visit_time, urls.hidden, urls.favicon_id "
+
+}  // namespace history
+
+#endif // CHROME_BROWSER_HISTORY_URL_DATABASE_H_
diff --git a/chrome/browser/history/url_database_unittest.cc b/chrome/browser/history/url_database_unittest.cc
new file mode 100644
index 0000000..32ded0c
--- /dev/null
+++ b/chrome/browser/history/url_database_unittest.cc
@@ -0,0 +1,179 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "app/sql/connection.h"
+#include "base/file_path.h"
+#include "base/file_util.h"
+#include "base/path_service.h"
+#include "base/string_util.h"
+#include "base/utf_string_conversions.h"
+#include "chrome/browser/history/url_database.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+using base::Time;
+using base::TimeDelta;
+
+namespace history {
+
+namespace {
+
+bool IsURLRowEqual(const URLRow& a,
+ const URLRow& b) {
+  // TODO(brettw) when the database stores an actual Time value rather than
+  // a time_t, do a real comparison. Until then, we have to do a rougher
+  // comparison since the conversion reduces the precision.
+ return a.title() == b.title() &&
+ a.visit_count() == b.visit_count() &&
+ a.typed_count() == b.typed_count() &&
+ a.last_visit() - b.last_visit() <= TimeDelta::FromSeconds(1) &&
+ a.hidden() == b.hidden();
+}
+
+} // namespace
+
+class URLDatabaseTest : public testing::Test,
+ public URLDatabase {
+ public:
+ URLDatabaseTest() {
+ }
+
+ private:
+ // Test setup.
+ void SetUp() {
+ FilePath temp_dir;
+ PathService::Get(base::DIR_TEMP, &temp_dir);
+ db_file_ = temp_dir.AppendASCII("URLTest.db");
+
+ EXPECT_TRUE(db_.Open(db_file_));
+
+ // Initialize the tables for this test.
+ CreateURLTable(false);
+ CreateMainURLIndex();
+ CreateSupplimentaryURLIndices();
+ InitKeywordSearchTermsTable();
+ }
+ void TearDown() {
+ db_.Close();
+ file_util::Delete(db_file_, false);
+ }
+
+ // Provided for URL/VisitDatabase.
+ virtual sql::Connection& GetDB() {
+ return db_;
+ }
+
+ FilePath db_file_;
+ sql::Connection db_;
+};
+
+// Test adding and querying rows in the URL table.
+TEST_F(URLDatabaseTest, AddURL) {
+  // First, add two URLs.
+ const GURL url1("http://www.google.com/");
+ URLRow url_info1(url1);
+ url_info1.set_title(UTF8ToUTF16("Google"));
+ url_info1.set_visit_count(4);
+ url_info1.set_typed_count(2);
+ url_info1.set_last_visit(Time::Now() - TimeDelta::FromDays(1));
+ url_info1.set_hidden(false);
+ EXPECT_TRUE(AddURL(url_info1));
+
+ const GURL url2("http://mail.google.com/");
+ URLRow url_info2(url2);
+ url_info2.set_title(UTF8ToUTF16("Google Mail"));
+ url_info2.set_visit_count(3);
+ url_info2.set_typed_count(0);
+ url_info2.set_last_visit(Time::Now() - TimeDelta::FromDays(2));
+ url_info2.set_hidden(true);
+ EXPECT_TRUE(AddURL(url_info2));
+
+  // Query both of them.
+ URLRow info;
+ EXPECT_TRUE(GetRowForURL(url1, &info));
+ EXPECT_TRUE(IsURLRowEqual(url_info1, info));
+ URLID id2 = GetRowForURL(url2, &info);
+ EXPECT_TRUE(id2);
+ EXPECT_TRUE(IsURLRowEqual(url_info2, info));
+
+  // Update the second.
+ url_info2.set_title(UTF8ToUTF16("Google Mail Too"));
+ url_info2.set_visit_count(4);
+  url_info2.set_typed_count(91011);
+ url_info2.set_hidden(false);
+ EXPECT_TRUE(UpdateURLRow(id2, url_info2));
+
+  // Make sure it got updated.
+ URLRow info2;
+ EXPECT_TRUE(GetRowForURL(url2, &info2));
+ EXPECT_TRUE(IsURLRowEqual(url_info2, info2));
+
+  // Query a nonexistent URL.
+ EXPECT_EQ(0, GetRowForURL(GURL("http://news.google.com/"), &info));
+
+  // Delete all URLs in the domain.
+ // TODO(acw): test the new url based delete domain
+ // EXPECT_TRUE(db.DeleteDomain(kDomainID));
+
+ // Make sure the urls have been properly removed
+ // TODO(acw): commented out because remove no longer works.
+ // EXPECT_TRUE(db.GetURLInfo(url1, NULL) == NULL);
+ // EXPECT_TRUE(db.GetURLInfo(url2, NULL) == NULL);
+}
+
+// Tests adding, querying and deleting keyword visits.
+TEST_F(URLDatabaseTest, KeywordSearchTermVisit) {
+ const GURL url1("http://www.google.com/");
+ URLRow url_info1(url1);
+ url_info1.set_title(UTF8ToUTF16("Google"));
+ url_info1.set_visit_count(4);
+ url_info1.set_typed_count(2);
+ url_info1.set_last_visit(Time::Now() - TimeDelta::FromDays(1));
+ url_info1.set_hidden(false);
+ URLID url_id = AddURL(url_info1);
+ ASSERT_TRUE(url_id != 0);
+
+ // Add a keyword visit.
+ ASSERT_TRUE(SetKeywordSearchTermsForURL(url_id, 1, UTF8ToUTF16("visit")));
+
+ // Make sure we get it back.
+ std::vector<KeywordSearchTermVisit> matches;
+ GetMostRecentKeywordSearchTerms(1, UTF8ToUTF16("visit"), 10, &matches);
+ ASSERT_EQ(1U, matches.size());
+ ASSERT_EQ(UTF8ToUTF16("visit"), matches[0].term);
+
+ // Delete the keyword visit.
+ DeleteAllSearchTermsForKeyword(1);
+
+ // Make sure we don't get it back when querying.
+ matches.clear();
+ GetMostRecentKeywordSearchTerms(1, UTF8ToUTF16("visit"), 10, &matches);
+ ASSERT_EQ(0U, matches.size());
+}
+
+// Make sure deleting a URL also deletes a keyword visit.
+TEST_F(URLDatabaseTest, DeleteURLDeletesKeywordSearchTermVisit) {
+ const GURL url1("http://www.google.com/");
+ URLRow url_info1(url1);
+ url_info1.set_title(UTF8ToUTF16("Google"));
+ url_info1.set_visit_count(4);
+ url_info1.set_typed_count(2);
+ url_info1.set_last_visit(Time::Now() - TimeDelta::FromDays(1));
+ url_info1.set_hidden(false);
+ URLID url_id = AddURL(url_info1);
+ ASSERT_TRUE(url_id != 0);
+
+ // Add a keyword visit.
+ ASSERT_TRUE(SetKeywordSearchTermsForURL(url_id, 1, UTF8ToUTF16("visit")));
+
+ // Delete the url.
+ ASSERT_TRUE(DeleteURLRow(url_id));
+
+ // Make sure the keyword visit was deleted.
+ std::vector<KeywordSearchTermVisit> matches;
+ GetMostRecentKeywordSearchTerms(1, UTF8ToUTF16("visit"), 10, &matches);
+ ASSERT_EQ(0U, matches.size());
+}
+
+} // namespace history
diff --git a/chrome/browser/history/visit_database.cc b/chrome/browser/history/visit_database.cc
new file mode 100644
index 0000000..80fa8c8
--- /dev/null
+++ b/chrome/browser/history/visit_database.cc
@@ -0,0 +1,440 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/history/visit_database.h"
+
+#include <algorithm>
+#include <limits>
+#include <map>
+#include <set>
+
+#include "app/sql/connection.h"
+#include "app/sql/statement.h"
+#include "base/logging.h"
+#include "chrome/browser/history/url_database.h"
+#include "chrome/common/page_transition_types.h"
+#include "chrome/common/url_constants.h"
+
+// Columns, in order, of the visit table.
+#define HISTORY_VISIT_ROW_FIELDS \
+ " id,url,visit_time,from_visit,transition,segment_id,is_indexed "
+
+namespace history {
+
+VisitDatabase::VisitDatabase() {
+}
+
+VisitDatabase::~VisitDatabase() {
+}
+
+bool VisitDatabase::InitVisitTable() {
+ if (!GetDB().DoesTableExist("visits")) {
+ if (!GetDB().Execute("CREATE TABLE visits("
+ "id INTEGER PRIMARY KEY,"
+ "url INTEGER NOT NULL," // key of the URL this corresponds to
+ "visit_time INTEGER NOT NULL,"
+ "from_visit INTEGER,"
+ "transition INTEGER DEFAULT 0 NOT NULL,"
+ "segment_id INTEGER,"
+ // True when we have indexed data for this visit.
+ "is_indexed BOOLEAN)"))
+ return false;
+ } else if (!GetDB().DoesColumnExist("visits", "is_indexed")) {
+    // Old versions don't have the is_indexed column, so we can just add it
+    // and not worry about different database revisions, since old ones will
+    // continue to work.
+ //
+ // TODO(brettw) this should be removed once we think everybody has been
+ // updated (added early Mar 2008).
+ if (!GetDB().Execute("ALTER TABLE visits ADD COLUMN is_indexed BOOLEAN"))
+ return false;
+ }
+
+  // Index over url so we can quickly find visits for a page. This will
+  // simply fail if it already exists, and we ignore the error.
+ GetDB().Execute("CREATE INDEX visits_url_index ON visits (url)");
+
+ // Create an index over from visits so that we can efficiently find
+ // referrers and redirects. Ignore failures because it likely already exists.
+ GetDB().Execute("CREATE INDEX visits_from_index ON visits (from_visit)");
+
+ // Create an index over time so that we can efficiently find the visits in a
+ // given time range (most history views are time-based). Ignore failures
+ // because it likely already exists.
+ GetDB().Execute("CREATE INDEX visits_time_index ON visits (visit_time)");
+
+ return true;
+}
+
+bool VisitDatabase::DropVisitTable() {
+ // This will also drop the indices over the table.
+ return GetDB().Execute("DROP TABLE visits");
+}
+
+// Must be in sync with HISTORY_VISIT_ROW_FIELDS.
+// static
+void VisitDatabase::FillVisitRow(sql::Statement& statement, VisitRow* visit) {
+ visit->visit_id = statement.ColumnInt64(0);
+ visit->url_id = statement.ColumnInt64(1);
+ visit->visit_time = base::Time::FromInternalValue(statement.ColumnInt64(2));
+ visit->referring_visit = statement.ColumnInt64(3);
+ visit->transition = PageTransition::FromInt(statement.ColumnInt(4));
+ visit->segment_id = statement.ColumnInt64(5);
+ visit->is_indexed = !!statement.ColumnInt(6);
+}
+
+// static
+void VisitDatabase::FillVisitVector(sql::Statement& statement,
+ VisitVector* visits) {
+ while (statement.Step()) {
+ history::VisitRow visit;
+ FillVisitRow(statement, &visit);
+ visits->push_back(visit);
+ }
+}
+
+VisitID VisitDatabase::AddVisit(VisitRow* visit) {
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "INSERT INTO visits "
+ "(url, visit_time, from_visit, transition, segment_id, is_indexed) "
+ "VALUES (?,?,?,?,?,?)"));
+ if (!statement)
+ return 0;
+
+ statement.BindInt64(0, visit->url_id);
+ statement.BindInt64(1, visit->visit_time.ToInternalValue());
+ statement.BindInt64(2, visit->referring_visit);
+ statement.BindInt64(3, visit->transition);
+ statement.BindInt64(4, visit->segment_id);
+ statement.BindInt64(5, visit->is_indexed);
+ if (!statement.Run())
+ return 0;
+
+ visit->visit_id = GetDB().GetLastInsertRowId();
+ return visit->visit_id;
+}
+
+void VisitDatabase::DeleteVisit(const VisitRow& visit) {
+  // Patch around this visit. Any visits that were referred by this one will
+  // now have the deleted visit's referrer as their "source".
+ sql::Statement update_chain(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "UPDATE visits SET from_visit=? WHERE from_visit=?"));
+ if (!update_chain)
+ return;
+ update_chain.BindInt64(0, visit.referring_visit);
+ update_chain.BindInt64(1, visit.visit_id);
+ update_chain.Run();
+
+ // Now delete the actual visit.
+ sql::Statement del(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "DELETE FROM visits WHERE id=?"));
+ if (!del)
+ return;
+ del.BindInt64(0, visit.visit_id);
+ del.Run();
+}
+
+bool VisitDatabase::GetRowForVisit(VisitID visit_id, VisitRow* out_visit) {
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits WHERE id=?"));
+ if (!statement)
+ return false;
+
+ statement.BindInt64(0, visit_id);
+ if (!statement.Step())
+ return false;
+
+ FillVisitRow(statement, out_visit);
+
+  // If we got a different visit than we asked for, something is wrong.
+ DCHECK_EQ(visit_id, out_visit->visit_id);
+ if (visit_id != out_visit->visit_id)
+ return false;
+
+ return true;
+}
+
+bool VisitDatabase::UpdateVisitRow(const VisitRow& visit) {
+ // Don't store inconsistent data to the database.
+ DCHECK_NE(visit.visit_id, visit.referring_visit);
+ if (visit.visit_id == visit.referring_visit)
+ return false;
+
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "UPDATE visits SET "
+ "url=?,visit_time=?,from_visit=?,transition=?,segment_id=?,is_indexed=? "
+ "WHERE id=?"));
+ if (!statement)
+ return false;
+
+ statement.BindInt64(0, visit.url_id);
+ statement.BindInt64(1, visit.visit_time.ToInternalValue());
+ statement.BindInt64(2, visit.referring_visit);
+ statement.BindInt64(3, visit.transition);
+ statement.BindInt64(4, visit.segment_id);
+ statement.BindInt64(5, visit.is_indexed);
+ statement.BindInt64(6, visit.visit_id);
+ return statement.Run();
+}
+
+bool VisitDatabase::GetVisitsForURL(URLID url_id, VisitVector* visits) {
+ visits->clear();
+
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "SELECT" HISTORY_VISIT_ROW_FIELDS
+ "FROM visits "
+ "WHERE url=? "
+ "ORDER BY visit_time ASC"));
+ if (!statement)
+ return false;
+
+ statement.BindInt64(0, url_id);
+ FillVisitVector(statement, visits);
+ return true;
+}
+
+void VisitDatabase::GetAllVisitsInRange(base::Time begin_time,
+ base::Time end_time,
+ int max_results,
+ VisitVector* visits) {
+ visits->clear();
+
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits "
+ "WHERE visit_time >= ? AND visit_time < ?"
+ "ORDER BY visit_time LIMIT ?"));
+ if (!statement)
+ return;
+
+ // See GetVisibleVisitsInRange for more info on how these times are bound.
+ int64 end = end_time.ToInternalValue();
+ statement.BindInt64(0, begin_time.ToInternalValue());
+ statement.BindInt64(1, end ? end : std::numeric_limits<int64>::max());
+ statement.BindInt64(2,
+ max_results ? max_results : std::numeric_limits<int64>::max());
+
+ FillVisitVector(statement, visits);
+}
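+
+// Illustrative sketch (not part of the original change): passing null times
+// and a zero max count queries the unbounded range. For some VisitDatabase
+// instance |db|:
+//
+//   VisitVector all_visits;
+//   db->GetAllVisitsInRange(base::Time(), base::Time(), 0, &all_visits);
+//
+// A null base::Time() has internal value 0, which the bindings above turn
+// into "no lower bound" and "no upper bound" respectively.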
+
+void VisitDatabase::GetVisitsInRangeForTransition(
+ base::Time begin_time,
+ base::Time end_time,
+ int max_results,
+ PageTransition::Type transition,
+ VisitVector* visits) {
+ DCHECK(visits);
+ visits->clear();
+
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits "
+ "WHERE visit_time >= ? AND visit_time < ? "
+ "AND (transition & ?) == ?"
+ "ORDER BY visit_time LIMIT ?"));
+ if (!statement)
+ return;
+
+ // See GetVisibleVisitsInRange for more info on how these times are bound.
+ int64 end = end_time.ToInternalValue();
+ statement.BindInt64(0, begin_time.ToInternalValue());
+ statement.BindInt64(1, end ? end : std::numeric_limits<int64>::max());
+ statement.BindInt(2, PageTransition::CORE_MASK);
+ statement.BindInt(3, transition);
+ statement.BindInt64(4,
+ max_results ? max_results : std::numeric_limits<int64>::max());
+
+ FillVisitVector(statement, visits);
+}
+
+void VisitDatabase::GetVisibleVisitsInRange(base::Time begin_time,
+ base::Time end_time,
+ int max_count,
+ VisitVector* visits) {
+ visits->clear();
+ // The visit_time values can be duplicated in a redirect chain, so we sort
+ // by id too, to ensure a consistent ordering just in case.
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits "
+ "WHERE visit_time >= ? AND visit_time < ? "
+ "AND (transition & ?) != 0 " // CHAIN_END
+ "AND (transition & ?) NOT IN (?, ?, ?) " // NO SUBFRAME or
+ // KEYWORD_GENERATED
+ "ORDER BY visit_time DESC, id DESC"));
+ if (!statement)
+ return;
+
+ // Note that we use min/max values for querying unlimited ranges of time using
+ // the same statement. Since the time has an index, this will be about the
+ // same amount of work as just doing a query for everything with no qualifier.
+ int64 end = end_time.ToInternalValue();
+ statement.BindInt64(0, begin_time.ToInternalValue());
+ statement.BindInt64(1, end ? end : std::numeric_limits<int64>::max());
+ statement.BindInt(2, PageTransition::CHAIN_END);
+ statement.BindInt(3, PageTransition::CORE_MASK);
+ statement.BindInt(4, PageTransition::AUTO_SUBFRAME);
+ statement.BindInt(5, PageTransition::MANUAL_SUBFRAME);
+ statement.BindInt(6, PageTransition::KEYWORD_GENERATED);
+
+ std::set<URLID> found_urls;
+ while (statement.Step()) {
+ VisitRow visit;
+ FillVisitRow(statement, &visit);
+ // Make sure the URL this visit corresponds to is unique.
+ if (found_urls.find(visit.url_id) != found_urls.end())
+ continue;
+ found_urls.insert(visit.url_id);
+ visits->push_back(visit);
+
+ if (max_count > 0 && static_cast<int>(visits->size()) >= max_count)
+ break;
+ }
+}
+
+VisitID VisitDatabase::GetMostRecentVisitForURL(URLID url_id,
+ VisitRow* visit_row) {
+ // The visit_time values can be duplicated in a redirect chain, so we sort
+ // by id too, to ensure a consistent ordering just in case.
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits "
+ "WHERE url=? "
+ "ORDER BY visit_time DESC, id DESC "
+ "LIMIT 1"));
+ if (!statement)
+ return 0;
+
+ statement.BindInt64(0, url_id);
+ if (!statement.Step())
+ return 0; // No visits for this URL.
+
+ if (visit_row) {
+ FillVisitRow(statement, visit_row);
+ return visit_row->visit_id;
+ }
+ return statement.ColumnInt64(0);
+}
+
+bool VisitDatabase::GetMostRecentVisitsForURL(URLID url_id,
+ int max_results,
+ VisitVector* visits) {
+ visits->clear();
+
+ // The visit_time values can be duplicated in a redirect chain, so we sort
+ // by id too, to ensure a consistent ordering just in case.
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "SELECT" HISTORY_VISIT_ROW_FIELDS
+ "FROM visits "
+ "WHERE url=? "
+ "ORDER BY visit_time DESC, id DESC "
+ "LIMIT ?"));
+ if (!statement)
+ return false;
+
+ statement.BindInt64(0, url_id);
+ statement.BindInt(1, max_results);
+ FillVisitVector(statement, visits);
+ return true;
+}
+
+bool VisitDatabase::GetRedirectFromVisit(VisitID from_visit,
+ VisitID* to_visit,
+ GURL* to_url) {
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "SELECT v.id,u.url "
+ "FROM visits v JOIN urls u ON v.url = u.id "
+ "WHERE v.from_visit = ? "
+ "AND (v.transition & ?) != 0")); // IS_REDIRECT_MASK
+ if (!statement)
+ return false;
+
+ statement.BindInt64(0, from_visit);
+ statement.BindInt(1, PageTransition::IS_REDIRECT_MASK);
+
+ if (!statement.Step())
+ return false; // No redirect from this visit.
+ if (to_visit)
+ *to_visit = statement.ColumnInt64(0);
+ if (to_url)
+ *to_url = GURL(statement.ColumnString(1));
+ return true;
+}
+
+bool VisitDatabase::GetRedirectToVisit(VisitID to_visit,
+ VisitID* from_visit,
+ GURL* from_url) {
+ VisitRow row;
+ if (!GetRowForVisit(to_visit, &row))
+ return false;
+
+ if (from_visit)
+ *from_visit = row.referring_visit;
+
+ if (from_url) {
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "SELECT u.url "
+ "FROM visits v JOIN urls u ON v.url = u.id "
+ "WHERE v.id = ?"));
+ statement.BindInt64(0, row.referring_visit);
+
+ if (!statement.Step())
+ return false;
+
+ *from_url = GURL(statement.ColumnString(0));
+ }
+ return true;
+}
+
+bool VisitDatabase::GetVisitCountToHost(const GURL& url,
+ int* count,
+ base::Time* first_visit) {
+ if (!url.SchemeIs(chrome::kHttpScheme) && !url.SchemeIs(chrome::kHttpsScheme))
+ return false;
+
+ // We need to search for URLs with a matching host/port. One way to query for
+ // this is to use the LIKE operator, e.g. 'url LIKE http://google.com/%'.
+ // This is inefficient, though, in that it doesn't use the index and each
+ // entry must be visited. The same query can be executed with the >= and <
+ // operators. The query becomes:
+ // 'url >= http://google.com/ AND url < http://google.com0'.
+ // '0' is used as it is one character greater than '/'.
+ GURL search_url(url);
+ const std::string host_query_min = search_url.GetOrigin().spec();
+
+ if (host_query_min.empty())
+ return false;
+
+ std::string host_query_max = host_query_min;
+ host_query_max[host_query_max.size() - 1] = '0';
+
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "SELECT MIN(v.visit_time), COUNT(*) "
+ "FROM visits v INNER JOIN urls u ON v.url = u.id "
+ "WHERE (u.url >= ? AND u.url < ?)"));
+ if (!statement)
+ return false;
+
+ statement.BindString(0, host_query_min);
+ statement.BindString(1, host_query_max);
+
+ if (!statement.Step()) {
+ // We've never been to this host before.
+ *count = 0;
+ return true;
+ }
+
+ *first_visit = base::Time::FromInternalValue(statement.ColumnInt64(0));
+ *count = statement.ColumnInt(1);
+ return true;
+}
+
+bool VisitDatabase::GetStartDate(base::Time* first_visit) {
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "SELECT MIN(visit_time) FROM visits WHERE visit_time != 0"));
+ if (!statement || !statement.Step() || statement.ColumnInt64(0) == 0) {
+ *first_visit = base::Time::Now();
+ return false;
+ }
+ *first_visit = base::Time::FromInternalValue(statement.ColumnInt64(0));
+ return true;
+}
+
+} // namespace history
diff --git a/chrome/browser/history/visit_database.h b/chrome/browser/history/visit_database.h
new file mode 100644
index 0000000..a6dbf3c
--- /dev/null
+++ b/chrome/browser/history/visit_database.h
@@ -0,0 +1,168 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_HISTORY_VISIT_DATABASE_H_
+#define CHROME_BROWSER_HISTORY_VISIT_DATABASE_H_
+
+#include "chrome/browser/history/history_types.h"
+
+namespace sql {
+class Connection;
+class Statement;
+}
+
+namespace history {
+
+// A visit database is one which stores visits for URLs, that is, times and
+// linking information. A visit database must also be a URLDatabase, since it
+// directly modifies tables used by URLs and could be thought of as inheriting
+// from URLDatabase. However, this inheritance is not made explicit, as that
+// would overcomplicate things and require multiple inheritance.
+class VisitDatabase {
+ public:
+ // Must call InitVisitTable() before using to make sure the database is
+ // initialized.
+ VisitDatabase();
+ virtual ~VisitDatabase();
+
+ // Deletes the visit table. Used for rapidly clearing all visits. In this
+ // case, InitVisitTable would be called immediately afterward to re-create it.
+ // Returns true on success.
+ bool DropVisitTable();
+
+ // Adds a row to the visit database with the given information, returning
+ // the added row ID on success, 0 on failure. The given visit is updated with
+ // the new row ID on success.
+ VisitID AddVisit(VisitRow* visit);
+
+ // Deletes the given visit from the database. If a visit with the given ID
+ // doesn't exist, it will not do anything.
+ void DeleteVisit(const VisitRow& visit);
+
+ // Queries the visit with the given visit ID, filling the given VisitRow.
+ // Returns true on success.
+ bool GetRowForVisit(VisitID visit_id, VisitRow* out_visit);
+
+ // Updates an existing row. The new information is set on the row, using the
+ // VisitID as the key. The visit must exist. Returns true on success.
+ bool UpdateVisitRow(const VisitRow& visit);
+
+ // Fills in the given vector with all of the visits for the given page ID,
+ // sorted in ascending order of date. Returns true on success (although there
+ // may still be no matches).
+ bool GetVisitsForURL(URLID url_id, VisitVector* visits);
+
+ // Fills all visits in the time range [begin, end) into the given vector.
+ // Either time can be is_null(), in which case the times in that direction
+ // are unbounded.
+ //
+ // If |max_results| is non-zero, up to that many results will be returned. If
+ // there are more results than that, the oldest ones will be returned. (This
+ // is used for history expiration.)
+ //
+ // The results will be in increasing order of date.
+ void GetAllVisitsInRange(base::Time begin_time, base::Time end_time,
+ int max_results, VisitVector* visits);
+
+ // Fills all visits with the specified transition in the time range
+ // [begin, end) into the given vector. Either time can be is_null(), in which
+ // case the times in that direction are unbounded.
+ //
+ // If |max_results| is non-zero, up to that many results will be returned. If
+ // there are more results than that, the oldest ones will be returned. (This
+ // is used for history expiration.)
+ //
+ // The results will be in increasing order of date.
+ void GetVisitsInRangeForTransition(base::Time begin_time,
+ base::Time end_time,
+ int max_results,
+ PageTransition::Type transition,
+ VisitVector* visits);
+
+ // Fills all visits in the given time range into the given vector that should
+ // be user-visible, which excludes things like redirects and subframes. The
+ // begin time is inclusive, the end time is exclusive. Either time can be
+ // is_null(), in which case the times in that direction are unbounded.
+ //
+ // Up to |max_count| visits will be returned. If there are more visits than
+ // that, the most recent |max_count| will be returned. If |max_count| is 0,
+ // all visits in the range will be returned.
+ //
+ // Only one visit for each URL will be returned, and it will be the most
+ // recent one in the time range.
+ void GetVisibleVisitsInRange(base::Time begin_time, base::Time end_time,
+ int max_count,
+ VisitVector* visits);
+
+ // Returns the visit ID for the most recent visit of the given URL ID, or 0
+ // if there is no visit for the URL.
+ //
+ // If non-NULL, the given visit row will be filled with the information of
+ // the found visit. When no visit is found, the row will be unchanged.
+ VisitID GetMostRecentVisitForURL(URLID url_id,
+ VisitRow* visit_row);
+
+ // Returns the |max_results| most recent visit sessions for |url_id|.
+ //
+ // Returns false if there's a failure preparing the statement. True
+ // otherwise. (No results are indicated with an empty |visits|
+ // vector.)
+ bool GetMostRecentVisitsForURL(URLID url_id,
+ int max_results,
+ VisitVector* visits);
+
+ // Finds a redirect coming from the given |from_visit|. If a redirect is
+ // found, it fills the visit ID and URL into the out variables and returns
+ // true. If there is no redirect from the given visit, returns false.
+ //
+ // If there is more than one redirect, this will compute a random one. But
+ // duplicates should be very rare, and we don't actually care which one we
+ // get in most cases. These will occur when the user goes back and gets
+ // redirected again.
+ //
+ // |to_visit| and |to_url| can be NULL, in which case they are ignored.
+ bool GetRedirectFromVisit(VisitID from_visit,
+ VisitID* to_visit,
+ GURL* to_url);
+
+ // Similar to the above function except finds a redirect going to a given
+ // |to_visit|.
+ bool GetRedirectToVisit(VisitID to_visit,
+ VisitID* from_visit,
+ GURL* from_url);
+
+ // Returns the number of visits to all URLs on the scheme/host/port
+ // identified by |url|. This is only valid for http and https URLs (all other
+ // schemes are ignored and false is returned).
+ // |count| is set to the number of visits, and |first_visit| is set to the
+ // first time the host was visited. Returns true on success.
+ bool GetVisitCountToHost(const GURL& url, int* count,
+ base::Time* first_visit);
+
+ // Get the time of the first item in our database.
+ bool GetStartDate(base::Time* first_visit);
+
+ protected:
+ // Returns the database for the functions in this interface.
+ virtual sql::Connection& GetDB() = 0;
+
+ // Called by the derived classes on initialization to make sure the tables
+ // and indices are properly set up. Must be called before anything else.
+ bool InitVisitTable();
+
+ // Convenience to fill a VisitRow. Assumes the visit values are bound starting
+ // at index 0.
+ static void FillVisitRow(sql::Statement& statement, VisitRow* visit);
+
+ // Convenience to fill a VisitVector. Assumes that statement.Step()
+ // hasn't happened yet.
+ static void FillVisitVector(sql::Statement& statement, VisitVector* visits);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(VisitDatabase);
+};
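+
+// Illustrative sketch (not part of the original change): a concrete database
+// mixes this interface in alongside URLDatabase and supplies the connection,
+// roughly:
+//
+//   class ExampleHistoryDB : public URLDatabase, public VisitDatabase {
+//    protected:
+//     virtual sql::Connection& GetDB() { return db_; }
+//    private:
+//     sql::Connection db_;
+//   };
+//
+// ExampleHistoryDB is hypothetical; in this patch, HistoryDatabase and the
+// VisitDatabaseTest fixture play this role.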
+
+} // namespace history
+
+#endif // CHROME_BROWSER_HISTORY_VISIT_DATABASE_H_
diff --git a/chrome/browser/history/visit_database_unittest.cc b/chrome/browser/history/visit_database_unittest.cc
new file mode 100644
index 0000000..ebc2e1b
--- /dev/null
+++ b/chrome/browser/history/visit_database_unittest.cc
@@ -0,0 +1,230 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "app/sql/connection.h"
+#include "base/file_path.h"
+#include "base/file_util.h"
+#include "base/path_service.h"
+#include "base/string_util.h"
+#include "chrome/browser/history/url_database.h"
+#include "chrome/browser/history/visit_database.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "testing/platform_test.h"
+
+using base::Time;
+using base::TimeDelta;
+
+namespace history {
+
+namespace {
+
+bool IsVisitInfoEqual(const VisitRow& a,
+ const VisitRow& b) {
+ return a.visit_id == b.visit_id &&
+ a.url_id == b.url_id &&
+ a.visit_time == b.visit_time &&
+ a.referring_visit == b.referring_visit &&
+ a.transition == b.transition &&
+ a.is_indexed == b.is_indexed;
+}
+
+} // namespace
+
+class VisitDatabaseTest : public PlatformTest,
+ public URLDatabase,
+ public VisitDatabase {
+ public:
+ VisitDatabaseTest() {
+ }
+
+ private:
+ // Test setup.
+ void SetUp() {
+ PlatformTest::SetUp();
+ FilePath temp_dir;
+ PathService::Get(base::DIR_TEMP, &temp_dir);
+ db_file_ = temp_dir.AppendASCII("VisitTest.db");
+ file_util::Delete(db_file_, false);
+
+ EXPECT_TRUE(db_.Open(db_file_));
+
+ // Initialize the tables for this test.
+ CreateURLTable(false);
+ CreateMainURLIndex();
+ CreateSupplimentaryURLIndices();
+ InitVisitTable();
+ }
+ void TearDown() {
+ db_.Close();
+ file_util::Delete(db_file_, false);
+ PlatformTest::TearDown();
+ }
+
+ // Provided for URL/VisitDatabase.
+ virtual sql::Connection& GetDB() {
+ return db_;
+ }
+
+ FilePath db_file_;
+ sql::Connection db_;
+};
+
+TEST_F(VisitDatabaseTest, Add) {
+ // Add one visit.
+ VisitRow visit_info1(1, Time::Now(), 0, PageTransition::LINK, 0);
+ EXPECT_TRUE(AddVisit(&visit_info1));
+
+ // Add second visit for the same page.
+ VisitRow visit_info2(visit_info1.url_id,
+ visit_info1.visit_time + TimeDelta::FromSeconds(1), 1,
+ PageTransition::TYPED, 0);
+ EXPECT_TRUE(AddVisit(&visit_info2));
+
+ // Add third visit for a different page.
+ VisitRow visit_info3(2,
+ visit_info1.visit_time + TimeDelta::FromSeconds(2), 0,
+ PageTransition::LINK, 0);
+ EXPECT_TRUE(AddVisit(&visit_info3));
+
+ // Query the first two.
+ std::vector<VisitRow> matches;
+ EXPECT_TRUE(GetVisitsForURL(visit_info1.url_id, &matches));
+ EXPECT_EQ(static_cast<size_t>(2), matches.size());
+
+ // Make sure we got both (order in result set is visit time).
+ EXPECT_TRUE(IsVisitInfoEqual(matches[0], visit_info1) &&
+ IsVisitInfoEqual(matches[1], visit_info2));
+}
+
+TEST_F(VisitDatabaseTest, Delete) {
+ // Add three visits that form a chain of navigation, and then delete the
+ // middle one. We should be left with the outer two visits, and the chain
+ // should link them.
+ static const int kTime1 = 1000;
+ VisitRow visit_info1(1, Time::FromInternalValue(kTime1), 0,
+ PageTransition::LINK, 0);
+ EXPECT_TRUE(AddVisit(&visit_info1));
+
+ static const int kTime2 = kTime1 + 1;
+ VisitRow visit_info2(1, Time::FromInternalValue(kTime2),
+ visit_info1.visit_id, PageTransition::LINK, 0);
+ EXPECT_TRUE(AddVisit(&visit_info2));
+
+ static const int kTime3 = kTime2 + 1;
+ VisitRow visit_info3(1, Time::FromInternalValue(kTime3),
+ visit_info2.visit_id, PageTransition::LINK, 0);
+ EXPECT_TRUE(AddVisit(&visit_info3));
+
+ // First make sure all the visits are there.
+ std::vector<VisitRow> matches;
+ EXPECT_TRUE(GetVisitsForURL(visit_info1.url_id, &matches));
+ EXPECT_EQ(static_cast<size_t>(3), matches.size());
+ EXPECT_TRUE(IsVisitInfoEqual(matches[0], visit_info1) &&
+ IsVisitInfoEqual(matches[1], visit_info2) &&
+ IsVisitInfoEqual(matches[2], visit_info3));
+
+ // Delete the middle one.
+ DeleteVisit(visit_info2);
+
+ // The outer two should be left, and the last one should have the first as
+ // the referrer.
+ visit_info3.referring_visit = visit_info1.visit_id;
+ matches.clear();
+ EXPECT_TRUE(GetVisitsForURL(visit_info1.url_id, &matches));
+ EXPECT_EQ(static_cast<size_t>(2), matches.size());
+ EXPECT_TRUE(IsVisitInfoEqual(matches[0], visit_info1) &&
+ IsVisitInfoEqual(matches[1], visit_info3));
+}
+
+TEST_F(VisitDatabaseTest, Update) {
+ // Make something in the database.
+ VisitRow original(1, Time::Now(), 23, 22, 19);
+ AddVisit(&original);
+
+ // Mutate that row.
+ VisitRow modification(original);
+ modification.url_id = 2;
+ modification.transition = PageTransition::TYPED;
+ modification.visit_time = Time::Now() + TimeDelta::FromDays(1);
+ modification.referring_visit = 9292;
+ modification.is_indexed = true;
+ UpdateVisitRow(modification);
+
+ // Check that the mutated version was written.
+ VisitRow final;
+ GetRowForVisit(original.visit_id, &final);
+ EXPECT_TRUE(IsVisitInfoEqual(modification, final));
+}
+
+// TODO(brettw) write test for GetMostRecentVisitForURL!
+
+TEST_F(VisitDatabaseTest, GetVisibleVisitsInRange) {
+ // Add one visit.
+ VisitRow visit_info1(1, Time::Now(), 0,
+ static_cast<PageTransition::Type>(PageTransition::LINK |
+ PageTransition::CHAIN_START |
+ PageTransition::CHAIN_END),
+ 0);
+ visit_info1.visit_id = 1;
+ EXPECT_TRUE(AddVisit(&visit_info1));
+
+ // Add second visit for the same page.
+ VisitRow visit_info2(visit_info1.url_id,
+ visit_info1.visit_time + TimeDelta::FromSeconds(1), 1,
+ static_cast<PageTransition::Type>(PageTransition::TYPED |
+ PageTransition::CHAIN_START |
+ PageTransition::CHAIN_END),
+ 0);
+ visit_info2.visit_id = 2;
+ EXPECT_TRUE(AddVisit(&visit_info2));
+
+ // Add third visit for a different page.
+ VisitRow visit_info3(2,
+ visit_info1.visit_time + TimeDelta::FromSeconds(2), 0,
+ static_cast<PageTransition::Type>(PageTransition::LINK |
+ PageTransition::CHAIN_START),
+ 0);
+ visit_info3.visit_id = 3;
+ EXPECT_TRUE(AddVisit(&visit_info3));
+
+ // Add a redirect visit from the last page.
+ VisitRow visit_info4(3,
+ visit_info1.visit_time + TimeDelta::FromSeconds(3), visit_info3.visit_id,
+ static_cast<PageTransition::Type>(PageTransition::SERVER_REDIRECT |
+ PageTransition::CHAIN_END),
+ 0);
+ visit_info4.visit_id = 4;
+ EXPECT_TRUE(AddVisit(&visit_info4));
+
+ // Add a subframe visit.
+ VisitRow visit_info5(4,
+ visit_info1.visit_time + TimeDelta::FromSeconds(4), visit_info4.visit_id,
+ static_cast<PageTransition::Type>(PageTransition::AUTO_SUBFRAME |
+ PageTransition::CHAIN_START |
+ PageTransition::CHAIN_END),
+ 0);
+ visit_info5.visit_id = 5;
+ EXPECT_TRUE(AddVisit(&visit_info5));
+
+ // Query the visits for all time, we should not get the first (duplicate of
+ // the second) or the redirect or subframe visits.
+ VisitVector results;
+ GetVisibleVisitsInRange(Time(), Time(), 0, &results);
+ ASSERT_EQ(static_cast<size_t>(2), results.size());
+ EXPECT_TRUE(IsVisitInfoEqual(results[0], visit_info4) &&
+ IsVisitInfoEqual(results[1], visit_info2));
+
+ // Query a time range and make sure beginning is inclusive and ending is
+ // exclusive.
+ GetVisibleVisitsInRange(visit_info2.visit_time, visit_info4.visit_time, 0,
+ &results);
+ ASSERT_EQ(static_cast<size_t>(1), results.size());
+ EXPECT_TRUE(IsVisitInfoEqual(results[0], visit_info2));
+
+ // Query for a max count and make sure we get only that number.
+ GetVisibleVisitsInRange(Time(), Time(), 1, &results);
+ ASSERT_EQ(static_cast<size_t>(1), results.size());
+ EXPECT_TRUE(IsVisitInfoEqual(results[0], visit_info4));
+}
+
+} // namespace history
diff --git a/chrome/browser/history/visit_tracker.cc b/chrome/browser/history/visit_tracker.cc
new file mode 100644
index 0000000..7bb0011
--- /dev/null
+++ b/chrome/browser/history/visit_tracker.cc
@@ -0,0 +1,106 @@
+// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/history/visit_tracker.h"
+
+#include "base/stl_util-inl.h"
+
+namespace history {
+
+// When the list gets longer than kMaxItemsInTransitionList,
+// CleanupTransitionList will resize the list down to
+// kResizeBigTransitionListTo entries. This way we only do an occasional block
+// move of the data rather than constantly shuffling stuff around in the
+// vector.
+static const size_t kMaxItemsInTransitionList = 96;
+static const size_t kResizeBigTransitionListTo = 64;
+COMPILE_ASSERT(kResizeBigTransitionListTo < kMaxItemsInTransitionList,
+ max_items_must_be_larger_than_resize_to);
+
+VisitTracker::VisitTracker() {
+}
+
+VisitTracker::~VisitTracker() {
+ STLDeleteContainerPairSecondPointers(hosts_.begin(), hosts_.end());
+}
+
+// This function is potentially slow because it may do up to two brute-force
+// searches of the transitions list. This transitions list is kept to a
+// relatively small number by CleanupTransitionList so it shouldn't be a big
+// deal. However, if this ends up being noticeable for performance, we may want
+// to optimize lookup.
+VisitID VisitTracker::GetLastVisit(const void* host,
+ int32 page_id,
+ const GURL& referrer) {
+ if (referrer.is_empty() || !host)
+ return 0;
+
+ HostList::iterator i = hosts_.find(host);
+ if (i == hosts_.end())
+ return 0; // We don't have any entries for this host.
+ TransitionList& transitions = *i->second;
+
+ // Recall that a page ID is associated with a single session history entry.
+ // In the case of automatically loaded iframes, many visits/URLs can have the
+ // same page ID.
+ //
+ // We search backwards, starting at the current page ID, for the referring
+ // URL. This won't always be correct. For example, if a render process has
+ // the same page open in two different tabs, or even in two different frames,
+ // we can get confused about which was which. We can have the renderer
+ // report more precise referrer information in the future, but this is a
+ // hard problem and doesn't affect much in terms of real-world issues.
+ //
+ // We assume that the page IDs are increasing over time, so larger IDs than
+ // the current input ID happened in the future (this will occur if the user
+ // goes back). We can ignore future transitions because if you navigate, go
+ // back, and navigate some more, we'd like to have one node with two out
+ // edges in our visit graph.
+ for (int i = static_cast<int>(transitions.size()) - 1; i >= 0; i--) {
+ if (transitions[i].page_id <= page_id && transitions[i].url == referrer) {
+ // Found it.
+ return transitions[i].visit_id;
+ }
+ }
+
+ // We can't find the referrer.
+ return 0;
+}
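+
+// Illustrative note (not part of the original change): suppose a host's
+// transition list holds (page_id, url, visit_id) entries
+//   (1, "http://a/", 10), (2, "http://b/", 11), (3, "http://a/", 12).
+// GetLastVisit(host, 2, GURL("http://a/")) scans backwards from the end,
+// skips the page_id 3 entry (it is in the "future" relative to page_id 2),
+// skips the page_id 2 entry (its URL does not match), and returns visit
+// ID 10.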
+
+void VisitTracker::AddVisit(const void* host,
+ int32 page_id,
+ const GURL& url,
+ VisitID visit_id) {
+ TransitionList* transitions = hosts_[host];
+ if (!transitions) {
+ transitions = new TransitionList;
+ hosts_[host] = transitions;
+ }
+
+ Transition t;
+ t.url = url;
+ t.page_id = page_id;
+ t.visit_id = visit_id;
+ transitions->push_back(t);
+
+ CleanupTransitionList(transitions);
+}
+
+void VisitTracker::NotifyRenderProcessHostDestruction(const void* host) {
+ HostList::iterator i = hosts_.find(host);
+ if (i == hosts_.end())
+ return; // We don't have any entries for this host.
+
+ delete i->second;
+ hosts_.erase(i);
+}
+
+void VisitTracker::CleanupTransitionList(TransitionList* transitions) {
+ if (transitions->size() <= kMaxItemsInTransitionList)
+ return; // Nothing to do.
+
+ transitions->erase(transitions->begin(),
+ transitions->begin() + kResizeBigTransitionListTo);
+}
+
+} // namespace history
diff --git a/chrome/browser/history/visit_tracker.h b/chrome/browser/history/visit_tracker.h
new file mode 100644
index 0000000..43de7a0
--- /dev/null
+++ b/chrome/browser/history/visit_tracker.h
@@ -0,0 +1,66 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_HISTORY_VISIT_TRACKER_H__
+#define CHROME_BROWSER_HISTORY_VISIT_TRACKER_H__
+
+#include <map>
+#include <vector>
+
+#include "base/basictypes.h"
+#include "chrome/browser/history/history_types.h"
+
+namespace history {
+
+// Tracks history transitions between pages. The history backend uses this to
+// link up page transitions to form a chain of page visits, and to set the
+// transition type properly.
+//
+// This class is not thread safe.
+class VisitTracker {
+ public:
+ VisitTracker();
+ ~VisitTracker();
+
+ // Notifications -------------------------------------------------------------
+
+ void AddVisit(const void* host,
+ int32 page_id,
+ const GURL& url,
+ VisitID visit_id);
+
+ // When a RenderProcessHost is destroyed, we want to clear out our saved
+ // transitions/visit IDs for it.
+ void NotifyRenderProcessHostDestruction(const void* host);
+
+ // Querying ------------------------------------------------------------------
+
+ // Returns the visit ID for the transition given information about the visit
+ // supplied by the renderer. We will return 0 if there is no appropriate
+ // referring visit.
+ VisitID GetLastVisit(const void* host, int32 page_id, const GURL& referrer);
+
+ private:
+ struct Transition {
+ GURL url; // URL that the event happened to.
+ int32 page_id; // ID generated by the render process host.
+ VisitID visit_id; // Visit ID generated by history.
+ };
+ typedef std::vector<Transition> TransitionList;
+ typedef std::map<const void*, TransitionList*> HostList;
+
+ // Expires oldish items in the given transition list. This keeps the list
+ // size small by removing items that are unlikely to be needed, which is
+ // important for GetLastVisit, which does brute-force searches of this list.
+ void CleanupTransitionList(TransitionList* transitions);
+
+ // Maps render process hosts to lists of recent transitions.
+ HostList hosts_;
+
+ DISALLOW_COPY_AND_ASSIGN(VisitTracker);
+};
+
+} // namespace history
+
+#endif // CHROME_BROWSER_HISTORY_VISIT_TRACKER_H__
diff --git a/chrome/browser/history/visit_tracker_unittest.cc b/chrome/browser/history/visit_tracker_unittest.cc
new file mode 100644
index 0000000..5c84d28
--- /dev/null
+++ b/chrome/browser/history/visit_tracker_unittest.cc
@@ -0,0 +1,138 @@
+// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/history/visit_tracker.h"
+#include "base/basictypes.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+using history::VisitTracker;
+
+namespace {
+
+struct VisitToTest {
+ // Identifies the host; we'll cast this to a pointer when querying (the
+ // tracker isn't allowed to dereference this pointer).
+ int host;
+ int32 page_id;
+
+ // Used when adding this to the tracker.
+ const char* url;
+ const history::VisitID visit_id;
+
+ // Used when finding the referrer.
+ const char* referrer;
+
+ // The correct referring visit ID to compare to the computed one.
+ history::VisitID referring_visit_id;
+};
+
+// The tracker uses RenderProcessHost pointers for scoping but never
+// dereferences them. We use ints because it's easier. This function converts
+// between the two.
+void* MakeFakeHost(int id) {
+ void* host = 0;
+ memcpy(&host, &id, sizeof(int));
+ return host;
+}
+
+void RunTest(VisitTracker* tracker, VisitToTest* test, int test_count) {
+ for (int i = 0; i < test_count; i++) {
+ // Our host pointer is actually just an int, convert it (it will not get
+ // dereferenced).
+ void* host = MakeFakeHost(test[i].host);
+
+ // Check the referrer for this visit.
+ history::VisitID ref_visit = tracker->GetLastVisit(
+ host, test[i].page_id, GURL(test[i].referrer));
+ EXPECT_EQ(test[i].referring_visit_id, ref_visit);
+
+ // Now add this visit.
+ tracker->AddVisit(host, test[i].page_id, GURL(test[i].url),
+ test[i].visit_id);
+ }
+}
+
+} // namespace
+
+// A simple test that makes sure we transition between main pages in the
+// presence of back/forward.
+TEST(VisitTracker, SimpleTransitions) {
+ VisitToTest test_simple[] = {
+ // Started here:
+ {1, 1, "http://www.google.com/", 1, "", 0},
+ // Clicked a link:
+ {1, 2, "http://images.google.com/", 2, "http://www.google.com/", 1},
+ // Went back, then clicked a link:
+ {1, 3, "http://video.google.com/", 3, "http://www.google.com/", 1},
+ };
+
+ VisitTracker tracker;
+ RunTest(&tracker, test_simple, arraysize(test_simple));
+}
+
+// Test that referrer is properly computed when there are different frame
+// navigations happening.
+TEST(VisitTracker, Frames) {
+ VisitToTest test_frames[] = {
+ // Started here:
+ {1, 1, "http://foo.com/", 1, "", 0},
+ // Which had an auto-loaded subframe:
+ {1, 1, "http://foo.com/ad.html", 2, "http://foo.com/", 1},
+ // ...and another auto-loaded subframe:
+ {1, 1, "http://foo.com/ad2.html", 3, "http://foo.com/", 1},
+ // ...and the user navigated the first subframe to somewhere else
+ {1, 2, "http://bar.com/", 4, "http://foo.com/ad.html", 2},
+ // ...and then the second subframe somewhere else
+ {1, 3, "http://fud.com/", 5, "http://foo.com/ad2.html", 3},
+ // ...and then the main frame somewhere else.
+ {1, 4, "http://www.google.com/", 6, "http://foo.com/", 1},
+ };
+
+ VisitTracker tracker;
+ RunTest(&tracker, test_frames, arraysize(test_frames));
+}
+
+// Test frame navigation to make sure that the referrer is properly computed
+// when there are multiple processes navigating the same pages.
+TEST(VisitTracker, MultiProcess) {
+ VisitToTest test_processes[] = {
+ // Process 1 and 2 start here:
+ {1, 1, "http://foo.com/", 1, "", 0},
+ {2, 1, "http://foo.com/", 2, "", 0},
+ // They have some subframes:
+ {1, 1, "http://foo.com/ad.html", 3, "http://foo.com/", 1},
+ {2, 1, "http://foo.com/ad.html", 4, "http://foo.com/", 2},
+ // Subframes are navigated:
+ {1, 2, "http://bar.com/", 5, "http://foo.com/ad.html", 3},
+ {2, 2, "http://bar.com/", 6, "http://foo.com/ad.html", 4},
+ // Main frame is navigated:
+ {1, 3, "http://www.google.com/", 7, "http://foo.com/", 1},
+ {2, 3, "http://www.google.com/", 8, "http://foo.com/", 2},
+ };
+
+ VisitTracker tracker;
+ RunTest(&tracker, test_processes, arraysize(test_processes));
+}
+
+// Test that processes get removed properly.
+TEST(VisitTracker, ProcessRemove) {
+ // Simple navigation from one process.
+ VisitToTest part1[] = {
+ {1, 1, "http://www.google.com/", 1, "", 0},
+ {1, 2, "http://images.google.com/", 2, "http://www.google.com/", 1},
+ };
+
+ VisitTracker tracker;
+ RunTest(&tracker, part1, arraysize(part1));
+
+ // Say that process has been destroyed.
+ tracker.NotifyRenderProcessHostDestruction(MakeFakeHost(1));
+
+ // Simple navigation from a new process with the same ID, it should not find
+ // a referrer.
+ VisitToTest part2[] = {
+ {1, 1, "http://images.google.com/", 2, "http://www.google.com/", 0},
+ };
+ RunTest(&tracker, part2, arraysize(part2));
+}
diff --git a/chrome/browser/history/visitsegment_database.cc b/chrome/browser/history/visitsegment_database.cc
new file mode 100644
index 0000000..f94d713
--- /dev/null
+++ b/chrome/browser/history/visitsegment_database.cc
@@ -0,0 +1,386 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/history/visitsegment_database.h"
+
+#include <math.h>
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#include "app/sql/connection.h"
+#include "app/sql/statement.h"
+#include "base/logging.h"
+#include "base/stl_util-inl.h"
+#include "base/string_util.h"
+#include "base/utf_string_conversions.h"
+#include "chrome/browser/history/page_usage_data.h"
+
+// The following tables are used to store URL segment information.
+//
+// segments
+//   id          Primary key
+//   name        A unique string to represent that segment. (URL derived)
+//   url_id      ID of the URL currently used to represent this segment.
+//   pres_index  Index used to store a fixed presentation position.
+//
+// segment_usage
+//   id          Primary key
+//   segment_id  Corresponding segment ID
+//   time_slot   Time stamp identifying the day this entry is about
+//   visit_count Number of visits in the segment on that day
+//
+
+namespace history {
+
+VisitSegmentDatabase::VisitSegmentDatabase() {
+}
+
+VisitSegmentDatabase::~VisitSegmentDatabase() {
+}
+
+bool VisitSegmentDatabase::InitSegmentTables() {
+ // Segments table.
+ if (!GetDB().DoesTableExist("segments")) {
+ if (!GetDB().Execute("CREATE TABLE segments ("
+ "id INTEGER PRIMARY KEY,"
+ "name VARCHAR,"
+ "url_id INTEGER NON NULL,"
+ "pres_index INTEGER DEFAULT -1 NOT NULL)")) {
+ NOTREACHED();
+ return false;
+ }
+
+ if (!GetDB().Execute("CREATE INDEX segments_name ON segments(name)")) {
+ NOTREACHED();
+ return false;
+ }
+ }
+
+ // This was added later, so we need to try to create it even if the table
+ // already exists.
+ GetDB().Execute("CREATE INDEX segments_url_id ON segments(url_id)");
+
+ // Segment usage table.
+ if (!GetDB().DoesTableExist("segment_usage")) {
+ if (!GetDB().Execute("CREATE TABLE segment_usage ("
+ "id INTEGER PRIMARY KEY,"
+ "segment_id INTEGER NOT NULL,"
+ "time_slot INTEGER NOT NULL,"
+ "visit_count INTEGER DEFAULT 0 NOT NULL)")) {
+ NOTREACHED();
+ return false;
+ }
+ if (!GetDB().Execute(
+ "CREATE INDEX segment_usage_time_slot_segment_id ON "
+ "segment_usage(time_slot, segment_id)")) {
+ NOTREACHED();
+ return false;
+ }
+ }
+
+ // Added in a later version, so we always need to try to create this index.
+ GetDB().Execute("CREATE INDEX segments_usage_seg_id "
+ "ON segment_usage(segment_id)");
+
+ // Presentation index table.
+ //
+ // Important note:
+ // Right now, this table is only used to store the presentation index.
+ // If you need to add more columns, keep in mind that rows are currently
+ // deleted when the presentation index is changed to -1.
+ // See SetPagePresentationIndex() in this file
+ if (!GetDB().DoesTableExist("presentation")) {
+ if (!GetDB().Execute("CREATE TABLE presentation("
+ "url_id INTEGER PRIMARY KEY,"
+ "pres_index INTEGER NOT NULL)"))
+ return false;
+ }
+ return true;
+}
+
+bool VisitSegmentDatabase::DropSegmentTables() {
+ // Dropping the tables will implicitly delete the indices.
+ return GetDB().Execute("DROP TABLE segments") &&
+ GetDB().Execute("DROP TABLE segment_usage");
+}
+
+// Note: the segment name is derived from the URL but is not a URL. It is
+// a string that can be easily recreated from various URLs. Maybe this should
+// be an MD5 hash to limit the length.
+//
+// static
+std::string VisitSegmentDatabase::ComputeSegmentName(const GURL& url) {
+ // TODO(brettw) this should probably use the registry controlled
+ // domains service.
+ GURL::Replacements r;
+ const char kWWWDot[] = "www.";
+ const int kWWWDotLen = arraysize(kWWWDot) - 1;
+
+ std::string host = url.host();
+ const char* host_c = host.c_str();
+ // Remove www. to avoid some dups.
+ if (static_cast<int>(host.size()) > kWWWDotLen &&
+ LowerCaseEqualsASCII(host_c, host_c + kWWWDotLen, kWWWDot)) {
+ r.SetHost(host.c_str(),
+ url_parse::Component(kWWWDotLen,
+ static_cast<int>(host.size()) - kWWWDotLen));
+ }
+ // Remove other stuff we don't want.
+ r.ClearUsername();
+ r.ClearPassword();
+ r.ClearQuery();
+ r.ClearRef();
+ r.ClearPort();
+
+ return url.ReplaceComponents(r).spec();
+}
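+
+// Illustrative note (not part of the original change): for example,
+//   ComputeSegmentName(GURL("http://www.google.com:8080/search?q=foo#top"))
+// yields "http://google.com/search": the "www." prefix, port, query, and ref
+// are all stripped, while the scheme and path are kept.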
+
+SegmentID VisitSegmentDatabase::GetSegmentNamed(
+ const std::string& segment_name) {
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "SELECT id FROM segments WHERE name = ?"));
+ if (!statement)
+ return 0;
+
+ statement.BindString(0, segment_name);
+ if (statement.Step())
+ return statement.ColumnInt64(0);
+ return 0;
+}
+
+bool VisitSegmentDatabase::UpdateSegmentRepresentationURL(SegmentID segment_id,
+ URLID url_id) {
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "UPDATE segments SET url_id = ? WHERE id = ?"));
+ if (!statement)
+ return false;
+
+ statement.BindInt64(0, url_id);
+ statement.BindInt64(1, segment_id);
+ return statement.Run();
+}
+
+URLID VisitSegmentDatabase::GetSegmentRepresentationURL(SegmentID segment_id) {
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "SELECT url_id FROM segments WHERE id = ?"));
+ if (!statement)
+ return 0;
+
+ statement.BindInt64(0, segment_id);
+ if (statement.Step())
+ return statement.ColumnInt64(0);
+ return 0;
+}
+
+SegmentID VisitSegmentDatabase::CreateSegment(URLID url_id,
+ const std::string& segment_name) {
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "INSERT INTO segments (name, url_id) VALUES (?,?)"));
+ if (!statement)
+ return 0;
+
+ statement.BindString(0, segment_name);
+ statement.BindInt64(1, url_id);
+ if (statement.Run())
+ return GetDB().GetLastInsertRowId();
+ return 0;
+}
+
+bool VisitSegmentDatabase::IncreaseSegmentVisitCount(SegmentID segment_id,
+ base::Time ts,
+ int amount) {
+ base::Time t = ts.LocalMidnight();
+
+ sql::Statement select(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "SELECT id, visit_count FROM segment_usage "
+ "WHERE time_slot = ? AND segment_id = ?"));
+ if (!select)
+ return false;
+
+ select.BindInt64(0, t.ToInternalValue());
+ select.BindInt64(1, segment_id);
+ if (select.Step()) {
+ sql::Statement update(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "UPDATE segment_usage SET visit_count = ? WHERE id = ?"));
+ if (!update)
+ return false;
+
+ update.BindInt64(0, select.ColumnInt64(1) + static_cast<int64>(amount));
+ update.BindInt64(1, select.ColumnInt64(0));
+ return update.Run();
+
+ } else {
+ sql::Statement insert(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "INSERT INTO segment_usage "
+ "(segment_id, time_slot, visit_count) VALUES (?, ?, ?)"));
+ if (!insert)
+ return false;
+
+ insert.BindInt64(0, segment_id);
+ insert.BindInt64(1, t.ToInternalValue());
+ insert.BindInt64(2, static_cast<int64>(amount));
+ return insert.Run();
+ }
+}
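+
+// Illustrative note (not part of the original change): the time stamp is
+// bucketed with LocalMidnight(), so two visits to the same segment at 09:00
+// and 17:00 on the same local day update a single segment_usage row whose
+// time_slot is that day's midnight, rather than creating two rows.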
+
+void VisitSegmentDatabase::QuerySegmentUsage(
+ base::Time from_time,
+ int max_result_count,
+ std::vector<PageUsageData*>* results) {
+ // This function gathers the highest-ranked segments in two queries.
+ // The first gathers scores for all segments.
+ // The second gathers segment data (url, title, etc.) for the highest-ranked
+ // segments.
+ // TODO(evanm): this disregards the "presentation index", which was what was
+ // used to lock results into position. But the rest of our code currently
+ // does as well.
+
+ // Gather all the segment scores.
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "SELECT segment_id, time_slot, visit_count "
+ "FROM segment_usage WHERE time_slot >= ? "
+ "ORDER BY segment_id"));
+ if (!statement) {
+ NOTREACHED() << GetDB().GetErrorMessage();
+ return;
+ }
+
+ base::Time ts = from_time.LocalMidnight();
+ statement.BindInt64(0, ts.ToInternalValue());
+
+ base::Time now = base::Time::Now();
+ SegmentID last_segment_id = 0;
+ PageUsageData* pud = NULL;
+ float score = 0;
+ while (statement.Step()) {
+ SegmentID segment_id = statement.ColumnInt64(0);
+ if (segment_id != last_segment_id) {
+ if (pud) {
+ pud->SetScore(score);
+ results->push_back(pud);
+ }
+
+ pud = new PageUsageData(segment_id);
+ score = 0;
+ last_segment_id = segment_id;
+ }
+
+ base::Time timeslot =
+ base::Time::FromInternalValue(statement.ColumnInt64(1));
+ int visit_count = statement.ColumnInt(2);
+ int days_ago = (now - timeslot).InDays();
+
+ // Score for this day in isolation.
+ float day_visits_score = 1.0f + log(static_cast<float>(visit_count));
+ // Recent visits count more than historical ones, so we multiply in a boost
+ // related to how long ago this day was.
+ // This boost is a curve that smoothly goes through these values:
+ // Today gets 3x, a week ago 2x, three weeks ago 1.5x, falling off to 1x
+ // at the limit of how far we reach into the past.
+ float recency_boost = 1.0f + (2.0f * (1.0f / (1.0f + days_ago/7.0f)));
+ score += recency_boost * day_visits_score;
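+ // For example, 8 visits today contribute (1 + ln 8) * 3.0 ~= 9.2 to the
+ // score, while the same 8 visits three weeks ago contribute
+ // (1 + ln 8) * 1.5 ~= 4.6. (Added note, not part of the original change.)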
+ }
+
+ if (pud) {
+ pud->SetScore(score);
+ results->push_back(pud);
+ }
+
+ // Limit to the top |max_result_count| results.
+ std::sort(results->begin(), results->end(), PageUsageData::Predicate);
+ if (static_cast<int>(results->size()) > max_result_count) {
+ STLDeleteContainerPointers(results->begin() + max_result_count,
+ results->end());
+ results->resize(max_result_count);
+ }
+
+ // Now fetch the details about the entries we care about.
+ sql::Statement statement2(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "SELECT urls.url, urls.title FROM urls "
+ "JOIN segments ON segments.url_id = urls.id "
+ "WHERE segments.id = ?"));
+ if (!statement2) {
+ NOTREACHED() << GetDB().GetErrorMessage();
+ return;
+ }
+ for (size_t i = 0; i < results->size(); ++i) {
+ PageUsageData* pud = (*results)[i];
+ statement2.BindInt64(0, pud->GetID());
+ if (statement2.Step()) {
+ pud->SetURL(GURL(statement2.ColumnString(0)));
+ pud->SetTitle(UTF8ToUTF16(statement2.ColumnString(1)));
+ }
+ statement2.Reset();
+ }
+}
+
+void VisitSegmentDatabase::DeleteSegmentData(base::Time older_than) {
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "DELETE FROM segment_usage WHERE time_slot < ?"));
+ if (!statement)
+ return;
+
+ statement.BindInt64(0, older_than.LocalMidnight().ToInternalValue());
+ if (!statement.Run())
+ NOTREACHED();
+}
+
+void VisitSegmentDatabase::SetSegmentPresentationIndex(SegmentID segment_id,
+ int index) {
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "UPDATE segments SET pres_index = ? WHERE id = ?"));
+ if (!statement)
+ return;
+
+ statement.BindInt(0, index);
+ statement.BindInt64(1, segment_id);
+ if (!statement.Run())
+ NOTREACHED();
+ else
+ DCHECK_EQ(1, GetDB().GetLastChangeCount());
+}
+
+bool VisitSegmentDatabase::DeleteSegmentForURL(URLID url_id) {
+ sql::Statement select(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "SELECT id FROM segments WHERE url_id = ?"));
+ if (!select)
+ return false;
+
+ sql::Statement delete_seg(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "DELETE FROM segments WHERE id = ?"));
+ if (!delete_seg)
+ return false;
+
+ sql::Statement delete_usage(GetDB().GetCachedStatement(SQL_FROM_HERE,
+ "DELETE FROM segment_usage WHERE segment_id = ?"));
+ if (!delete_usage)
+ return false;
+
+ bool r = true;
+ select.BindInt64(0, url_id);
+ // In theory there should not be more than one segment using a given URL, but
+ // we loop anyway to clean up any inconsistency.
+ while (select.Step()) {
+ SegmentID segment_id = select.ColumnInt64(0);
+
+ delete_usage.BindInt64(0, segment_id);
+ if (!delete_usage.Run()) {
+ NOTREACHED();
+ r = false;
+ }
+
+ delete_seg.BindInt64(0, segment_id);
+ if (!delete_seg.Run()) {
+ NOTREACHED();
+ r = false;
+ }
+ delete_usage.Reset();
+ delete_seg.Reset();
+ }
+ return r;
+}
+
+} // namespace history
diff --git a/chrome/browser/history/visitsegment_database.h b/chrome/browser/history/visitsegment_database.h
new file mode 100644
index 0000000..16f0417
--- /dev/null
+++ b/chrome/browser/history/visitsegment_database.h
@@ -0,0 +1,88 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_HISTORY_VISITSEGMENT_DATABASE_H_
+#define CHROME_BROWSER_HISTORY_VISITSEGMENT_DATABASE_H_
+
+#include "base/basictypes.h"
+#include "chrome/browser/history/history_types.h"
+
+class PageUsageData;
+
+namespace sql {
+class Connection;
+}
+
+namespace history {
+
+// Tracks pages used for the most visited view.
+class VisitSegmentDatabase {
+ public:
+ // Must call InitSegmentTables before using any other part of this class.
+ VisitSegmentDatabase();
+ virtual ~VisitSegmentDatabase();
+
+ // Computes a segment name given a URL. The segment name is currently the
+ // source URL spec with some information (such as the query string) removed.
+ static std::string ComputeSegmentName(const GURL& url);
+
+ // Returns the ID of the segment with the corresponding name, or 0 if there
+ // is no segment with that name.
+ SegmentID GetSegmentNamed(const std::string& segment_name);
+
+ // Updates the segment identified by |segment_id| with the provided URL ID.
+ // The URL identifies the page that will now represent the segment.
+ bool UpdateSegmentRepresentationURL(SegmentID segment_id,
+ URLID url_id);
+
+ // Returns the ID of the URL currently used to represent this segment, or 0
+ // if an error occurred.
+ URLID GetSegmentRepresentationURL(SegmentID segment_id);
+
+ // Create a segment for the provided URL ID with the given name. Returns the
+ // ID of the newly created segment, or 0 on failure.
+ SegmentID CreateSegment(URLID url_id, const std::string& segment_name);
+
+ // Increase the segment visit count by the provided amount. Return true on
+ // success.
+ bool IncreaseSegmentVisitCount(SegmentID segment_id, base::Time ts,
+ int amount);
+
+ // Computes the segment usage since |from_time|. A PageUsageData is added to
+ // |result| for each of the highest-scored segments, up to |max_result_count|
+ // of them.
+ void QuerySegmentUsage(base::Time from_time,
+ int max_result_count,
+ std::vector<PageUsageData*>* result);
+
+ // Delete all the segment usage data which is older than the provided time
+ // stamp.
+ void DeleteSegmentData(base::Time older_than);
+
+ // Changes the presentation index for the segment identified by |segment_id|.
+ void SetSegmentPresentationIndex(SegmentID segment_id, int index);
+
+ // Deletes the segment currently using the provided URL for representation.
+ // This will also delete any associated segment usage data.
+ bool DeleteSegmentForURL(URLID url_id);
+
+ protected:
+ // Returns the database for the functions in this interface.
+ virtual sql::Connection& GetDB() = 0;
+
+ // Creates the tables used by this class if necessary. Returns true on
+ // success.
+ bool InitSegmentTables();
+
+ // Deletes all the segment tables, returning true on success.
+ bool DropSegmentTables();
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(VisitSegmentDatabase);
+};
+
+} // namespace history
+
+#endif // CHROME_BROWSER_HISTORY_VISITSEGMENT_DATABASE_H_