diff options
author | Ben Murdoch <benm@google.com> | 2010-07-29 17:14:53 +0100 |
---|---|---|
committer | Ben Murdoch <benm@google.com> | 2010-08-04 14:29:45 +0100 |
commit | c407dc5cd9bdc5668497f21b26b09d988ab439de (patch) | |
tree | 7eaf8707c0309516bdb042ad976feedaf72b0bb1 /chrome/browser/history | |
parent | 0998b1cdac5733f299c12d88bc31ef9c8035b8fa (diff) | |
download | external_chromium-c407dc5cd9bdc5668497f21b26b09d988ab439de.zip external_chromium-c407dc5cd9bdc5668497f21b26b09d988ab439de.tar.gz external_chromium-c407dc5cd9bdc5668497f21b26b09d988ab439de.tar.bz2 |
Merge Chromium src@r53293
Change-Id: Ia79acf8670f385cee48c45b0a75371d8e950af34
Diffstat (limited to 'chrome/browser/history')
72 files changed, 22221 insertions, 0 deletions
diff --git a/chrome/browser/history/archived_database.cc b/chrome/browser/history/archived_database.cc new file mode 100644 index 0000000..1b9e010 --- /dev/null +++ b/chrome/browser/history/archived_database.cc @@ -0,0 +1,123 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include <algorithm> +#include <string> + +#include "app/sql/statement.h" +#include "app/sql/transaction.h" +#include "base/string_util.h" +#include "chrome/browser/history/archived_database.h" + +namespace history { + +namespace { + +static const int kCurrentVersionNumber = 2; +static const int kCompatibleVersionNumber = 2; + +} // namespace + +ArchivedDatabase::ArchivedDatabase() { +} + +ArchivedDatabase::~ArchivedDatabase() { +} + +bool ArchivedDatabase::Init(const FilePath& file_name) { + // Set the database page size to something a little larger to give us + // better performance (we're typically seek rather than bandwidth limited). + // This only has an effect before any tables have been created, otherwise + // this is a NOP. Must be a power of 2 and a max of 8192. + db_.set_page_size(4096); + + // Don't use very much memory caching this database. We seldom use it for + // anything important. + db_.set_cache_size(64); + + // Run the database in exclusive mode. Nobody else should be accessing the + // database while we're running, and this will give somewhat improved perf. + db_.set_exclusive_locking(); + + if (!db_.Open(file_name)) + return false; + + sql::Transaction transaction(&db_); + if (!transaction.Begin()) { + db_.Close(); + return false; + } + + // Version check. + if (!meta_table_.Init(&db_, kCurrentVersionNumber, + kCompatibleVersionNumber)) { + db_.Close(); + return false; + } + + // Create the tables. 
+ if (!CreateURLTable(false) || !InitVisitTable() || + !InitKeywordSearchTermsTable()) { + db_.Close(); + return false; + } + CreateMainURLIndex(); + + if (EnsureCurrentVersion() != sql::INIT_OK) { + db_.Close(); + return false; + } + + return transaction.Commit(); +} + +void ArchivedDatabase::BeginTransaction() { + db_.BeginTransaction(); +} + +void ArchivedDatabase::CommitTransaction() { + db_.CommitTransaction(); +} + +sql::Connection& ArchivedDatabase::GetDB() { + return db_; +} + +// Migration ------------------------------------------------------------------- + +sql::InitStatus ArchivedDatabase::EnsureCurrentVersion() { + // We can't read databases newer than we were designed for. + if (meta_table_.GetCompatibleVersionNumber() > kCurrentVersionNumber) { + LOG(WARNING) << "Archived database is too new."; + return sql::INIT_TOO_NEW; + } + + // NOTICE: If you are changing structures for things shared with the archived + // history file like URLs, visits, or downloads, that will need migration as + // well. Instead of putting such migration code in this class, it should be + // in the corresponding file (url_database.cc, etc.) and called from here and + // from the archived_database.cc. + + int cur_version = meta_table_.GetVersionNumber(); + if (cur_version == 1) { + if (!DropStarredIDFromURLs()) { + LOG(WARNING) << "Unable to update archived database to version 2."; + return sql::INIT_FAILURE; + } + ++cur_version; + meta_table_.SetVersionNumber(cur_version); + meta_table_.SetCompatibleVersionNumber( + std::min(cur_version, kCompatibleVersionNumber)); + } + + // Put future migration cases here. + + // When the version is too old, we just try to continue anyway, there should + // not be a released product that makes a database too old for us to handle. 
+ LOG_IF(WARNING, cur_version < kCurrentVersionNumber) << + "Archived database version " << cur_version << " is too old to handle."; + + return sql::INIT_OK; +} +} // namespace history diff --git a/chrome/browser/history/archived_database.h b/chrome/browser/history/archived_database.h new file mode 100644 index 0000000..c9d8757 --- /dev/null +++ b/chrome/browser/history/archived_database.h @@ -0,0 +1,63 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CHROME_BROWSER_HISTORY_ARCHIVED_DATABASE_H_ +#define CHROME_BROWSER_HISTORY_ARCHIVED_DATABASE_H_ + +#include "app/sql/connection.h" +#include "app/sql/init_status.h" +#include "app/sql/meta_table.h" +#include "base/basictypes.h" +#include "chrome/browser/history/url_database.h" +#include "chrome/browser/history/visit_database.h" + +class FilePath; + +namespace history { + +// Encapsulates the database operations for archived history. +// +// IMPORTANT NOTE: The IDs in this system for URLs and visits will be +// different than those in the main database. This is to eliminate the +// dependency between them so we can deal with each one on its own. +class ArchivedDatabase : public URLDatabase, + public VisitDatabase { + public: + // Must call Init() before using other members. + ArchivedDatabase(); + virtual ~ArchivedDatabase(); + + // Initializes the database connection. This must return true before any other + // functions on this class are called. + bool Init(const FilePath& file_name); + + // Transactions on the database. We support nested transactions and only + // commit when the outermost one is committed (sqlite doesn't support true + // nested transactions). + void BeginTransaction(); + void CommitTransaction(); + + private: + // Implemented for the specialized databases. + virtual sql::Connection& GetDB(); + + // Makes sure the version is up-to-date, updating if necessary. 
If the + // database is too old to migrate, the user will be notified. In this case, or + // for other errors, false will be returned. True means it is up-to-date and + // ready for use. + // + // This assumes it is called from the init function inside a transaction. It + // may commit the transaction and start a new one if migration requires it. + sql::InitStatus EnsureCurrentVersion(); + + // The database. + sql::Connection db_; + sql::MetaTable meta_table_; + + DISALLOW_COPY_AND_ASSIGN(ArchivedDatabase); +}; + +} // namespace history + +#endif // CHROME_BROWSER_HISTORY_ARCHIVED_DATABASE_H_ diff --git a/chrome/browser/history/download_database.cc b/chrome/browser/history/download_database.cc new file mode 100644 index 0000000..aa3dbde --- /dev/null +++ b/chrome/browser/history/download_database.cc @@ -0,0 +1,218 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/history/download_database.h" + +#include <limits> +#include <vector> + +#include "app/sql/connection.h" +#include "app/sql/statement.h" +#include "base/file_path.h" +#include "base/utf_string_conversions.h" +#include "build/build_config.h" +#include "chrome/browser/download/download_item.h" +#include "chrome/browser/history/download_types.h" + +// Download schema: +// +// id SQLite-generated primary key. +// full_path Location of the download on disk. +// url URL of the downloaded file. +// start_time When the download was started. +// received_bytes Total size downloaded. +// total_bytes Total size of the download. +// state Identifies if this download is completed or not. Not used +// directly by the history system. See DownloadItem's +// DownloadState for where this is used. + +namespace history { + +namespace { + +#if defined(OS_POSIX) + +// Binds/reads the given file path to the given column of the given statement. 
+void BindFilePath(sql::Statement& statement, const FilePath& path, int col) { + statement.BindString(col, path.value()); +} +FilePath ColumnFilePath(sql::Statement& statement, int col) { + return FilePath(statement.ColumnString(col)); +} + +#else + +// See above. +void BindFilePath(sql::Statement& statement, const FilePath& path, int col) { + statement.BindString(col, UTF16ToUTF8(path.value())); +} +FilePath ColumnFilePath(sql::Statement& statement, int col) { + return FilePath(UTF8ToUTF16(statement.ColumnString(col))); +} + +#endif + +} // namespace + +DownloadDatabase::DownloadDatabase() { +} + +DownloadDatabase::~DownloadDatabase() { +} + +bool DownloadDatabase::InitDownloadTable() { + if (!GetDB().DoesTableExist("downloads")) { + if (!GetDB().Execute( + "CREATE TABLE downloads (" + "id INTEGER PRIMARY KEY," + "full_path LONGVARCHAR NOT NULL," + "url LONGVARCHAR NOT NULL," + "start_time INTEGER NOT NULL," + "received_bytes INTEGER NOT NULL," + "total_bytes INTEGER NOT NULL," + "state INTEGER NOT NULL)")) + return false; + } + return true; +} + +bool DownloadDatabase::DropDownloadTable() { + return GetDB().Execute("DROP TABLE downloads"); +} + +void DownloadDatabase::QueryDownloads( + std::vector<DownloadCreateInfo>* results) { + results->clear(); + + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT id, full_path, url, start_time, received_bytes, " + "total_bytes, state " + "FROM downloads " + "ORDER BY start_time")); + if (!statement) + return; + + while (statement.Step()) { + DownloadCreateInfo info; + info.db_handle = statement.ColumnInt64(0); + + info.path = ColumnFilePath(statement, 1); + info.url = GURL(statement.ColumnString(2)); + info.start_time = base::Time::FromTimeT(statement.ColumnInt64(3)); + info.received_bytes = statement.ColumnInt64(4); + info.total_bytes = statement.ColumnInt64(5); + info.state = statement.ColumnInt(6); + results->push_back(info); + } +} + +bool DownloadDatabase::UpdateDownload(int64 received_bytes, 
+ int32 state, + DownloadID db_handle) { + DCHECK(db_handle > 0); + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "UPDATE downloads " + "SET received_bytes=?, state=? WHERE id=?")); + if (!statement) + return false; + + statement.BindInt64(0, received_bytes); + statement.BindInt(1, state); + statement.BindInt64(2, db_handle); + return statement.Run(); +} + +bool DownloadDatabase::UpdateDownloadPath(const FilePath& path, + DownloadID db_handle) { + DCHECK(db_handle > 0); + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "UPDATE downloads SET full_path=? WHERE id=?")); + if (!statement) + return false; + + BindFilePath(statement, path, 0); + statement.BindInt64(1, db_handle); + return statement.Run(); +} + +bool DownloadDatabase::CleanUpInProgressEntries() { + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "UPDATE downloads SET state=? WHERE state=?")); + if (!statement) + return false; + statement.BindInt(0, DownloadItem::CANCELLED); + statement.BindInt(1, DownloadItem::IN_PROGRESS); + return statement.Run(); +} + +int64 DownloadDatabase::CreateDownload(const DownloadCreateInfo& info) { + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "INSERT INTO downloads " + "(full_path, url, start_time, received_bytes, total_bytes, state) " + "VALUES (?, ?, ?, ?, ?, ?)")); + if (!statement) + return 0; + + BindFilePath(statement, info.path, 0); + statement.BindString(1, info.url.spec()); + statement.BindInt64(2, info.start_time.ToTimeT()); + statement.BindInt64(3, info.received_bytes); + statement.BindInt64(4, info.total_bytes); + statement.BindInt(5, info.state); + + if (statement.Run()) + return GetDB().GetLastInsertRowId(); + return 0; +} + +void DownloadDatabase::RemoveDownload(DownloadID db_handle) { + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "DELETE FROM downloads WHERE id=?")); + if (!statement) + return; + + statement.BindInt64(0, db_handle); + 
statement.Run(); +} + +void DownloadDatabase::RemoveDownloadsBetween(base::Time delete_begin, + base::Time delete_end) { + // This does not use an index. We currently aren't likely to have enough + // downloads where an index by time will give us a lot of benefit. + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "DELETE FROM downloads WHERE start_time >= ? AND start_time < ? " + "AND (State = ? OR State = ?)")); + if (!statement) + return; + + time_t start_time = delete_begin.ToTimeT(); + time_t end_time = delete_end.ToTimeT(); + statement.BindInt64(0, start_time); + statement.BindInt64( + 1, + end_time ? end_time : std::numeric_limits<int64>::max()); + statement.BindInt(2, DownloadItem::COMPLETE); + statement.BindInt(3, DownloadItem::CANCELLED); + statement.Run(); +} + +void DownloadDatabase::SearchDownloads(std::vector<int64>* results, + const string16& search_text) { + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT id FROM downloads WHERE url LIKE ? " + "OR full_path LIKE ? ORDER BY id")); + if (!statement) + return; + + std::string text("%"); + text.append(UTF16ToUTF8(search_text)); + text.push_back('%'); + statement.BindString(0, text); + statement.BindString(1, text); + + while (statement.Step()) + results->push_back(statement.ColumnInt64(0)); +} + +} // namespace history diff --git a/chrome/browser/history/download_database.h b/chrome/browser/history/download_database.h new file mode 100644 index 0000000..11adf31 --- /dev/null +++ b/chrome/browser/history/download_database.h @@ -0,0 +1,74 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef CHROME_BROWSER_HISTORY_DOWNLOAD_DATABASE_H_ +#define CHROME_BROWSER_HISTORY_DOWNLOAD_DATABASE_H_ + +#include "chrome/browser/history/history_types.h" + +struct DownloadCreateInfo; +class FilePath; + +namespace sql { +class Connection; +} + +namespace history { + +// Maintains a table of downloads. +class DownloadDatabase { + public: + // Must call InitDownloadTable before using any other functions. + DownloadDatabase(); + virtual ~DownloadDatabase(); + + // Get all the downloads from the database. + void QueryDownloads(std::vector<DownloadCreateInfo>* results); + + // Update the state of one download. Returns true if successful. + bool UpdateDownload(int64 received_bytes, int32 state, DownloadID db_handle); + + // Update the path of one download. Returns true if successful. + bool UpdateDownloadPath(const FilePath& path, DownloadID db_handle); + + // Fixes state of the download entries. Sometimes entries with IN_PROGRESS + // state are not updated during browser shutdown (particularly when crashing). + // On the next start such entries are considered canceled. This functions + // fixes such entries. + bool CleanUpInProgressEntries(); + + // Create a new database entry for one download and return its primary db id. + int64 CreateDownload(const DownloadCreateInfo& info); + + // Remove a download from the database. + void RemoveDownload(DownloadID db_handle); + + // Remove all completed downloads that started after |remove_begin| + // (inclusive) and before |remove_end|. You may use null Time values + // to do an unbounded delete in either direction. This function ignores + // all downloads that are in progress or are waiting to be cancelled. + void RemoveDownloadsBetween(base::Time remove_begin, base::Time remove_end); + + // Search for downloads matching the search text. + void SearchDownloads(std::vector<int64>* results, + const string16& search_text); + + protected: + // Returns the database for the functions in this interface. 
+ virtual sql::Connection& GetDB() = 0; + + // Creates the downloads table if needed. + bool InitDownloadTable(); + + // Used to quickly clear the downloads. First you would drop it, then you + // would re-initialize it. + bool DropDownloadTable(); + + private: + DISALLOW_COPY_AND_ASSIGN(DownloadDatabase); +}; + +} // namespace history + +#endif // CHROME_BROWSER_HISTORY_DOWNLOAD_DATABASE_H_ diff --git a/chrome/browser/history/download_types.h b/chrome/browser/history/download_types.h new file mode 100644 index 0000000..642ac5b --- /dev/null +++ b/chrome/browser/history/download_types.h @@ -0,0 +1,104 @@ +// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// Download creation struct used for querying the history service. + +#ifndef CHROME_BROWSER_HISTORY_DOWNLOAD_TYPES_H_ +#define CHROME_BROWSER_HISTORY_DOWNLOAD_TYPES_H_ + +#include <string> + +#include "base/basictypes.h" +#include "base/file_path.h" +#include "base/time.h" +#include "chrome/browser/download/download_file.h" +#include "googleurl/src/gurl.h" + +// Used for informing the download database of a new download, where we don't +// want to pass DownloadItems between threads. The history service also uses a +// vector of these structs for passing us the state of all downloads at +// initialization time (see DownloadQueryInfo below). 
+struct DownloadCreateInfo { + DownloadCreateInfo(const FilePath& path, + const GURL& url, + base::Time start_time, + int64 received_bytes, + int64 total_bytes, + int32 state, + int32 download_id) + : path(path), + url(url), + path_uniquifier(0), + start_time(start_time), + received_bytes(received_bytes), + total_bytes(total_bytes), + state(state), + download_id(download_id), + child_id(-1), + render_view_id(-1), + request_id(-1), + db_handle(0), + prompt_user_for_save_location(false), + is_dangerous(false), + is_extension_install(false) { + } + + DownloadCreateInfo() + : path_uniquifier(0), + received_bytes(0), + total_bytes(0), + state(-1), + download_id(-1), + child_id(-1), + render_view_id(-1), + request_id(-1), + db_handle(0), + prompt_user_for_save_location(false), + is_dangerous(false), + is_extension_install(false) { + } + + // DownloadItem fields + FilePath path; + GURL url; + GURL referrer_url; + FilePath suggested_path; + // A number that should be added to the suggested path to make it unique. + // 0 means no number should be appended. Not actually stored in the db. + int path_uniquifier; + base::Time start_time; + int64 received_bytes; + int64 total_bytes; + int32 state; + int32 download_id; + int child_id; + int render_view_id; + int request_id; + int64 db_handle; + std::string content_disposition; + std::string mime_type; + // The value of the content type header sent with the downloaded item. It + // may be different from |mime_type|, which may be set based on heuristics + // which may look at the file extension and first few bytes of the file. + std::string original_mime_type; + + // True if we should display the 'save as...' UI and prompt the user + // for the download location. + // False if the UI should be suppressed and the download performed to the + // default location. + bool prompt_user_for_save_location; + // Whether this download is potentially dangerous (ex: exe, dll, ...). 
+ bool is_dangerous; + // The original name for a dangerous download. + FilePath original_name; + // Whether this download is for extension install or not. + bool is_extension_install; + // The charset of the referring page where the download request comes from. + // It's used to construct a suggested filename. + std::string referrer_charset; + // The download file save info. + DownloadSaveInfo save_info; +}; + +#endif // CHROME_BROWSER_HISTORY_DOWNLOAD_TYPES_H_ diff --git a/chrome/browser/history/expire_history_backend.cc b/chrome/browser/history/expire_history_backend.cc new file mode 100644 index 0000000..bd471ad --- /dev/null +++ b/chrome/browser/history/expire_history_backend.cc @@ -0,0 +1,698 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/history/expire_history_backend.h" + +#include <algorithm> +#include <limits> + +#include "base/compiler_specific.h" +#include "base/file_util.h" +#include "base/message_loop.h" +#include "chrome/browser/bookmarks/bookmark_service.h" +#include "chrome/browser/history/archived_database.h" +#include "chrome/browser/history/history_database.h" +#include "chrome/browser/history/history_notifications.h" +#include "chrome/browser/history/text_database.h" +#include "chrome/browser/history/text_database_manager.h" +#include "chrome/browser/history/thumbnail_database.h" +#include "chrome/common/notification_type.h" + +using base::Time; +using base::TimeDelta; + +namespace history { + +namespace { + +// The number of days by which the expiration threshold is advanced for items +// that we want to expire early, such as those of AUTO_SUBFRAME transition type. +const int kEarlyExpirationAdvanceDays = 30; + +// Reads all types of visits starting from beginning of time to the given end +// time. This is the most general reader. 
+class AllVisitsReader : public ExpiringVisitsReader { + public: + virtual bool Read(Time end_time, HistoryDatabase* db, + VisitVector* visits, int max_visits) const { + DCHECK(db) << "must have a database to operate upon"; + DCHECK(visits) << "visit vector has to exist in order to populate it"; + + db->GetAllVisitsInRange(Time(), end_time, max_visits, visits); + // When we got the maximum number of visits we asked for, we say there could + // be additional things to expire now. + return static_cast<int>(visits->size()) == max_visits; + } +}; + +// Reads only AUTO_SUBFRAME visits, within a computed range. The range is +// computed as follows: +// * |begin_time| is read from the meta table. This value is updated whenever +// there are no more additional visits to expire by this reader. +// * |end_time| is advanced forward by a constant (kEarlyExpirationAdvanceDay), +// but not past the current time. +class AutoSubframeVisitsReader : public ExpiringVisitsReader { + public: + virtual bool Read(Time end_time, HistoryDatabase* db, + VisitVector* visits, int max_visits) const { + DCHECK(db) << "must have a database to operate upon"; + DCHECK(visits) << "visit vector has to exist in order to populate it"; + + Time begin_time = db->GetEarlyExpirationThreshold(); + // Advance |end_time| to expire early. + Time early_end_time = end_time + + TimeDelta::FromDays(kEarlyExpirationAdvanceDays); + + // We don't want to set the early expiration threshold to a time in the + // future. + Time now = Time::Now(); + if (early_end_time > now) + early_end_time = now; + + db->GetVisitsInRangeForTransition(begin_time, early_end_time, + max_visits, + PageTransition::AUTO_SUBFRAME, + visits); + bool more = static_cast<int>(visits->size()) == max_visits; + if (!more) + db->UpdateEarlyExpirationThreshold(early_end_time); + + return more; + } +}; + +// Returns true if this visit is worth archiving. 
Otherwise, this visit is not +// worth saving (for example, subframe navigations and redirects) and we can +// just delete it when it gets old. +bool ShouldArchiveVisit(const VisitRow& visit) { + int no_qualifier = PageTransition::StripQualifier(visit.transition); + + // These types of transitions are always "important" and the user will want + // to see them. + if (no_qualifier == PageTransition::TYPED || + no_qualifier == PageTransition::AUTO_BOOKMARK || + no_qualifier == PageTransition::START_PAGE) + return true; + + // Only archive these "less important" transitions when they were the final + // navigation and not part of a redirect chain. + if ((no_qualifier == PageTransition::LINK || + no_qualifier == PageTransition::FORM_SUBMIT || + no_qualifier == PageTransition::KEYWORD || + no_qualifier == PageTransition::GENERATED) && + visit.transition & PageTransition::CHAIN_END) + return true; + + // The transition types we ignore are AUTO_SUBFRAME and MANUAL_SUBFRAME. + return false; +} + +// The number of visits we will expire every time we check for old items. This +// prevents us from doing too much work any given time. +const int kNumExpirePerIteration = 10; + +// The number of seconds between checking for items that should be expired when +// we think there might be more items to expire. This timeout is used when the +// last expiration found at least kNumExpirePerIteration and we want to check +// again "soon." +const int kExpirationDelaySec = 30; + +// The number of minutes between checking, as with kExpirationDelaySec, but +// when we didn't find enough things to expire last time. If there was no +// history to expire last iteration, it's likely there is nothing next +// iteration, so we want to wait longer before checking to avoid wasting CPU. +const int kExpirationEmptyDelayMin = 5; + +// The number of minutes that we wait for before scheduling a task to +// delete old history index files. 
+const int kIndexExpirationDelayMin = 2; + +// The number of the most recent months for which we do not want to delete +// the history index files. +const int kStoreHistoryIndexesForMonths = 12; + +} // namespace + +struct ExpireHistoryBackend::DeleteDependencies { + // The time range affected. These can be is_null() to be unbounded in one + // or both directions. + base::Time begin_time, end_time; + + // ----- Filled by DeleteVisitRelatedInfo or manually if a function doesn't + // call that function. ----- + + // The unique URL rows affected by this delete. + std::map<URLID, URLRow> affected_urls; + + // ----- Filled by DeleteOneURL ----- + + // The URLs deleted during this operation. + std::vector<URLRow> deleted_urls; + + // The list of all favicon IDs that the affected URLs had. Favicons will be + // shared between all URLs with the same favicon, so this is the set of IDs + // that we will need to check when the delete operations are complete. + std::set<FavIconID> affected_favicons; + + // Tracks the set of databases that have changed so we can optimize when + // when we're done. + TextDatabaseManager::ChangeSet text_db_changes; +}; + +ExpireHistoryBackend::ExpireHistoryBackend( + BroadcastNotificationDelegate* delegate, + BookmarkService* bookmark_service) + : delegate_(delegate), + main_db_(NULL), + archived_db_(NULL), + thumb_db_(NULL), + text_db_(NULL), + ALLOW_THIS_IN_INITIALIZER_LIST(factory_(this)), + bookmark_service_(bookmark_service) { +} + +ExpireHistoryBackend::~ExpireHistoryBackend() { +} + +void ExpireHistoryBackend::SetDatabases(HistoryDatabase* main_db, + ArchivedDatabase* archived_db, + ThumbnailDatabase* thumb_db, + TextDatabaseManager* text_db) { + main_db_ = main_db; + archived_db_ = archived_db; + thumb_db_ = thumb_db; + text_db_ = text_db; +} + +void ExpireHistoryBackend::DeleteURL(const GURL& url) { + if (!main_db_) + return; + + URLRow url_row; + if (!main_db_->GetRowForURL(url, &url_row)) + return; // Nothing to delete. 
+ + // Collect all the visits and delete them. Note that we don't give up if + // there are no visits, since the URL could still have an entry that we should + // delete. + // TODO(brettw): bug 1171148: We should also delete from the archived DB. + VisitVector visits; + main_db_->GetVisitsForURL(url_row.id(), &visits); + + DeleteDependencies dependencies; + DeleteVisitRelatedInfo(visits, &dependencies); + + // We skip ExpireURLsForVisits (since we are deleting from the URL, and not + // starting with visits in a given time range). We therefore need to call the + // deletion and favicon update functions manually. + + BookmarkService* bookmark_service = GetBookmarkService(); + bool is_bookmarked = + (bookmark_service && bookmark_service->IsBookmarked(url)); + + DeleteOneURL(url_row, is_bookmarked, &dependencies); + if (!is_bookmarked) + DeleteFaviconsIfPossible(dependencies.affected_favicons); + + if (text_db_) + text_db_->OptimizeChangedDatabases(dependencies.text_db_changes); + + BroadcastDeleteNotifications(&dependencies); +} + +void ExpireHistoryBackend::ExpireHistoryBetween( + const std::set<GURL>& restrict_urls, Time begin_time, Time end_time) { + if (!main_db_) + return; + + // There may be stuff in the text database manager's temporary cache. + if (text_db_) + text_db_->DeleteFromUncommitted(restrict_urls, begin_time, end_time); + + // Find the affected visits and delete them. + // TODO(brettw): bug 1171164: We should query the archived database here, too. 
+ VisitVector visits; + main_db_->GetAllVisitsInRange(begin_time, end_time, 0, &visits); + if (!restrict_urls.empty()) { + std::set<URLID> url_ids; + for (std::set<GURL>::const_iterator url = restrict_urls.begin(); + url != restrict_urls.end(); ++url) + url_ids.insert(main_db_->GetRowForURL(*url, NULL)); + VisitVector all_visits; + all_visits.swap(visits); + for (VisitVector::iterator visit = all_visits.begin(); + visit != all_visits.end(); ++visit) { + if (url_ids.find(visit->url_id) != url_ids.end()) + visits.push_back(*visit); + } + } + if (visits.empty()) + return; + + DeleteDependencies dependencies; + DeleteVisitRelatedInfo(visits, &dependencies); + + // Delete or update the URLs affected. We want to update the visit counts + // since this is called by the user who wants to delete their recent history, + // and we don't want to leave any evidence. + ExpireURLsForVisits(visits, &dependencies); + DeleteFaviconsIfPossible(dependencies.affected_favicons); + + // An is_null begin time means that all history should be deleted. + BroadcastDeleteNotifications(&dependencies); + + // Pick up any bits possibly left over. + ParanoidExpireHistory(); +} + +void ExpireHistoryBackend::ArchiveHistoryBefore(Time end_time) { + if (!main_db_) + return; + + // Archive as much history as possible before the given date. 
+ ArchiveSomeOldHistory(end_time, GetAllVisitsReader(), + std::numeric_limits<size_t>::max()); + ParanoidExpireHistory(); +} + +void ExpireHistoryBackend::InitWorkQueue() { + DCHECK(work_queue_.empty()) << "queue has to be empty prior to init"; + + for (size_t i = 0; i < readers_.size(); i++) + work_queue_.push(readers_[i]); +} + +const ExpiringVisitsReader* ExpireHistoryBackend::GetAllVisitsReader() { + if (!all_visits_reader_.get()) + all_visits_reader_.reset(new AllVisitsReader()); + return all_visits_reader_.get(); +} + +const ExpiringVisitsReader* + ExpireHistoryBackend::GetAutoSubframeVisitsReader() { + if (!auto_subframe_visits_reader_.get()) + auto_subframe_visits_reader_.reset(new AutoSubframeVisitsReader()); + return auto_subframe_visits_reader_.get(); +} + +void ExpireHistoryBackend::StartArchivingOldStuff( + TimeDelta expiration_threshold) { + expiration_threshold_ = expiration_threshold; + + // Remove all readers, just in case this method was called before. + readers_.clear(); + // For now, we explicitly add all known readers. If we come up with more + // reader types (in case we want to expire different types of visits in + // different ways), we can make it be populated by creator/owner of + // ExpireHistoryBackend. + readers_.push_back(GetAllVisitsReader()); + readers_.push_back(GetAutoSubframeVisitsReader()); + + // Initialize the queue with all tasks for the first set of iterations. 
+ InitWorkQueue(); + ScheduleArchive(); + ScheduleExpireHistoryIndexFiles(); +} + +void ExpireHistoryBackend::DeleteFaviconsIfPossible( + const std::set<FavIconID>& favicon_set) { + if (!main_db_ || !thumb_db_) + return; + + for (std::set<FavIconID>::const_iterator i = favicon_set.begin(); + i != favicon_set.end(); ++i) { + if (!main_db_->IsFavIconUsed(*i)) + thumb_db_->DeleteFavIcon(*i); + } +} + +void ExpireHistoryBackend::BroadcastDeleteNotifications( + DeleteDependencies* dependencies) { + if (!dependencies->deleted_urls.empty()) { + // Broadcast the URL deleted notification. Note that we also broadcast when + // we were requested to delete everything even if that was a NOP, since + // some components care to know when history is deleted (it's up to them to + // determine if they care whether anything was deleted). + URLsDeletedDetails* deleted_details = new URLsDeletedDetails; + deleted_details->all_history = false; + std::vector<URLRow> typed_urls_changed; // Collect this for later. + for (size_t i = 0; i < dependencies->deleted_urls.size(); i++) { + deleted_details->urls.insert(dependencies->deleted_urls[i].url()); + if (dependencies->deleted_urls[i].typed_count() > 0) + typed_urls_changed.push_back(dependencies->deleted_urls[i]); + } + delegate_->BroadcastNotifications(NotificationType::HISTORY_URLS_DELETED, + deleted_details); + + // Broadcast the typed URL changed modification (this updates the inline + // autocomplete database). + // + // Note: if we ever need to broadcast changes to more than just typed URLs, + // this notification should be changed rather than a new "non-typed" + // notification added. The in-memory database can always do the filtering + // itself in that case. 
+ if (!typed_urls_changed.empty()) { + URLsModifiedDetails* modified_details = new URLsModifiedDetails; + modified_details->changed_urls.swap(typed_urls_changed); + delegate_->BroadcastNotifications( + NotificationType::HISTORY_TYPED_URLS_MODIFIED, + modified_details); + } + } +} + +void ExpireHistoryBackend::DeleteVisitRelatedInfo( + const VisitVector& visits, + DeleteDependencies* dependencies) { + for (size_t i = 0; i < visits.size(); i++) { + // Delete the visit itself. + main_db_->DeleteVisit(visits[i]); + + // Add the URL row to the affected URL list. + std::map<URLID, URLRow>::const_iterator found = + dependencies->affected_urls.find(visits[i].url_id); + const URLRow* cur_row = NULL; + if (found == dependencies->affected_urls.end()) { + URLRow row; + if (!main_db_->GetURLRow(visits[i].url_id, &row)) + continue; + dependencies->affected_urls[visits[i].url_id] = row; + cur_row = &dependencies->affected_urls[visits[i].url_id]; + } else { + cur_row = &found->second; + } + + // Delete any associated full-text indexed data. + if (visits[i].is_indexed && text_db_) { + text_db_->DeletePageData(visits[i].visit_time, cur_row->url(), + &dependencies->text_db_changes); + } + } +} + +void ExpireHistoryBackend::DeleteOneURL( + const URLRow& url_row, + bool is_bookmarked, + DeleteDependencies* dependencies) { + main_db_->DeleteSegmentForURL(url_row.id()); + + // The URL may be in the text database manager's temporary cache. + if (text_db_) { + std::set<GURL> restrict_urls; + restrict_urls.insert(url_row.url()); + text_db_->DeleteFromUncommitted(restrict_urls, base::Time(), base::Time()); + } + + if (!is_bookmarked) { + dependencies->deleted_urls.push_back(url_row); + + // Delete stuff that references this URL. + if (thumb_db_) + thumb_db_->DeleteThumbnail(url_row.id()); + + // Collect shared information. + if (url_row.favicon_id()) + dependencies->affected_favicons.insert(url_row.favicon_id()); + + // Last, delete the URL entry. 
+ main_db_->DeleteURLRow(url_row.id()); + } +} + +URLID ExpireHistoryBackend::ArchiveOneURL(const URLRow& url_row) { + if (!archived_db_) + return 0; + + // See if this URL is present in the archived database already. Note that + // we must look up by ID since the URL ID will be different. + URLRow archived_row; + if (archived_db_->GetRowForURL(url_row.url(), &archived_row)) { + // TODO(sky): bug 1168470, need to archive past search terms. + // FIXME(brettw) should be copy the visit counts over? This will mean that + // the main DB's visit counts are only for the last 3 months rather than + // accumulative. + archived_row.set_last_visit(url_row.last_visit()); + archived_db_->UpdateURLRow(archived_row.id(), archived_row); + return archived_row.id(); + } + + // This row is not in the archived DB, add it. + return archived_db_->AddURL(url_row); +} + +namespace { + +struct ChangedURL { + ChangedURL() : visit_count(0), typed_count(0) {} + int visit_count; + int typed_count; +}; + +} // namespace + +void ExpireHistoryBackend::ExpireURLsForVisits( + const VisitVector& visits, + DeleteDependencies* dependencies) { + // First find all unique URLs and the number of visits we're deleting for + // each one. + std::map<URLID, ChangedURL> changed_urls; + for (size_t i = 0; i < visits.size(); i++) { + ChangedURL& cur = changed_urls[visits[i].url_id]; + cur.visit_count++; + // NOTE: This code must stay in sync with HistoryBackend::AddPageVisit(). + // TODO(pkasting): http://b/1148304 We shouldn't be marking so many URLs as + // typed, which would eliminate the need for this code. + PageTransition::Type transition = + PageTransition::StripQualifier(visits[i].transition); + if ((transition == PageTransition::TYPED && + !PageTransition::IsRedirect(visits[i].transition)) || + transition == PageTransition::KEYWORD_GENERATED) + cur.typed_count++; + } + + // Check each unique URL with deleted visits. 
+ BookmarkService* bookmark_service = GetBookmarkService(); + for (std::map<URLID, ChangedURL>::const_iterator i = changed_urls.begin(); + i != changed_urls.end(); ++i) { + // The unique URL rows should already be filled into the dependencies. + URLRow& url_row = dependencies->affected_urls[i->first]; + if (!url_row.id()) + continue; // URL row doesn't exist in the database. + + // Check if there are any other visits for this URL and update the time + // (the time change may not actually be synced to disk below when we're + // archiving). + VisitRow last_visit; + if (main_db_->GetMostRecentVisitForURL(url_row.id(), &last_visit)) + url_row.set_last_visit(last_visit.visit_time); + else + url_row.set_last_visit(Time()); + + // Don't delete URLs with visits still in the DB, or bookmarked. + bool is_bookmarked = + (bookmark_service && bookmark_service->IsBookmarked(url_row.url())); + if (!is_bookmarked && url_row.last_visit().is_null()) { + // Not bookmarked and no more visits. Nuke the url. + DeleteOneURL(url_row, is_bookmarked, dependencies); + } else { + // NOTE: The calls to std::max() below are a backstop, but they should + // never actually be needed unless the database is corrupt (I think). + url_row.set_visit_count( + std::max(0, url_row.visit_count() - i->second.visit_count)); + url_row.set_typed_count( + std::max(0, url_row.typed_count() - i->second.typed_count)); + + // Update the db with the new details. + main_db_->UpdateURLRow(url_row.id(), url_row); + } + } +} + +void ExpireHistoryBackend::ArchiveURLsAndVisits( + const VisitVector& visits, + DeleteDependencies* dependencies) { + if (!archived_db_) + return; + + // Make sure all unique URL rows are added to the dependency list and the + // archived database. We will also keep the mapping between the main DB URLID + // and the archived one. 
+ std::map<URLID, URLID> main_id_to_archived_id; + for (size_t i = 0; i < visits.size(); i++) { + std::map<URLID, URLRow>::const_iterator found = + dependencies->affected_urls.find(visits[i].url_id); + if (found == dependencies->affected_urls.end()) { + // Unique URL encountered, archive it. + URLRow row; // Row in the main DB. + URLID archived_id; // ID in the archived DB. + if (!main_db_->GetURLRow(visits[i].url_id, &row) || + !(archived_id = ArchiveOneURL(row))) { + // Failure archiving, skip this one. + continue; + } + + // Only add URL to the dependency list once we know we successfully + // archived it. + main_id_to_archived_id[row.id()] = archived_id; + dependencies->affected_urls[row.id()] = row; + } + } + + // Now archive the visits since we know the URL ID to make them reference. + // The source visit list should still reference the visits in the main DB, but + // we will update it to reflect only the visits that were successfully + // archived. + for (size_t i = 0; i < visits.size(); i++) { + // Construct the visit that we will add to the archived database. We do + // not store referring visits since we delete many of the visits when + // archiving. + VisitRow cur_visit(visits[i]); + cur_visit.url_id = main_id_to_archived_id[cur_visit.url_id]; + cur_visit.referring_visit = 0; + archived_db_->AddVisit(&cur_visit); + // Ignore failures, we will delete it from the main DB no matter what. + } +} + +void ExpireHistoryBackend::ScheduleArchive() { + TimeDelta delay; + if (work_queue_.empty()) { + // If work queue is empty, reset the work queue to contain all tasks and + // schedule next iteration after a longer delay. 
+ InitWorkQueue(); + delay = TimeDelta::FromMinutes(kExpirationEmptyDelayMin); + } else { + delay = TimeDelta::FromSeconds(kExpirationDelaySec); + } + + MessageLoop::current()->PostDelayedTask(FROM_HERE, factory_.NewRunnableMethod( + &ExpireHistoryBackend::DoArchiveIteration), delay.InMilliseconds()); +} + +void ExpireHistoryBackend::DoArchiveIteration() { + DCHECK(!work_queue_.empty()) << "queue has to be non-empty"; + + const ExpiringVisitsReader* reader = work_queue_.front(); + bool more_to_expire = ArchiveSomeOldHistory(GetCurrentArchiveTime(), reader, + kNumExpirePerIteration); + + work_queue_.pop(); + // If there are more items to expire, add the reader back to the queue, thus + // creating a new task for future iterations. + if (more_to_expire) + work_queue_.push(reader); + + ScheduleArchive(); +} + +bool ExpireHistoryBackend::ArchiveSomeOldHistory( + base::Time end_time, + const ExpiringVisitsReader* reader, + int max_visits) { + if (!main_db_) + return false; + + // Add an extra time unit to given end time, because + // GetAllVisitsInRange, et al. queries' end value is non-inclusive. + Time effective_end_time = + Time::FromInternalValue(end_time.ToInternalValue() + 1); + + VisitVector affected_visits; + bool more_to_expire = reader->Read(effective_end_time, main_db_, + &affected_visits, max_visits); + + // Some visits we'll delete while others we'll archive. + VisitVector deleted_visits, archived_visits; + for (size_t i = 0; i < affected_visits.size(); i++) { + if (ShouldArchiveVisit(affected_visits[i])) + archived_visits.push_back(affected_visits[i]); + else + deleted_visits.push_back(affected_visits[i]); + } + + // Do the actual archiving. 
+ DeleteDependencies archived_dependencies; + ArchiveURLsAndVisits(archived_visits, &archived_dependencies); + DeleteVisitRelatedInfo(archived_visits, &archived_dependencies); + + DeleteDependencies deleted_dependencies; + DeleteVisitRelatedInfo(deleted_visits, &deleted_dependencies); + + // This will remove or archive all the affected URLs. Must do the deleting + // cleanup before archiving so the delete dependencies structure references + // only those URLs that were actually deleted instead of having some visits + // archived and then the rest deleted. + ExpireURLsForVisits(deleted_visits, &deleted_dependencies); + ExpireURLsForVisits(archived_visits, &archived_dependencies); + + // Create a union of all affected favicons (we don't store favicons for + // archived URLs) and delete them. + std::set<FavIconID> affected_favicons( + archived_dependencies.affected_favicons); + for (std::set<FavIconID>::const_iterator i = + deleted_dependencies.affected_favicons.begin(); + i != deleted_dependencies.affected_favicons.end(); ++i) { + affected_favicons.insert(*i); + } + DeleteFaviconsIfPossible(affected_favicons); + + // Send notifications for the stuff that was deleted. These won't normally be + // in history views since they were subframes, but they will be in the visited + // link system, which needs to be updated now. This function is smart enough + // to not do anything if nothing was deleted. + BroadcastDeleteNotifications(&deleted_dependencies); + + return more_to_expire; +} + +void ExpireHistoryBackend::ParanoidExpireHistory() { + // FIXME(brettw): Bug 1067331: write this to clean up any errors. +} + +void ExpireHistoryBackend::ScheduleExpireHistoryIndexFiles() { + if (!text_db_) { + // Can't expire old history index files because we + // don't know where they're located. 
+ return; + } + + TimeDelta delay = TimeDelta::FromMinutes(kIndexExpirationDelayMin); + MessageLoop::current()->PostDelayedTask( + FROM_HERE, factory_.NewRunnableMethod( + &ExpireHistoryBackend::DoExpireHistoryIndexFiles), + delay.InMilliseconds()); +} + +void ExpireHistoryBackend::DoExpireHistoryIndexFiles() { + Time::Exploded exploded; + Time::Now().LocalExplode(&exploded); + int cutoff_month = + exploded.year * 12 + exploded.month - kStoreHistoryIndexesForMonths; + TextDatabase::DBIdent cutoff_id = + (cutoff_month / 12) * 100 + (cutoff_month % 12); + + FilePath::StringType history_index_files_pattern = TextDatabase::file_base(); + history_index_files_pattern.append(FILE_PATH_LITERAL("*")); + file_util::FileEnumerator file_enumerator( + text_db_->GetDir(), false, file_util::FileEnumerator::FILES, + history_index_files_pattern); + for (FilePath file = file_enumerator.Next(); !file.empty(); + file = file_enumerator.Next()) { + TextDatabase::DBIdent file_id = TextDatabase::FileNameToID(file); + if (file_id < cutoff_id) + file_util::Delete(file, false); + } +} + +BookmarkService* ExpireHistoryBackend::GetBookmarkService() { + // We use the bookmark service to determine if a URL is bookmarked. The + // bookmark service is loaded on a separate thread and may not be done by the + // time we get here. We therefor block until the bookmarks have finished + // loading. + if (bookmark_service_) + bookmark_service_->BlockTillLoaded(); + return bookmark_service_; +} + +} // namespace history diff --git a/chrome/browser/history/expire_history_backend.h b/chrome/browser/history/expire_history_backend.h new file mode 100644 index 0000000..9f060ed --- /dev/null +++ b/chrome/browser/history/expire_history_backend.h @@ -0,0 +1,290 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef CHROME_BROWSER_HISTORY_EXPIRE_HISTORY_BACKEND_H_ +#define CHROME_BROWSER_HISTORY_EXPIRE_HISTORY_BACKEND_H_ + +#include <queue> +#include <set> +#include <vector> + +#include "base/basictypes.h" +#include "base/gtest_prod_util.h" +#include "base/task.h" +#include "base/time.h" +#include "base/scoped_ptr.h" +#include "chrome/browser/history/history_types.h" + +class BookmarkService; +class GURL; +class NotificationType; +class TestingProfile; + +namespace history { + +class ArchivedDatabase; +class HistoryDatabase; +struct HistoryDetails; +class TextDatabaseManager; +class ThumbnailDatabase; + +// Delegate used to broadcast notifications to the main thread. +class BroadcastNotificationDelegate { + public: + // Schedules a broadcast of the given notification on the application main + // thread. The details argument will have ownership taken by this function. + virtual void BroadcastNotifications(NotificationType type, + HistoryDetails* details_deleted) = 0; + + protected: + virtual ~BroadcastNotificationDelegate() {} +}; + +// Encapsulates visit expiration criteria and type of visits to expire. +class ExpiringVisitsReader { + public: + virtual ~ExpiringVisitsReader() {} + // Populates |visits| from |db|, using provided |end_time| and |max_visits| + // cap. + virtual bool Read(base::Time end_time, HistoryDatabase* db, + VisitVector* visits, int max_visits) const = 0; +}; + +typedef std::vector<const ExpiringVisitsReader*> ExpiringVisitsReaders; + +// Helper component to HistoryBackend that manages expiration and deleting of +// history, as well as moving data from the main database to the archived +// database as it gets old. +// +// It will automatically start periodically archiving old history once you call +// StartArchivingOldStuff(). +class ExpireHistoryBackend { + public: + // The delegate pointer must be non-NULL. We will NOT take ownership of it. + // BookmarkService may be NULL. 
The BookmarkService is used when expiring + // URLs so that we don't remove any URLs or favicons that are bookmarked + // (visits are removed though). + ExpireHistoryBackend(BroadcastNotificationDelegate* delegate, + BookmarkService* bookmark_service); + ~ExpireHistoryBackend(); + + // Completes initialization by setting the databases that this class will use. + void SetDatabases(HistoryDatabase* main_db, + ArchivedDatabase* archived_db, + ThumbnailDatabase* thumb_db, + TextDatabaseManager* text_db); + + // Begins periodic expiration of history older than the given threshold. This + // will continue until the object is deleted. + void StartArchivingOldStuff(base::TimeDelta expiration_threshold); + + // Deletes everything associated with a URL. + void DeleteURL(const GURL& url); + + // Removes all visits to restrict_urls (or all URLs if empty) in the given + // time range, updating the URLs accordingly, + void ExpireHistoryBetween(const std::set<GURL>& restrict_urls, + base::Time begin_time, base::Time end_time); + + // Archives all visits before and including the given time, updating the URLs + // accordingly. This function is intended for migrating old databases + // (which encompased all time) to the tiered structure and testing, and + // probably isn't useful for anything else. + void ArchiveHistoryBefore(base::Time end_time); + + // Returns the current time that we are archiving stuff to. This will return + // the threshold in absolute time rather than a delta, so the caller should + // not save it. 
+ base::Time GetCurrentArchiveTime() const { + return base::Time::Now() - expiration_threshold_; + } + + private: + FRIEND_TEST_ALL_PREFIXES(ExpireHistoryTest, DeleteTextIndexForURL); + FRIEND_TEST_ALL_PREFIXES(ExpireHistoryTest, DeleteFaviconsIfPossible); + FRIEND_TEST_ALL_PREFIXES(ExpireHistoryTest, ArchiveSomeOldHistory); + FRIEND_TEST_ALL_PREFIXES(ExpireHistoryTest, ExpiringVisitsReader); + friend class ::TestingProfile; + + struct DeleteDependencies; + + // Removes the data from the full text index associated with the given URL + // string/ID pair. If |update_visits| is set, the visits that reference the + // indexed data will be updated to reflect the fact that the indexed data is + // gone. Setting this to false is a performance optimization when the caller + // knows that the visits will be deleted after the call. + // + // TODO(brettw) when we have an "archived" history database, this should take + // a flag to optionally delete from there. This way it can be used for page + // re-indexing as well as for full URL deletion. + void DeleteTextIndexForURL(const GURL& url, URLID url_id, bool update_visits); + + // Deletes the visit-related stuff for all the visits in the given list, and + // adds the rows for unique URLs affected to the affected_urls list in + // the dependencies structure. + // + // Deleted information is the visits themselves and the full-text index + // entries corresponding to them. + void DeleteVisitRelatedInfo(const VisitVector& visits, + DeleteDependencies* dependencies); + + // Moves the given visits from the main database to the archived one. + void ArchiveVisits(const VisitVector& visits); + + // Finds or deletes dependency information for the given URL. Information that + // is specific to this URL (URL row, thumbnails, full text indexed stuff, + // etc.) is deleted. + // + // This does not affect the visits! This is used for expiration as well as + // deleting from the UI, and they handle visits differently. 
+ // + // Other information will be collected and returned in the output containers. + // This includes some of the things deleted that are needed elsewhere, plus + // some things like favicons that could be shared by many URLs, and need to + // be checked for deletion (this allows us to delete many URLs with only one + // check for shared information at the end). + // + // Assumes the main_db_ is non-NULL. + // + // NOTE: If the url is bookmarked only the segments and text db are updated, + // everything else is unchanged. This is done so that bookmarks retain their + // favicons and thumbnails. + void DeleteOneURL(const URLRow& url_row, + bool is_bookmarked, + DeleteDependencies* dependencies); + + // Adds or merges the given URL row with the archived database, returning the + // ID of the URL in the archived database, or 0 on failure. The main (source) + // database will not be affected (the URL will have to be deleted later). + // + // Assumes the archived database is not NULL. + URLID ArchiveOneURL(const URLRow& url_row); + + // Deletes all the URLs in the given vector and handles their dependencies. + // This will delete starred URLs + void DeleteURLs(const std::vector<URLRow>& urls, + DeleteDependencies* dependencies); + + // Expiration involves removing visits, then propogating the visits out from + // there and delete any orphaned URLs. These will be added to the deleted URLs + // field of the dependencies and DeleteOneURL will handle deleting out from + // there. This function does not handle favicons. + // + // When a URL is not deleted and |archive| is not set, the last visit time and + // the visit and typed counts will be updated (we want to clear these when a + // user is deleting history manually, but not when we're normally expiring old + // things from history). + // + // The visits in the given vector should have already been deleted from the + // database, and the list of affected URLs already be filled into + // |depenencies->affected_urls|. 
+ // + // Starred URLs will not be deleted. The information in the dependencies that + // DeleteOneURL fills in will be updated, and this function will also delete + // any now-unused favicons. + void ExpireURLsForVisits(const VisitVector& visits, + DeleteDependencies* dependencies); + + // Creates entries in the archived database for the unique URLs referenced + // by the given visits. It will then add versions of the visits to that + // database. The source database WILL NOT BE MODIFIED. The source URLs and + // visits will have to be deleted in another pass. + // + // The affected URLs will be filled into the given dependencies structure. + void ArchiveURLsAndVisits(const VisitVector& visits, + DeleteDependencies* dependencies); + + // Deletes the favicons listed in the set if unused. Fails silently (we don't + // care about favicons so much, so don't want to stop everything if it fails). + void DeleteFaviconsIfPossible(const std::set<FavIconID>& favicon_id); + + // Broadcast the URL deleted notification. + void BroadcastDeleteNotifications(DeleteDependencies* dependencies); + + // Schedules a call to DoArchiveIteration. + void ScheduleArchive(); + + // Calls ArchiveSomeOldHistory to expire some amount of old history, according + // to the items in work queue, and schedules another call to happen in the + // future. + void DoArchiveIteration(); + + // Tries to expire the oldest |max_visits| visits from history that are older + // than |time_threshold|. The return value indicates if we think there might + // be more history to expire with the current time threshold (it does not + // indicate success or failure). + bool ArchiveSomeOldHistory(base::Time end_time, + const ExpiringVisitsReader* reader, + int max_visits); + + // Tries to detect possible bad history or inconsistencies in the database + // and deletes items. For example, URLs with no visits. + void ParanoidExpireHistory(); + + // Schedules a call to DoExpireHistoryIndexFiles. 
+ void ScheduleExpireHistoryIndexFiles(); + + // Deletes old history index files. + void DoExpireHistoryIndexFiles(); + + // Returns the BookmarkService, blocking until it is loaded. This may return + // NULL. + BookmarkService* GetBookmarkService(); + + // Initializes periodic expiration work queue by populating it with with tasks + // for all known readers. + void InitWorkQueue(); + + // Returns the reader for all visits. This method is only used by the unit + // tests. + const ExpiringVisitsReader* GetAllVisitsReader(); + + // Returns the reader for AUTO_SUBFRAME visits. This method is only used by + // the unit tests. + const ExpiringVisitsReader* GetAutoSubframeVisitsReader(); + + // Non-owning pointer to the notification delegate (guaranteed non-NULL). + BroadcastNotificationDelegate* delegate_; + + // Non-owning pointers to the databases we deal with (MAY BE NULL). + HistoryDatabase* main_db_; // Main history database. + ArchivedDatabase* archived_db_; // Old history. + ThumbnailDatabase* thumb_db_; // Thumbnails and favicons. + TextDatabaseManager* text_db_; // Full text index. + + // Used to generate runnable methods to do timers on this class. They will be + // automatically canceled when this class is deleted. + ScopedRunnableMethodFactory<ExpireHistoryBackend> factory_; + + // The threshold for "old" history where we will automatically expire it to + // the archived database. + base::TimeDelta expiration_threshold_; + + // List of all distinct types of readers. This list is used to populate the + // work queue. + ExpiringVisitsReaders readers_; + + // Work queue for periodic expiration tasks, used by DoArchiveIteration() to + // determine what to do at an iteration, as well as populate it for future + // iterations. + std::queue<const ExpiringVisitsReader*> work_queue_; + + // Readers for various types of visits. + // TODO(dglazkov): If you are adding another one, please consider reorganizing + // into a map. 
+ scoped_ptr<ExpiringVisitsReader> all_visits_reader_; + scoped_ptr<ExpiringVisitsReader> auto_subframe_visits_reader_; + + // The BookmarkService; may be null. This is owned by the Profile. + // + // Use GetBookmarkService to access this, which makes sure the service is + // loaded. + BookmarkService* bookmark_service_; + + DISALLOW_COPY_AND_ASSIGN(ExpireHistoryBackend); +}; + +} // namespace history + +#endif // CHROME_BROWSER_HISTORY_EXPIRE_HISTORY_BACKEND_H_ diff --git a/chrome/browser/history/expire_history_backend_unittest.cc b/chrome/browser/history/expire_history_backend_unittest.cc new file mode 100644 index 0000000..ca822bc --- /dev/null +++ b/chrome/browser/history/expire_history_backend_unittest.cc @@ -0,0 +1,812 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/basictypes.h" +#include "base/compiler_specific.h" +#include "base/file_path.h" +#include "base/file_util.h" +#include "base/path_service.h" +#include "base/scoped_ptr.h" +#include "base/utf_string_conversions.h" +#include "chrome/browser/bookmarks/bookmark_model.h" +#include "chrome/browser/history/archived_database.h" +#include "chrome/browser/history/expire_history_backend.h" +#include "chrome/browser/history/history_database.h" +#include "chrome/browser/history/history_notifications.h" +#include "chrome/browser/history/text_database_manager.h" +#include "chrome/browser/history/thumbnail_database.h" +#include "chrome/common/notification_service.h" +#include "chrome/common/thumbnail_score.h" +#include "chrome/tools/profiles/thumbnail-inl.h" +#include "gfx/codec/jpeg_codec.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "third_party/skia/include/core/SkBitmap.h" + +using base::Time; +using base::TimeDelta; +using base::TimeTicks; + +// Filename constants. 
+static const FilePath::CharType kTestDir[] = FILE_PATH_LITERAL("ExpireTest"); +static const FilePath::CharType kHistoryFile[] = FILE_PATH_LITERAL("History"); +static const FilePath::CharType kArchivedHistoryFile[] = + FILE_PATH_LITERAL("Archived History"); +static const FilePath::CharType kThumbnailFile[] = + FILE_PATH_LITERAL("Thumbnails"); + +// The test must be in the history namespace for the gtest forward declarations +// to work. It also eliminates a bunch of ugly "history::". +namespace history { + +// ExpireHistoryTest ----------------------------------------------------------- + +class ExpireHistoryTest : public testing::Test, + public BroadcastNotificationDelegate { + public: + ExpireHistoryTest() + : bookmark_model_(NULL), + ALLOW_THIS_IN_INITIALIZER_LIST(expirer_(this, &bookmark_model_)), + now_(Time::Now()) { + } + + protected: + // Called by individual tests when they want data populated. + void AddExampleData(URLID url_ids[3], Time visit_times[4]); + + // Returns true if the given favicon/thumanil has an entry in the DB. + bool HasFavIcon(FavIconID favicon_id); + bool HasThumbnail(URLID url_id); + + // Returns the number of text matches for the given URL in the example data + // added by AddExampleData. + int CountTextMatchesForURL(const GURL& url); + + // EXPECTs that each URL-specific history thing (basically, everything but + // favicons) is gone. + void EnsureURLInfoGone(const URLRow& row); + + // Clears the list of notifications received. 
+ void ClearLastNotifications() { + for (size_t i = 0; i < notifications_.size(); i++) + delete notifications_[i].second; + notifications_.clear(); + } + + void StarURL(const GURL& url) { + bookmark_model_.AddURL( + bookmark_model_.GetBookmarkBarNode(), 0, std::wstring(), url); + } + + static bool IsStringInFile(const FilePath& filename, const char* str); + + BookmarkModel bookmark_model_; + + MessageLoop message_loop_; + + ExpireHistoryBackend expirer_; + + scoped_ptr<HistoryDatabase> main_db_; + scoped_ptr<ArchivedDatabase> archived_db_; + scoped_ptr<ThumbnailDatabase> thumb_db_; + scoped_ptr<TextDatabaseManager> text_db_; + + // Time at the beginning of the test, so everybody agrees what "now" is. + const Time now_; + + // Notifications intended to be broadcast, we can check these values to make + // sure that the deletor is doing the correct broadcasts. We own the details + // pointers. + typedef std::vector< std::pair<NotificationType, HistoryDetails*> > + NotificationList; + NotificationList notifications_; + + // Directory for the history files. 
+ FilePath dir_; + + private: + void SetUp() { + FilePath temp_dir; + PathService::Get(base::DIR_TEMP, &temp_dir); + dir_ = temp_dir.Append(kTestDir); + file_util::Delete(dir_, true); + file_util::CreateDirectory(dir_); + + FilePath history_name = dir_.Append(kHistoryFile); + main_db_.reset(new HistoryDatabase); + if (main_db_->Init(history_name, FilePath()) != sql::INIT_OK) + main_db_.reset(); + + FilePath archived_name = dir_.Append(kArchivedHistoryFile); + archived_db_.reset(new ArchivedDatabase); + if (!archived_db_->Init(archived_name)) + archived_db_.reset(); + + FilePath thumb_name = dir_.Append(kThumbnailFile); + thumb_db_.reset(new ThumbnailDatabase); + if (thumb_db_->Init(thumb_name, NULL) != sql::INIT_OK) + thumb_db_.reset(); + + text_db_.reset(new TextDatabaseManager(dir_, + main_db_.get(), main_db_.get())); + if (!text_db_->Init(NULL)) + text_db_.reset(); + + expirer_.SetDatabases(main_db_.get(), archived_db_.get(), thumb_db_.get(), + text_db_.get()); + } + + void TearDown() { + ClearLastNotifications(); + + expirer_.SetDatabases(NULL, NULL, NULL, NULL); + + main_db_.reset(); + archived_db_.reset(); + thumb_db_.reset(); + text_db_.reset(); + file_util::Delete(dir_, true); + } + + // BroadcastNotificationDelegate implementation. + void BroadcastNotifications(NotificationType type, + HistoryDetails* details_deleted) { + // This gets called when there are notifications to broadcast. Instead, we + // store them so we can tell that the correct notifications were sent. + notifications_.push_back(std::make_pair(type, details_deleted)); + } +}; + +// The example data consists of 4 visits. The middle two visits are to the +// same URL, while the first and last are for unique ones. This allows a test +// for the oldest or newest to include both a URL that should get totally +// deleted (the one on the end) with one that should only get a visit deleted +// (with the one in the middle) when it picks the proper threshold time. 
+// +// Each visit has indexed data, each URL has thumbnail. The first two URLs will +// share the same favicon, while the last one will have a unique favicon. The +// second visit for the middle URL is typed. +// +// The IDs of the added URLs, and the times of the four added visits will be +// added to the given arrays. +void ExpireHistoryTest::AddExampleData(URLID url_ids[3], Time visit_times[4]) { + if (!main_db_.get() || !text_db_.get()) + return; + + // Four times for each visit. + visit_times[3] = Time::Now(); + visit_times[2] = visit_times[3] - TimeDelta::FromDays(1); + visit_times[1] = visit_times[3] - TimeDelta::FromDays(2); + visit_times[0] = visit_times[3] - TimeDelta::FromDays(3); + + // Two favicons. The first two URLs will share the same one, while the last + // one will have a unique favicon. + FavIconID favicon1 = thumb_db_->AddFavIcon(GURL("http://favicon/url1")); + FavIconID favicon2 = thumb_db_->AddFavIcon(GURL("http://favicon/url2")); + + // Three URLs. + URLRow url_row1(GURL("http://www.google.com/1")); + url_row1.set_last_visit(visit_times[0]); + url_row1.set_favicon_id(favicon1); + url_row1.set_visit_count(1); + url_ids[0] = main_db_->AddURL(url_row1); + + URLRow url_row2(GURL("http://www.google.com/2")); + url_row2.set_last_visit(visit_times[2]); + url_row2.set_favicon_id(favicon1); + url_row2.set_visit_count(2); + url_row2.set_typed_count(1); + url_ids[1] = main_db_->AddURL(url_row2); + + URLRow url_row3(GURL("http://www.google.com/3")); + url_row3.set_last_visit(visit_times[3]); + url_row3.set_favicon_id(favicon2); + url_row3.set_visit_count(1); + url_ids[2] = main_db_->AddURL(url_row3); + + // Thumbnails for each URL. 
+ scoped_ptr<SkBitmap> thumbnail( + gfx::JPEGCodec::Decode(kGoogleThumbnail, sizeof(kGoogleThumbnail))); + ThumbnailScore score(0.25, true, true, Time::Now()); + + Time time; + GURL gurl; + thumb_db_->SetPageThumbnail(gurl, url_ids[0], *thumbnail, score, time); + thumb_db_->SetPageThumbnail(gurl, url_ids[1], *thumbnail, score, time); + thumb_db_->SetPageThumbnail(gurl, url_ids[2], *thumbnail, score, time); + + // Four visits. + VisitRow visit_row1; + visit_row1.url_id = url_ids[0]; + visit_row1.visit_time = visit_times[0]; + visit_row1.is_indexed = true; + main_db_->AddVisit(&visit_row1); + + VisitRow visit_row2; + visit_row2.url_id = url_ids[1]; + visit_row2.visit_time = visit_times[1]; + visit_row2.is_indexed = true; + main_db_->AddVisit(&visit_row2); + + VisitRow visit_row3; + visit_row3.url_id = url_ids[1]; + visit_row3.visit_time = visit_times[2]; + visit_row3.is_indexed = true; + visit_row3.transition = PageTransition::TYPED; + main_db_->AddVisit(&visit_row3); + + VisitRow visit_row4; + visit_row4.url_id = url_ids[2]; + visit_row4.visit_time = visit_times[3]; + visit_row4.is_indexed = true; + main_db_->AddVisit(&visit_row4); + + // Full text index for each visit. + text_db_->AddPageData(url_row1.url(), visit_row1.url_id, visit_row1.visit_id, + visit_row1.visit_time, UTF8ToUTF16("title"), + UTF8ToUTF16("body")); + + text_db_->AddPageData(url_row2.url(), visit_row2.url_id, visit_row2.visit_id, + visit_row2.visit_time, UTF8ToUTF16("title"), + UTF8ToUTF16("body")); + text_db_->AddPageData(url_row2.url(), visit_row3.url_id, visit_row3.visit_id, + visit_row3.visit_time, UTF8ToUTF16("title"), + UTF8ToUTF16("body")); + + // Note the special text in this URL. We'll search the file for this string + // to make sure it doesn't hang around after the delete. 
+ text_db_->AddPageData(url_row3.url(), visit_row4.url_id, visit_row4.visit_id, + visit_row4.visit_time, UTF8ToUTF16("title"), + UTF8ToUTF16("goats body")); +} + +bool ExpireHistoryTest::HasFavIcon(FavIconID favicon_id) { + if (!thumb_db_.get()) + return false; + Time last_updated; + std::vector<unsigned char> icon_data_unused; + GURL icon_url; + return thumb_db_->GetFavIcon(favicon_id, &last_updated, &icon_data_unused, + &icon_url); +} + +bool ExpireHistoryTest::HasThumbnail(URLID url_id) { + std::vector<unsigned char> temp_data; + return thumb_db_->GetPageThumbnail(url_id, &temp_data); +} + +int ExpireHistoryTest::CountTextMatchesForURL(const GURL& url) { + if (!text_db_.get()) + return 0; + + // "body" should match all pages in the example data. + std::vector<TextDatabase::Match> results; + QueryOptions options; + Time first_time; + text_db_->GetTextMatches(UTF8ToUTF16("body"), options, + &results, &first_time); + + int count = 0; + for (size_t i = 0; i < results.size(); i++) { + if (results[i].url == url) + count++; + } + return count; +} + +void ExpireHistoryTest::EnsureURLInfoGone(const URLRow& row) { + // Verify the URL no longer exists. + URLRow temp_row; + EXPECT_FALSE(main_db_->GetURLRow(row.id(), &temp_row)); + + // The indexed data should be gone. + EXPECT_EQ(0, CountTextMatchesForURL(row.url())); + + // There should be no visits. + VisitVector visits; + main_db_->GetVisitsForURL(row.id(), &visits); + EXPECT_EQ(0U, visits.size()); + + // Thumbnail should be gone. + EXPECT_FALSE(HasThumbnail(row.id())); + + // Check the notifications. There should be a delete notification with this + // URL in it. There should also be a "typed URL changed" notification if the + // row is marked typed. 
+ bool found_delete_notification = false; + bool found_typed_changed_notification = false; + for (size_t i = 0; i < notifications_.size(); i++) { + if (notifications_[i].first == NotificationType::HISTORY_URLS_DELETED) { + const URLsDeletedDetails* deleted_details = + reinterpret_cast<URLsDeletedDetails*>(notifications_[i].second); + if (deleted_details->urls.find(row.url()) != + deleted_details->urls.end()) { + found_delete_notification = true; + } + } else if (notifications_[i].first == + NotificationType::HISTORY_TYPED_URLS_MODIFIED) { + // See if we got a typed URL changed notification. + const URLsModifiedDetails* modified_details = + reinterpret_cast<URLsModifiedDetails*>(notifications_[i].second); + for (size_t cur_url = 0; cur_url < modified_details->changed_urls.size(); + cur_url++) { + if (modified_details->changed_urls[cur_url].url() == row.url()) + found_typed_changed_notification = true; + } + } else if (notifications_[i].first == + NotificationType::HISTORY_URL_VISITED) { + // See if we got a visited URL notification. + const URLVisitedDetails* visited_details = + reinterpret_cast<URLVisitedDetails*>(notifications_[i].second); + if (visited_details->row.url() == row.url()) + found_typed_changed_notification = true; + } + } + EXPECT_TRUE(found_delete_notification); + EXPECT_EQ(row.typed_count() > 0, found_typed_changed_notification); +} + +TEST_F(ExpireHistoryTest, DeleteFaviconsIfPossible) { + // Add a favicon record. + const GURL favicon_url("http://www.google.com/favicon.ico"); + FavIconID icon_id = thumb_db_->AddFavIcon(favicon_url); + EXPECT_TRUE(icon_id); + EXPECT_TRUE(HasFavIcon(icon_id)); + + // The favicon should be deletable with no users. + std::set<FavIconID> favicon_set; + favicon_set.insert(icon_id); + expirer_.DeleteFaviconsIfPossible(favicon_set); + EXPECT_FALSE(HasFavIcon(icon_id)); + + // Add back the favicon. 
+ icon_id = thumb_db_->AddFavIcon(favicon_url); + EXPECT_TRUE(icon_id); + EXPECT_TRUE(HasFavIcon(icon_id)); + + // Add a page that references the favicon. + URLRow row(GURL("http://www.google.com/2")); + row.set_visit_count(1); + row.set_favicon_id(icon_id); + EXPECT_TRUE(main_db_->AddURL(row)); + + // Favicon should not be deletable. + favicon_set.clear(); + favicon_set.insert(icon_id); + expirer_.DeleteFaviconsIfPossible(favicon_set); + EXPECT_TRUE(HasFavIcon(icon_id)); +} + +// static +bool ExpireHistoryTest::IsStringInFile(const FilePath& filename, + const char* str) { + std::string contents; + EXPECT_TRUE(file_util::ReadFileToString(filename, &contents)); + return contents.find(str) != std::string::npos; +} + +// Deletes a URL with a favicon that it is the last referencer of, so that it +// should also get deleted. +// Fails near end of month. http://crbug.com/43586 +TEST_F(ExpireHistoryTest, FLAKY_DeleteURLAndFavicon) { + URLID url_ids[3]; + Time visit_times[4]; + AddExampleData(url_ids, visit_times); + + // Verify things are the way we expect with a URL row, favicon, thumbnail. + URLRow last_row; + ASSERT_TRUE(main_db_->GetURLRow(url_ids[2], &last_row)); + EXPECT_TRUE(HasFavIcon(last_row.favicon_id())); + EXPECT_TRUE(HasThumbnail(url_ids[2])); + + VisitVector visits; + main_db_->GetVisitsForURL(url_ids[2], &visits); + ASSERT_EQ(1U, visits.size()); + EXPECT_EQ(1, CountTextMatchesForURL(last_row.url())); + + // In this test we also make sure that any pending entries in the text + // database manager are removed. + text_db_->AddPageURL(last_row.url(), last_row.id(), visits[0].visit_id, + visits[0].visit_time); + + // Compute the text DB filename. + FilePath fts_filename = dir_.Append( + TextDatabase::IDToFileName(text_db_->TimeToID(visit_times[3]))); + + // When checking the file, the database must be closed. We then re-initialize + // it just like the test set-up did. 
+ text_db_.reset(); + EXPECT_TRUE(IsStringInFile(fts_filename, "goats")); + text_db_.reset(new TextDatabaseManager(dir_, + main_db_.get(), main_db_.get())); + ASSERT_TRUE(text_db_->Init(NULL)); + expirer_.SetDatabases(main_db_.get(), archived_db_.get(), thumb_db_.get(), + text_db_.get()); + + // Delete the URL and its dependencies. + expirer_.DeleteURL(last_row.url()); + + // The string should be removed from the file. FTS can mark it as gone but + // doesn't remove it from the file, we want to be sure we're doing the latter. + text_db_.reset(); + EXPECT_FALSE(IsStringInFile(fts_filename, "goats")); + text_db_.reset(new TextDatabaseManager(dir_, + main_db_.get(), main_db_.get())); + ASSERT_TRUE(text_db_->Init(NULL)); + expirer_.SetDatabases(main_db_.get(), archived_db_.get(), thumb_db_.get(), + text_db_.get()); + + // Run the text database expirer. This will flush any pending entries so we + // can check that nothing was committed. We use a time far in the future so + // that anything added recently will get flushed. + TimeTicks expiration_time = TimeTicks::Now() + TimeDelta::FromDays(1); + text_db_->FlushOldChangesForTime(expiration_time); + + // All the normal data + the favicon should be gone. + EnsureURLInfoGone(last_row); + EXPECT_FALSE(HasFavIcon(last_row.favicon_id())); +} + +// Deletes a URL with a favicon that other URLs reference, so that the favicon +// should not get deleted. This also tests deleting more than one visit. +TEST_F(ExpireHistoryTest, DeleteURLWithoutFavicon) { + URLID url_ids[3]; + Time visit_times[4]; + AddExampleData(url_ids, visit_times); + + // Verify things are the way we expect with a URL row, favicon, thumbnail. 
+ URLRow last_row; + ASSERT_TRUE(main_db_->GetURLRow(url_ids[1], &last_row)); + EXPECT_TRUE(HasFavIcon(last_row.favicon_id())); + EXPECT_TRUE(HasThumbnail(url_ids[1])); + + VisitVector visits; + main_db_->GetVisitsForURL(url_ids[1], &visits); + EXPECT_EQ(2U, visits.size()); + EXPECT_EQ(1, CountTextMatchesForURL(last_row.url())); + + // Delete the URL and its dependencies. + expirer_.DeleteURL(last_row.url()); + + // All the normal data + the favicon should be gone. + EnsureURLInfoGone(last_row); + EXPECT_TRUE(HasFavIcon(last_row.favicon_id())); +} + +// DeleteURL should not delete starred urls. +TEST_F(ExpireHistoryTest, DontDeleteStarredURL) { + URLID url_ids[3]; + Time visit_times[4]; + AddExampleData(url_ids, visit_times); + + URLRow url_row; + ASSERT_TRUE(main_db_->GetURLRow(url_ids[2], &url_row)); + + // Star the last URL. + StarURL(url_row.url()); + + // Attempt to delete the url. + expirer_.DeleteURL(url_row.url()); + + // Because the url is starred, it shouldn't be deleted. + GURL url = url_row.url(); + ASSERT_TRUE(main_db_->GetRowForURL(url, &url_row)); + + // And the favicon should exist. + EXPECT_TRUE(HasFavIcon(url_row.favicon_id())); + + // But there should be no fts. + ASSERT_EQ(0, CountTextMatchesForURL(url_row.url())); + + // And no visits. + VisitVector visits; + main_db_->GetVisitsForURL(url_row.id(), &visits); + ASSERT_EQ(0U, visits.size()); + + // Should still have the thumbnail. + ASSERT_TRUE(HasThumbnail(url_row.id())); + + // Unstar the URL and delete again. + bookmark_model_.SetURLStarred(url, std::wstring(), false); + expirer_.DeleteURL(url); + + // Now it should be completely deleted. + EnsureURLInfoGone(url_row); +} + +// Expires all URLs more recent than a given time, with no starred items. +// Our time threshold is such that one URL should be updated (we delete one of +// the two visits) and one is deleted. 
+TEST_F(ExpireHistoryTest, FlushRecentURLsUnstarred) { + URLID url_ids[3]; + Time visit_times[4]; + AddExampleData(url_ids, visit_times); + + URLRow url_row1, url_row2; + ASSERT_TRUE(main_db_->GetURLRow(url_ids[1], &url_row1)); + ASSERT_TRUE(main_db_->GetURLRow(url_ids[2], &url_row2)); + + // In this test we also make sure that any pending entries in the text + // database manager are removed. + VisitVector visits; + main_db_->GetVisitsForURL(url_ids[2], &visits); + ASSERT_EQ(1U, visits.size()); + text_db_->AddPageURL(url_row2.url(), url_row2.id(), visits[0].visit_id, + visits[0].visit_time); + + // This should delete the last two visits. + std::set<GURL> restrict_urls; + expirer_.ExpireHistoryBetween(restrict_urls, visit_times[2], Time()); + + // Run the text database expirer. This will flush any pending entries so we + // can check that nothing was committed. We use a time far in the future so + // that anything added recently will get flushed. + TimeTicks expiration_time = TimeTicks::Now() + TimeDelta::FromDays(1); + text_db_->FlushOldChangesForTime(expiration_time); + + // Verify that the middle URL had its last visit deleted only. + visits.clear(); + main_db_->GetVisitsForURL(url_ids[1], &visits); + EXPECT_EQ(1U, visits.size()); + EXPECT_EQ(0, CountTextMatchesForURL(url_row1.url())); + + // Verify that the middle URL visit time and visit counts were updated. + URLRow temp_row; + ASSERT_TRUE(main_db_->GetURLRow(url_ids[1], &temp_row)); + EXPECT_TRUE(visit_times[2] == url_row1.last_visit()); // Previous value. + EXPECT_TRUE(visit_times[1] == temp_row.last_visit()); // New value. + EXPECT_EQ(2, url_row1.visit_count()); + EXPECT_EQ(1, temp_row.visit_count()); + EXPECT_EQ(1, url_row1.typed_count()); + EXPECT_EQ(0, temp_row.typed_count()); + + // Verify that the middle URL's favicon and thumbnail is still there. + EXPECT_TRUE(HasFavIcon(url_row1.favicon_id())); + EXPECT_TRUE(HasThumbnail(url_row1.id())); + + // Verify that the last URL was deleted. 
+ EnsureURLInfoGone(url_row2); + EXPECT_FALSE(HasFavIcon(url_row2.favicon_id())); +} + +// Expires only a specific URLs more recent than a given time, with no starred +// items. Our time threshold is such that the URL should be updated (we delete +// one of the two visits). +TEST_F(ExpireHistoryTest, FlushRecentURLsUnstarredRestricted) { + URLID url_ids[3]; + Time visit_times[4]; + AddExampleData(url_ids, visit_times); + + URLRow url_row1, url_row2; + ASSERT_TRUE(main_db_->GetURLRow(url_ids[1], &url_row1)); + ASSERT_TRUE(main_db_->GetURLRow(url_ids[2], &url_row2)); + + // In this test we also make sure that any pending entries in the text + // database manager are removed. + VisitVector visits; + main_db_->GetVisitsForURL(url_ids[2], &visits); + ASSERT_EQ(1U, visits.size()); + text_db_->AddPageURL(url_row2.url(), url_row2.id(), visits[0].visit_id, + visits[0].visit_time); + + // This should delete the last two visits. + std::set<GURL> restrict_urls; + restrict_urls.insert(url_row1.url()); + expirer_.ExpireHistoryBetween(restrict_urls, visit_times[2], Time()); + + // Run the text database expirer. This will flush any pending entries so we + // can check that nothing was committed. We use a time far in the future so + // that anything added recently will get flushed. + TimeTicks expiration_time = TimeTicks::Now() + TimeDelta::FromDays(1); + text_db_->FlushOldChangesForTime(expiration_time); + + // Verify that the middle URL had its last visit deleted only. + visits.clear(); + main_db_->GetVisitsForURL(url_ids[1], &visits); + EXPECT_EQ(1U, visits.size()); + EXPECT_EQ(0, CountTextMatchesForURL(url_row1.url())); + + // Verify that the middle URL visit time and visit counts were updated. + URLRow temp_row; + ASSERT_TRUE(main_db_->GetURLRow(url_ids[1], &temp_row)); + EXPECT_TRUE(visit_times[2] == url_row1.last_visit()); // Previous value. + EXPECT_TRUE(visit_times[1] == temp_row.last_visit()); // New value. 
+ EXPECT_EQ(2, url_row1.visit_count()); + EXPECT_EQ(1, temp_row.visit_count()); + EXPECT_EQ(1, url_row1.typed_count()); + EXPECT_EQ(0, temp_row.typed_count()); + + // Verify that the middle URL's favicon and thumbnail is still there. + EXPECT_TRUE(HasFavIcon(url_row1.favicon_id())); + EXPECT_TRUE(HasThumbnail(url_row1.id())); + + // Verify that the last URL was not touched. + EXPECT_TRUE(main_db_->GetURLRow(url_ids[2], &temp_row)); + EXPECT_TRUE(HasFavIcon(url_row2.favicon_id())); + EXPECT_TRUE(HasThumbnail(url_row2.id())); +} + +// Expire a starred URL, it shouldn't get deleted +TEST_F(ExpireHistoryTest, FlushRecentURLsStarred) { + URLID url_ids[3]; + Time visit_times[4]; + AddExampleData(url_ids, visit_times); + + URLRow url_row1, url_row2; + ASSERT_TRUE(main_db_->GetURLRow(url_ids[1], &url_row1)); + ASSERT_TRUE(main_db_->GetURLRow(url_ids[2], &url_row2)); + + // Star the last two URLs. + StarURL(url_row1.url()); + StarURL(url_row2.url()); + + // This should delete the last two visits. + std::set<GURL> restrict_urls; + expirer_.ExpireHistoryBetween(restrict_urls, visit_times[2], Time()); + + // The URL rows should still exist. + URLRow new_url_row1, new_url_row2; + ASSERT_TRUE(main_db_->GetURLRow(url_ids[1], &new_url_row1)); + ASSERT_TRUE(main_db_->GetURLRow(url_ids[2], &new_url_row2)); + + // The visit times should be updated. + EXPECT_TRUE(new_url_row1.last_visit() == visit_times[1]); + EXPECT_TRUE(new_url_row2.last_visit().is_null()); // No last visit time. + + // Visit/typed count should not be updated for bookmarks. + EXPECT_EQ(0, new_url_row1.typed_count()); + EXPECT_EQ(1, new_url_row1.visit_count()); + EXPECT_EQ(0, new_url_row2.typed_count()); + EXPECT_EQ(0, new_url_row2.visit_count()); + + // Thumbnails and favicons should still exist. Note that we keep thumbnails + // that may have been updated since the time threshold. 
Since the URL still + // exists in history, this should not be a privacy problem, we only update + // the visit counts in this case for consistency anyway. + EXPECT_TRUE(HasFavIcon(new_url_row1.favicon_id())); + EXPECT_TRUE(HasThumbnail(new_url_row1.id())); + EXPECT_TRUE(HasFavIcon(new_url_row2.favicon_id())); + EXPECT_TRUE(HasThumbnail(new_url_row2.id())); +} + +TEST_F(ExpireHistoryTest, ArchiveHistoryBeforeUnstarred) { + URLID url_ids[3]; + Time visit_times[4]; + AddExampleData(url_ids, visit_times); + + URLRow url_row1, url_row2; + ASSERT_TRUE(main_db_->GetURLRow(url_ids[1], &url_row1)); + ASSERT_TRUE(main_db_->GetURLRow(url_ids[2], &url_row2)); + + // Archive the oldest two visits. This will actually result in deleting them + // since their transition types are empty (not important). + expirer_.ArchiveHistoryBefore(visit_times[1]); + + // The first URL should be deleted, the second should not be affected. + URLRow temp_row; + EXPECT_FALSE(main_db_->GetURLRow(url_ids[0], &temp_row)); + EXPECT_TRUE(main_db_->GetURLRow(url_ids[1], &temp_row)); + EXPECT_TRUE(main_db_->GetURLRow(url_ids[2], &temp_row)); + + // Make sure the archived database has nothing in it. + EXPECT_FALSE(archived_db_->GetRowForURL(url_row1.url(), NULL)); + EXPECT_FALSE(archived_db_->GetRowForURL(url_row2.url(), NULL)); + + // Now archive one more visit so that the middle URL should be removed. This + // one will actually be archived instead of deleted. + expirer_.ArchiveHistoryBefore(visit_times[2]); + EXPECT_FALSE(main_db_->GetURLRow(url_ids[1], &temp_row)); + EXPECT_TRUE(main_db_->GetURLRow(url_ids[2], &temp_row)); + + // Make sure the archived database has an entry for the second URL. + URLRow archived_row; + // Note that the ID is different in the archived DB, so look up by URL. 
+ EXPECT_TRUE(archived_db_->GetRowForURL(url_row1.url(), &archived_row)); + VisitVector archived_visits; + archived_db_->GetVisitsForURL(archived_row.id(), &archived_visits); + EXPECT_EQ(1U, archived_visits.size()); +} + +TEST_F(ExpireHistoryTest, ArchiveHistoryBeforeStarred) { + URLID url_ids[3]; + Time visit_times[4]; + AddExampleData(url_ids, visit_times); + + URLRow url_row0, url_row1; + ASSERT_TRUE(main_db_->GetURLRow(url_ids[0], &url_row0)); + ASSERT_TRUE(main_db_->GetURLRow(url_ids[1], &url_row1)); + + // Star the URLs. + StarURL(url_row0.url()); + StarURL(url_row1.url()); + + // Now archive the first three visits (first two URLs). The first two visits + // should be, the third deleted, but the URL records should not. + expirer_.ArchiveHistoryBefore(visit_times[2]); + + // The first URL should have its visit deleted, but it should still be present + // in the main DB and not in the archived one since it is starred. + URLRow temp_row; + ASSERT_TRUE(main_db_->GetURLRow(url_ids[0], &temp_row)); + // Note that the ID is different in the archived DB, so look up by URL. + EXPECT_FALSE(archived_db_->GetRowForURL(temp_row.url(), NULL)); + VisitVector visits; + main_db_->GetVisitsForURL(temp_row.id(), &visits); + EXPECT_EQ(0U, visits.size()); + + // The second URL should have its first visit deleted and its second visit + // archived. It should be present in both the main DB (because it's starred) + // and the archived DB (for the archived visit). + ASSERT_TRUE(main_db_->GetURLRow(url_ids[1], &temp_row)); + main_db_->GetVisitsForURL(temp_row.id(), &visits); + EXPECT_EQ(0U, visits.size()); + + // Note that the ID is different in the archived DB, so look up by URL. + ASSERT_TRUE(archived_db_->GetRowForURL(temp_row.url(), &temp_row)); + archived_db_->GetVisitsForURL(temp_row.id(), &visits); + ASSERT_EQ(1U, visits.size()); + EXPECT_TRUE(visit_times[2] == visits[0].visit_time); + + // The third URL should be unchanged. 
+ EXPECT_TRUE(main_db_->GetURLRow(url_ids[2], &temp_row)); + EXPECT_FALSE(archived_db_->GetRowForURL(temp_row.url(), NULL)); +} + +// Tests the return values from ArchiveSomeOldHistory. The rest of the +// functionality of this function is tested by the ArchiveHistoryBefore* +// tests which use this function internally. +TEST_F(ExpireHistoryTest, ArchiveSomeOldHistory) { + URLID url_ids[3]; + Time visit_times[4]; + AddExampleData(url_ids, visit_times); + const ExpiringVisitsReader* reader = expirer_.GetAllVisitsReader(); + + // Deleting a time range with no URLs should return false (nothing found). + EXPECT_FALSE(expirer_.ArchiveSomeOldHistory( + visit_times[0] - TimeDelta::FromDays(100), reader, 1)); + + // Deleting a time range with not up the the max results should also return + // false (there will only be one visit deleted in this range). + EXPECT_FALSE(expirer_.ArchiveSomeOldHistory(visit_times[0], reader, 2)); + + // Deleting a time range with the max number of results should return true + // (max deleted). + EXPECT_TRUE(expirer_.ArchiveSomeOldHistory(visit_times[2], reader, 1)); +} + +TEST_F(ExpireHistoryTest, ExpiringVisitsReader) { + URLID url_ids[3]; + Time visit_times[4]; + AddExampleData(url_ids, visit_times); + + const ExpiringVisitsReader* all = expirer_.GetAllVisitsReader(); + const ExpiringVisitsReader* auto_subframes = + expirer_.GetAutoSubframeVisitsReader(); + + VisitVector visits; + Time now = Time::Now(); + + // Verify that the early expiration threshold, stored in the meta table is + // initialized. + EXPECT_TRUE(main_db_->GetEarlyExpirationThreshold() == + Time::FromInternalValue(1L)); + + // First, attempt reading AUTO_SUBFRAME visits. We should get none. + EXPECT_FALSE(auto_subframes->Read(now, main_db_.get(), &visits, 1)); + EXPECT_EQ(0U, visits.size()); + + // Verify that the early expiration threshold was updated, since there are no + // AUTO_SUBFRAME visits in the given time range. 
+ EXPECT_TRUE(now <= main_db_->GetEarlyExpirationThreshold()); + + // Now, read all visits and verify that there's at least one. + EXPECT_TRUE(all->Read(now, main_db_.get(), &visits, 1)); + EXPECT_EQ(1U, visits.size()); +} + +// TODO(brettw) add some visits with no URL to make sure everything is updated +// properly. Have the visits also refer to nonexistant FTS rows. +// +// Maybe also refer to invalid favicons. + +} // namespace history diff --git a/chrome/browser/history/history.cc b/chrome/browser/history/history.cc new file mode 100644 index 0000000..0b92b6f --- /dev/null +++ b/chrome/browser/history/history.cc @@ -0,0 +1,762 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// The history system runs on a background thread so that potentially slow +// database operations don't delay the browser. This backend processing is +// represented by HistoryBackend. The HistoryService's job is to dispatch to +// that thread. +// +// Main thread History thread +// ----------- -------------- +// HistoryService <----------------> HistoryBackend +// -> HistoryDatabase +// -> SQLite connection to History +// -> ArchivedDatabase +// -> SQLite connection to Archived History +// -> TextDatabaseManager +// -> SQLite connection to one month's data +// -> SQLite connection to one month's data +// ... 
+// -> ThumbnailDatabase +// -> SQLite connection to Thumbnails +// (and favicons) + +#include "chrome/browser/history/history.h" + +#include "app/l10n_util.h" +#include "base/callback.h" +#include "base/message_loop.h" +#include "base/path_service.h" +#include "base/ref_counted.h" +#include "base/task.h" +#include "chrome/browser/autocomplete/history_url_provider.h" +#include "chrome/browser/browser_list.h" +#include "chrome/browser/browser_process.h" +#include "chrome/browser/browser_window.h" +#include "chrome/browser/chrome_thread.h" +#include "chrome/browser/history/download_types.h" +#include "chrome/browser/history/history_backend.h" +#include "chrome/browser/history/history_notifications.h" +#include "chrome/browser/history/history_types.h" +#include "chrome/browser/history/in_memory_database.h" +#include "chrome/browser/history/in_memory_history_backend.h" +#include "chrome/browser/history/top_sites.h" +#include "chrome/browser/profile.h" +#include "chrome/browser/visitedlink_master.h" +#include "chrome/common/chrome_constants.h" +#include "chrome/common/notification_service.h" +#include "chrome/common/thumbnail_score.h" +#include "chrome/common/url_constants.h" +#include "grit/chromium_strings.h" +#include "grit/generated_resources.h" +#include "third_party/skia/include/core/SkBitmap.h" + +using base::Time; +using history::HistoryBackend; + +namespace { + +static const char* kHistoryThreadName = "Chrome_HistoryThread"; + +} // namespace + +// Sends messages from the backend to us on the main thread. This must be a +// separate class from the history service so that it can hold a reference to +// the history service (otherwise we would have to manually AddRef and +// Release when the Backend has a reference to us). 
+class HistoryService::BackendDelegate : public HistoryBackend::Delegate {
+ public:
+  // Captures the current (main-thread) message loop at construction time so
+  // that every callback below can be bounced back to the main thread.
+  explicit BackendDelegate(HistoryService* history_service)
+      : history_service_(history_service),
+        message_loop_(MessageLoop::current()) {
+  }
+
+  virtual void NotifyProfileError(int message_id) {
+    // Relay the profile-error message id to the history service on the main
+    // thread.
+    message_loop_->PostTask(FROM_HERE, NewRunnableMethod(history_service_.get(),
+        &HistoryService::NotifyProfileError, message_id));
+  }
+
+  virtual void SetInMemoryBackend(
+      history::InMemoryHistoryBackend* backend) {
+    // Send the backend to the history service on the main thread. Ownership
+    // of |backend| is presumably transferred to the service — confirm against
+    // HistoryService::SetInMemoryBackend.
+    message_loop_->PostTask(FROM_HERE, NewRunnableMethod(history_service_.get(),
+        &HistoryService::SetInMemoryBackend, backend));
+  }
+
+  virtual void BroadcastNotifications(NotificationType type,
+                                      history::HistoryDetails* details) {
+    // Send the notification on the history thread.
+    if (NotificationService::current()) {
+      Details<history::HistoryDetails> det(details);
+      NotificationService::current()->Notify(type,
+                                             NotificationService::AllSources(),
+                                             det);
+    }
+    // Send the notification to the history service on the main thread.
+    message_loop_->PostTask(FROM_HERE, NewRunnableMethod(history_service_.get(),
+        &HistoryService::BroadcastNotifications, type, details));
+  }
+
+  virtual void DBLoaded() {
+    // Tell the service, on the main thread, that the backend finished loading.
+    message_loop_->PostTask(FROM_HERE, NewRunnableMethod(history_service_.get(),
+        &HistoryService::OnDBLoaded));
+  }
+
+  virtual void StartTopSitesMigration() {
+    message_loop_->PostTask(FROM_HERE, NewRunnableMethod(history_service_.get(),
+        &HistoryService::StartTopSitesMigration));
+  }
+
+ private:
+  // Refcounted so the service cannot be destroyed while the backend still
+  // holds this delegate (see the class comment above).
+  scoped_refptr<HistoryService> history_service_;
+  // Main-thread loop captured at construction; target of all PostTask calls.
+  MessageLoop* message_loop_;
+};
+
+// static
+const history::StarID HistoryService::kBookmarkBarID = 1;
+
+// The history thread is intentionally not a ChromeThread because the
+// sync integration unit tests depend on being able to create more than one
+// history thread.
+HistoryService::HistoryService() + : thread_(new base::Thread(kHistoryThreadName)), + profile_(NULL), + backend_loaded_(false), + bookmark_service_(NULL), + no_db_(false) { + // Is NULL when running generate_profile. + if (NotificationService::current()) { + registrar_.Add(this, NotificationType::HISTORY_URLS_DELETED, + Source<Profile>(profile_)); + } +} + +HistoryService::HistoryService(Profile* profile) + : thread_(new base::Thread(kHistoryThreadName)), + profile_(profile), + backend_loaded_(false), + bookmark_service_(NULL), + no_db_(false) { + registrar_.Add(this, NotificationType::HISTORY_URLS_DELETED, + Source<Profile>(profile_)); +} + +HistoryService::~HistoryService() { + // Shutdown the backend. This does nothing if Cleanup was already invoked. + Cleanup(); +} + +bool HistoryService::BackendLoaded() { + // NOTE: We start the backend loading even though it completes asynchronously + // and thus won't affect the return value of this function. This is because + // callers of this assume that if the backend isn't yet loaded it will be + // soon, so they will either listen for notifications or just retry this call + // later. If we've purged the backend, we haven't necessarily restarted it + // loading by now, so we need to trigger the load in order to maintain that + // expectation. + LoadBackendIfNecessary(); + return backend_loaded_; +} + +void HistoryService::UnloadBackend() { + if (!history_backend_) + return; // Already unloaded. + + // Get rid of the in-memory backend. + in_memory_backend_.reset(); + + // The backend's destructor must run on the history thread since it is not + // threadsafe. So this thread must not be the last thread holding a reference + // to the backend, or a crash could happen. + // + // We have a reference to the history backend. There is also an extra + // reference held by our delegate installed in the backend, which + // HistoryBackend::Closing will release. 
This means if we scheduled a call + // to HistoryBackend::Closing and *then* released our backend reference, there + // will be a race between us and the backend's Closing function to see who is + // the last holder of a reference. If the backend thread's Closing manages to + // run before we release our backend refptr, the last reference will be held + // by this thread and the destructor will be called from here. + // + // Therefore, we create a task to run the Closing operation first. This holds + // a reference to the backend. Then we release our reference, then we schedule + // the task to run. After the task runs, it will delete its reference from + // the history thread, ensuring everything works properly. + Task* closing_task = + NewRunnableMethod(history_backend_.get(), &HistoryBackend::Closing); + history_backend_ = NULL; + ScheduleTask(PRIORITY_NORMAL, closing_task); +} + +void HistoryService::Cleanup() { + if (!thread_) { + // We've already cleaned up. + return; + } + + // Unload the backend. + UnloadBackend(); + + // Delete the thread, which joins with the background thread. We defensively + // NULL the pointer before deleting it in case somebody tries to use it + // during shutdown, but this shouldn't happen. + base::Thread* thread = thread_; + thread_ = NULL; + delete thread; +} + +void HistoryService::NotifyRenderProcessHostDestruction(const void* host) { + ScheduleAndForget(PRIORITY_NORMAL, + &HistoryBackend::NotifyRenderProcessHostDestruction, host); +} + +history::URLDatabase* HistoryService::InMemoryDatabase() { + // NOTE: See comments in BackendLoaded() as to why we call + // LoadBackendIfNecessary() here even though it won't affect the return value + // for this call. 
+ LoadBackendIfNecessary(); + if (in_memory_backend_.get()) + return in_memory_backend_->db(); + return NULL; +} + +void HistoryService::SetSegmentPresentationIndex(int64 segment_id, int index) { + ScheduleAndForget(PRIORITY_UI, + &HistoryBackend::SetSegmentPresentationIndex, + segment_id, index); +} + +void HistoryService::SetKeywordSearchTermsForURL(const GURL& url, + TemplateURL::IDType keyword_id, + const string16& term) { + ScheduleAndForget(PRIORITY_UI, + &HistoryBackend::SetKeywordSearchTermsForURL, + url, keyword_id, term); +} + +void HistoryService::DeleteAllSearchTermsForKeyword( + TemplateURL::IDType keyword_id) { + ScheduleAndForget(PRIORITY_UI, + &HistoryBackend::DeleteAllSearchTermsForKeyword, + keyword_id); +} + +HistoryService::Handle HistoryService::GetMostRecentKeywordSearchTerms( + TemplateURL::IDType keyword_id, + const string16& prefix, + int max_count, + CancelableRequestConsumerBase* consumer, + GetMostRecentKeywordSearchTermsCallback* callback) { + return Schedule(PRIORITY_UI, &HistoryBackend::GetMostRecentKeywordSearchTerms, + consumer, + new history::GetMostRecentKeywordSearchTermsRequest(callback), + keyword_id, prefix, max_count); +} + +void HistoryService::URLsNoLongerBookmarked(const std::set<GURL>& urls) { + ScheduleAndForget(PRIORITY_NORMAL, &HistoryBackend::URLsNoLongerBookmarked, + urls); +} + +HistoryService::Handle HistoryService::ScheduleDBTask( + HistoryDBTask* task, + CancelableRequestConsumerBase* consumer) { + history::HistoryDBTaskRequest* request = new history::HistoryDBTaskRequest( + NewCallback(task, &HistoryDBTask::DoneRunOnMainThread)); + request->value = task; // The value is the task to execute. 
+ return Schedule(PRIORITY_UI, &HistoryBackend::ProcessDBTask, consumer, + request); +} + +HistoryService::Handle HistoryService::QuerySegmentUsageSince( + CancelableRequestConsumerBase* consumer, + const Time from_time, + int max_result_count, + SegmentQueryCallback* callback) { + return Schedule(PRIORITY_UI, &HistoryBackend::QuerySegmentUsage, + consumer, new history::QuerySegmentUsageRequest(callback), + from_time, max_result_count); +} + +void HistoryService::SetOnBackendDestroyTask(Task* task) { + ScheduleAndForget(PRIORITY_NORMAL, &HistoryBackend::SetOnBackendDestroyTask, + MessageLoop::current(), task); +} + +void HistoryService::AddPage(const GURL& url, + const void* id_scope, + int32 page_id, + const GURL& referrer, + PageTransition::Type transition, + const history::RedirectList& redirects, + bool did_replace_entry) { + AddPage(url, Time::Now(), id_scope, page_id, referrer, transition, redirects, + did_replace_entry); +} + +void HistoryService::AddPage(const GURL& url, + Time time, + const void* id_scope, + int32 page_id, + const GURL& referrer, + PageTransition::Type transition, + const history::RedirectList& redirects, + bool did_replace_entry) { + DCHECK(thread_) << "History service being called after cleanup"; + + // Filter out unwanted URLs. We don't add auto-subframe URLs. They are a + // large part of history (think iframes for ads) and we never display them in + // history UI. We will still add manual subframes, which are ones the user + // has clicked on to get. + if (!CanAddURL(url)) + return; + + // Add link & all redirects to visited link list. + VisitedLinkMaster* visited_links; + if (profile_ && (visited_links = profile_->GetVisitedLinkMaster())) { + visited_links->AddURL(url); + + if (!redirects.empty()) { + // We should not be asked to add a page in the middle of a redirect chain. 
+ DCHECK(redirects[redirects.size() - 1] == url); + + // We need the !redirects.empty() condition above since size_t is unsigned + // and will wrap around when we subtract one from a 0 size. + for (size_t i = 0; i < redirects.size() - 1; i++) + visited_links->AddURL(redirects[i]); + } + } + + scoped_refptr<history::HistoryAddPageArgs> request( + new history::HistoryAddPageArgs(url, time, id_scope, page_id, + referrer, redirects, transition, + did_replace_entry)); + ScheduleAndForget(PRIORITY_NORMAL, &HistoryBackend::AddPage, request); +} + +void HistoryService::SetPageTitle(const GURL& url, + const string16& title) { + ScheduleAndForget(PRIORITY_NORMAL, &HistoryBackend::SetPageTitle, url, title); +} + +void HistoryService::AddPageWithDetails(const GURL& url, + const string16& title, + int visit_count, + int typed_count, + Time last_visit, + bool hidden) { + // Filter out unwanted URLs. + if (!CanAddURL(url)) + return; + + // Add to the visited links system. + VisitedLinkMaster* visited_links; + if (profile_ && (visited_links = profile_->GetVisitedLinkMaster())) + visited_links->AddURL(url); + + history::URLRow row(url); + row.set_title(title); + row.set_visit_count(visit_count); + row.set_typed_count(typed_count); + row.set_last_visit(last_visit); + row.set_hidden(hidden); + + std::vector<history::URLRow> rows; + rows.push_back(row); + + ScheduleAndForget(PRIORITY_NORMAL, + &HistoryBackend::AddPagesWithDetails, rows); +} + +void HistoryService::AddPagesWithDetails( + const std::vector<history::URLRow>& info) { + + // Add to the visited links system. 
+ VisitedLinkMaster* visited_links; + if (profile_ && (visited_links = profile_->GetVisitedLinkMaster())) { + std::vector<GURL> urls; + urls.reserve(info.size()); + for (std::vector<history::URLRow>::const_iterator i = info.begin(); + i != info.end(); + ++i) + urls.push_back(i->url()); + + visited_links->AddURLs(urls); + } + + ScheduleAndForget(PRIORITY_NORMAL, + &HistoryBackend::AddPagesWithDetails, info); +} + +void HistoryService::SetPageContents(const GURL& url, + const string16& contents) { + if (!CanAddURL(url)) + return; + + ScheduleAndForget(PRIORITY_LOW, &HistoryBackend::SetPageContents, + url, contents); +} + +void HistoryService::SetPageThumbnail(const GURL& page_url, + const SkBitmap& thumbnail, + const ThumbnailScore& score) { + if (!CanAddURL(page_url)) + return; + + ScheduleAndForget(PRIORITY_NORMAL, &HistoryBackend::SetPageThumbnail, + page_url, thumbnail, score); +} + +HistoryService::Handle HistoryService::GetPageThumbnail( + const GURL& page_url, + CancelableRequestConsumerBase* consumer, + ThumbnailDataCallback* callback) { + return Schedule(PRIORITY_NORMAL, &HistoryBackend::GetPageThumbnail, consumer, + new history::GetPageThumbnailRequest(callback), page_url); +} + +void HistoryService::GetFavicon(FaviconService::GetFaviconRequest* request, + const GURL& icon_url) { + Schedule(PRIORITY_NORMAL, &HistoryBackend::GetFavIcon, NULL, request, + icon_url); +} + +void HistoryService::UpdateFaviconMappingAndFetch( + FaviconService::GetFaviconRequest* request, + const GURL& page_url, + const GURL& icon_url) { + Schedule(PRIORITY_NORMAL, &HistoryBackend::UpdateFavIconMappingAndFetch, NULL, + request, page_url, icon_url); +} + +void HistoryService::GetFaviconForURL( + FaviconService::GetFaviconRequest* request, + const GURL& page_url) { + Schedule(PRIORITY_NORMAL, &HistoryBackend::GetFavIconForURL, NULL, request, + page_url); +} + +void HistoryService::SetFavicon(const GURL& page_url, + const GURL& icon_url, + const std::vector<unsigned char>& image_data) 
{ + if (!CanAddURL(page_url)) + return; + + ScheduleAndForget(PRIORITY_NORMAL, &HistoryBackend::SetFavIcon, + page_url, icon_url, + scoped_refptr<RefCountedMemory>(new RefCountedBytes(image_data))); +} + +void HistoryService::SetFaviconOutOfDateForPage(const GURL& page_url) { + ScheduleAndForget(PRIORITY_NORMAL, + &HistoryBackend::SetFavIconOutOfDateForPage, page_url); +} + +void HistoryService::SetImportedFavicons( + const std::vector<history::ImportedFavIconUsage>& favicon_usage) { + ScheduleAndForget(PRIORITY_NORMAL, + &HistoryBackend::SetImportedFavicons, favicon_usage); +} + +void HistoryService::IterateURLs(URLEnumerator* enumerator) { + ScheduleAndForget(PRIORITY_NORMAL, &HistoryBackend::IterateURLs, enumerator); +} + +HistoryService::Handle HistoryService::QueryURL( + const GURL& url, + bool want_visits, + CancelableRequestConsumerBase* consumer, + QueryURLCallback* callback) { + return Schedule(PRIORITY_UI, &HistoryBackend::QueryURL, consumer, + new history::QueryURLRequest(callback), url, want_visits); +} + +// Downloads ------------------------------------------------------------------- + +// Handle creation of a download by creating an entry in the history service's +// 'downloads' table. +HistoryService::Handle HistoryService::CreateDownload( + const DownloadCreateInfo& create_info, + CancelableRequestConsumerBase* consumer, + HistoryService::DownloadCreateCallback* callback) { + return Schedule(PRIORITY_NORMAL, &HistoryBackend::CreateDownload, consumer, + new history::DownloadCreateRequest(callback), create_info); +} + +// Handle queries for a list of all downloads in the history database's +// 'downloads' table. +HistoryService::Handle HistoryService::QueryDownloads( + CancelableRequestConsumerBase* consumer, + DownloadQueryCallback* callback) { + return Schedule(PRIORITY_NORMAL, &HistoryBackend::QueryDownloads, consumer, + new history::DownloadQueryRequest(callback)); +} + +// Changes all IN_PROGRESS in the database entries to CANCELED. 
+// IN_PROGRESS entries are the corrupted entries, not updated by next function +// because of the crash or some other extremal exit. +void HistoryService::CleanUpInProgressEntries() { + ScheduleAndForget(PRIORITY_NORMAL, &HistoryBackend::CleanUpInProgressEntries); +} + +// Handle updates for a particular download. This is a 'fire and forget' +// operation, so we don't need to be called back. +void HistoryService::UpdateDownload(int64 received_bytes, + int32 state, + int64 db_handle) { + ScheduleAndForget(PRIORITY_NORMAL, &HistoryBackend::UpdateDownload, + received_bytes, state, db_handle); +} + +void HistoryService::UpdateDownloadPath(const FilePath& path, + int64 db_handle) { + ScheduleAndForget(PRIORITY_NORMAL, &HistoryBackend::UpdateDownloadPath, + path, db_handle); +} + +void HistoryService::RemoveDownload(int64 db_handle) { + ScheduleAndForget(PRIORITY_NORMAL, + &HistoryBackend::RemoveDownload, db_handle); +} + +void HistoryService::RemoveDownloadsBetween(Time remove_begin, + Time remove_end) { + ScheduleAndForget(PRIORITY_NORMAL, + &HistoryBackend::RemoveDownloadsBetween, + remove_begin, + remove_end); +} + +HistoryService::Handle HistoryService::SearchDownloads( + const string16& search_text, + CancelableRequestConsumerBase* consumer, + DownloadSearchCallback* callback) { + return Schedule(PRIORITY_NORMAL, &HistoryBackend::SearchDownloads, consumer, + new history::DownloadSearchRequest(callback), search_text); +} + +HistoryService::Handle HistoryService::QueryHistory( + const string16& text_query, + const history::QueryOptions& options, + CancelableRequestConsumerBase* consumer, + QueryHistoryCallback* callback) { + return Schedule(PRIORITY_UI, &HistoryBackend::QueryHistory, consumer, + new history::QueryHistoryRequest(callback), + text_query, options); +} + +HistoryService::Handle HistoryService::QueryRedirectsFrom( + const GURL& from_url, + CancelableRequestConsumerBase* consumer, + QueryRedirectsCallback* callback) { + return Schedule(PRIORITY_UI, 
&HistoryBackend::QueryRedirectsFrom, consumer, + new history::QueryRedirectsRequest(callback), from_url); +} + +HistoryService::Handle HistoryService::QueryRedirectsTo( + const GURL& to_url, + CancelableRequestConsumerBase* consumer, + QueryRedirectsCallback* callback) { + return Schedule(PRIORITY_NORMAL, &HistoryBackend::QueryRedirectsTo, consumer, + new history::QueryRedirectsRequest(callback), to_url); +} + +HistoryService::Handle HistoryService::GetVisitCountToHost( + const GURL& url, + CancelableRequestConsumerBase* consumer, + GetVisitCountToHostCallback* callback) { + return Schedule(PRIORITY_UI, &HistoryBackend::GetVisitCountToHost, consumer, + new history::GetVisitCountToHostRequest(callback), url); +} + +HistoryService::Handle HistoryService::QueryTopURLsAndRedirects( + int result_count, + CancelableRequestConsumerBase* consumer, + QueryTopURLsAndRedirectsCallback* callback) { + return Schedule(PRIORITY_NORMAL, &HistoryBackend::QueryTopURLsAndRedirects, + consumer, new history::QueryTopURLsAndRedirectsRequest(callback), + result_count); +} + +HistoryService::Handle HistoryService::QueryMostVisitedURLs( + int result_count, + int days_back, + CancelableRequestConsumerBase* consumer, + QueryMostVisitedURLsCallback* callback) { + return Schedule(PRIORITY_NORMAL, &HistoryBackend::QueryMostVisitedURLs, + consumer, + new history::QueryMostVisitedURLsRequest(callback), + result_count, days_back); +} + +void HistoryService::Observe(NotificationType type, + const NotificationSource& source, + const NotificationDetails& details) { + if (type != NotificationType::HISTORY_URLS_DELETED) { + NOTREACHED(); + return; + } + + // Update the visited link system for deleted URLs. We will update the + // visited link system for added URLs as soon as we get the add + // notification (we don't have to wait for the backend, which allows us to + // be faster to update the state). 
+ // + // For deleted URLs, we don't typically know what will be deleted since + // delete notifications are by time. We would also like to be more + // respectful of privacy and never tell the user something is gone when it + // isn't. Therefore, we update the delete URLs after the fact. + if (!profile_) + return; // No profile, probably unit testing. + Details<history::URLsDeletedDetails> deleted_details(details); + VisitedLinkMaster* visited_links = profile_->GetVisitedLinkMaster(); + if (!visited_links) + return; // Nobody to update. + if (deleted_details->all_history) + visited_links->DeleteAllURLs(); + else // Delete individual ones. + visited_links->DeleteURLs(deleted_details->urls); +} + +bool HistoryService::Init(const FilePath& history_dir, + BookmarkService* bookmark_service, + bool no_db) { + if (!thread_->Start()) { + Cleanup(); + return false; + } + + history_dir_ = history_dir; + bookmark_service_ = bookmark_service; + no_db_ = no_db; + + // Create the history backend. + LoadBackendIfNecessary(); + return true; +} + +void HistoryService::ScheduleAutocomplete(HistoryURLProvider* provider, + HistoryURLProviderParams* params) { + ScheduleAndForget(PRIORITY_UI, &HistoryBackend::ScheduleAutocomplete, + scoped_refptr<HistoryURLProvider>(provider), params); +} + +void HistoryService::ScheduleTask(SchedulePriority priority, + Task* task) { + // FIXME(brettw) do prioritization. + thread_->message_loop()->PostTask(FROM_HERE, task); +} + +// static +bool HistoryService::CanAddURL(const GURL& url) { + if (!url.is_valid()) + return false; + + // TODO: We should allow kChromeUIScheme URLs if they have been explicitly + // typed. Right now, however, these are marked as typed even when triggered + // by a shortcut or menu action. 
+ if (url.SchemeIs(chrome::kJavaScriptScheme) || + url.SchemeIs(chrome::kChromeUIScheme) || + url.SchemeIs(chrome::kViewSourceScheme) || + url.SchemeIs(chrome::kChromeInternalScheme)) + return false; + + if (url.SchemeIs(chrome::kAboutScheme)) { + if (LowerCaseEqualsASCII(url.path(), "blank")) + return false; + // We allow all other about URLs since the user may like to see things + // like "about:memory" or "about:histograms" in their history and + // autocomplete. + } + + return true; +} + +void HistoryService::SetInMemoryBackend( + history::InMemoryHistoryBackend* mem_backend) { + DCHECK(!in_memory_backend_.get()) << "Setting mem DB twice"; + in_memory_backend_.reset(mem_backend); + + // The database requires additional initialization once we own it. + in_memory_backend_->AttachToHistoryService(profile_); +} + +void HistoryService::NotifyProfileError(int message_id) { + Source<HistoryService> source(this); + NotificationService::current()->Notify(NotificationType::PROFILE_ERROR, + source, Details<int>(&message_id)); +} + +void HistoryService::DeleteURL(const GURL& url) { + // We will update the visited links when we observe the delete notifications. + ScheduleAndForget(PRIORITY_NORMAL, &HistoryBackend::DeleteURL, url); +} + +void HistoryService::ExpireHistoryBetween( + const std::set<GURL>& restrict_urls, + Time begin_time, Time end_time, + CancelableRequestConsumerBase* consumer, + ExpireHistoryCallback* callback) { + + // We will update the visited links when we observe the delete notifications. + Schedule(PRIORITY_UI, &HistoryBackend::ExpireHistoryBetween, consumer, + new history::ExpireHistoryRequest(callback), + restrict_urls, begin_time, end_time); +} + +void HistoryService::BroadcastNotifications( + NotificationType type, + history::HistoryDetails* details_deleted) { + // We take ownership of the passed-in pointer and delete it. It was made for + // us on another thread, so the caller doesn't know when we will handle it. 
+ scoped_ptr<history::HistoryDetails> details(details_deleted); + // TODO(evanm): this is currently necessitated by generate_profile, which + // runs without a browser process. generate_profile should really create + // a browser process, at which point this check can then be nuked. + if (!g_browser_process) + return; + + // The source of all of our notifications is the profile. Note that this + // pointer is NULL in unit tests. + Source<Profile> source(profile_); + + // The details object just contains the pointer to the object that the + // backend has allocated for us. The receiver of the notification will cast + // this to the proper type. + Details<history::HistoryDetails> det(details_deleted); + + NotificationService::current()->Notify(type, source, det); +} + +void HistoryService::LoadBackendIfNecessary() { + if (!thread_ || history_backend_) + return; // Failed to init, or already started loading. + + scoped_refptr<HistoryBackend> backend( + new HistoryBackend(history_dir_, + new BackendDelegate(this), + bookmark_service_)); + history_backend_.swap(backend); + + ScheduleAndForget(PRIORITY_UI, &HistoryBackend::Init, no_db_); +} + +void HistoryService::OnDBLoaded() { + LOG(INFO) << "History backend finished loading"; + backend_loaded_ = true; + NotificationService::current()->Notify(NotificationType::HISTORY_LOADED, + Source<Profile>(profile_), + Details<HistoryService>(this)); +} + +void HistoryService::StartTopSitesMigration() { + history::TopSites* ts = profile_->GetTopSites(); + ts->StartMigration(); +} + +void HistoryService::OnTopSitesReady() { + ScheduleAndForget(PRIORITY_NORMAL, + &HistoryBackend::MigrateThumbnailsDatabase); +} diff --git a/chrome/browser/history/history.h b/chrome/browser/history/history.h new file mode 100644 index 0000000..9548f65 --- /dev/null +++ b/chrome/browser/history/history.h @@ -0,0 +1,852 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CHROME_BROWSER_HISTORY_HISTORY_H_ +#define CHROME_BROWSER_HISTORY_HISTORY_H_ + +#include <string> +#include <vector> + +#include "base/basictypes.h" +#include "base/callback.h" +#include "base/file_path.h" +#include "base/ref_counted.h" +#include "base/scoped_ptr.h" +#include "base/string16.h" +#include "base/task.h" +#include "chrome/browser/cancelable_request.h" +#include "chrome/browser/favicon_service.h" +#include "chrome/browser/history/history_types.h" +#include "chrome/browser/search_engines/template_url.h" +#include "chrome/common/notification_registrar.h" +#include "chrome/common/page_transition_types.h" +#include "chrome/common/ref_counted_util.h" + +class BookmarkService; +struct DownloadCreateInfo; +class FilePath; +class GURL; +class HistoryURLProvider; +struct HistoryURLProviderParams; +class InMemoryURLDatabase; +class MainPagesRequest; +class PageUsageData; +class PageUsageRequest; +class Profile; +class SkBitmap; +struct ThumbnailScore; + +namespace base { +class Thread; +class Time; +} + +namespace browser_sync { +class HistoryModelWorker; +class TypedUrlDataTypeController; +} + +namespace history { + +class InMemoryHistoryBackend; +class HistoryBackend; +class HistoryDatabase; +struct HistoryDetails; +class HistoryQueryTest; +class URLDatabase; + +} // namespace history + + +// HistoryDBTask can be used to process arbitrary work on the history backend +// thread. HistoryDBTask is scheduled using HistoryService::ScheduleDBTask. +// When HistoryBackend processes the task it invokes RunOnDBThread. Once the +// task completes and has not been canceled, DoneRunOnMainThread is invoked back +// on the main thread. +class HistoryDBTask : public base::RefCountedThreadSafe<HistoryDBTask> { + public: + // Invoked on the database thread. The return value indicates whether the + // task is done. 
A return value of true signals the task is done and + // RunOnDBThread should NOT be invoked again. A return value of false + // indicates the task is not done, and should be run again after other + // tasks are given a chance to be processed. + virtual bool RunOnDBThread(history::HistoryBackend* backend, + history::HistoryDatabase* db) = 0; + + // Invoked on the main thread once RunOnDBThread has returned false. This is + // only invoked if the request was not canceled and returned true from + // RunOnDBThread. + virtual void DoneRunOnMainThread() = 0; + + protected: + friend class base::RefCountedThreadSafe<HistoryDBTask>; + + virtual ~HistoryDBTask() {} +}; + +// The history service records page titles, and visit times, as well as +// (eventually) information about autocomplete. +// +// This service is thread safe. Each request callback is invoked in the +// thread that made the request. +class HistoryService : public CancelableRequestProvider, + public NotificationObserver, + public base::RefCountedThreadSafe<HistoryService> { + public: + // Miscellaneous commonly-used types. + typedef std::vector<PageUsageData*> PageUsageDataList; + + // ID (both star_id and group_id) of the bookmark bar. + // This entry always exists. + static const history::StarID kBookmarkBarID; + + // Must call Init after construction. + explicit HistoryService(Profile* profile); + // The empty constructor is provided only for testing. + HistoryService(); + + // Initializes the history service, returning true on success. On false, do + // not call any other functions. The given directory will be used for storing + // the history files. The BookmarkService is used when deleting URLs to + // test if a URL is bookmarked; it may be NULL during testing. + bool Init(const FilePath& history_dir, BookmarkService* bookmark_service) { + return Init(history_dir, bookmark_service, false); + } + + // Triggers the backend to load if it hasn't already, and then returns whether + // it's finished loading. 
+ bool BackendLoaded(); + + // Unloads the backend without actually shutting down the history service. + // This can be used to temporarily reduce the browser process' memory + // footprint. + void UnloadBackend(); + + // Called on shutdown, this will tell the history backend to complete and + // will release pointers to it. No other functions should be called once + // cleanup has happened that may dispatch to the history thread (because it + // will be NULL). + // + // In practice, this will be called by the service manager (BrowserProcess) + // when it is being destroyed. Because that reference is being destroyed, it + // should be impossible for anybody else to call the service, even if it is + // still in memory (pending requests may be holding a reference to us). + void Cleanup(); + + // RenderProcessHost pointers are used to scope page IDs (see AddPage). These + // objects must tell us when they are being destroyed so that we can clear + // out any cached data associated with that scope. + // + // The given pointer will not be dereferenced, it is only used for + // identification purposes, hence it is a void*. + void NotifyRenderProcessHostDestruction(const void* host); + + // Triggers the backend to load if it hasn't already, and then returns the + // in-memory URL database. The returned pointer MAY BE NULL if the in-memory + // database has not been loaded yet. This pointer is owned by the history + // system. Callers should not store or cache this value. + // + // TODO(brettw) this should return the InMemoryHistoryBackend. + history::URLDatabase* InMemoryDatabase(); + + // Navigation ---------------------------------------------------------------- + + // Adds the given canonical URL to history with the current time as the visit + // time. Referrer may be the empty string. + // + // The supplied render process host is used to scope the given page ID. Page + // IDs are only unique inside a given render process, so we need that to + // differentiate them. 
This pointer should not be dereferenced by the history + // system. Since render view host pointers may be reused (if one gets deleted + // and a new one created at the same address), TabContents should notify + // us when they are being destroyed through NotifyTabContentsDestruction. + // + // The scope/ids can be NULL if there is no meaningful tracking information + // that can be performed on the given URL. The 'page_id' should be the ID of + // the current session history entry in the given process. + // + // 'redirects' is an array of redirect URLs leading to this page, with the + // page itself as the last item (so when there is no redirect, it will have + // one entry). If there are no redirects, this array may also be empty for + // the convenience of callers. + // + // 'did_replace_entry' is true when the navigation entry for this page has + // replaced the existing entry. A non-user initiated redirect causes such + // replacement. + // + // All "Add Page" functions will update the visited link database. + void AddPage(const GURL& url, + const void* id_scope, + int32 page_id, + const GURL& referrer, + PageTransition::Type transition, + const history::RedirectList& redirects, + bool did_replace_entry); + + // For adding pages to history with a specific time. This is for testing + // purposes. Call the previous one to use the current time. + void AddPage(const GURL& url, + base::Time time, + const void* id_scope, + int32 page_id, + const GURL& referrer, + PageTransition::Type transition, + const history::RedirectList& redirects, + bool did_replace_entry); + + // For adding pages to history where no tracking information can be done. + void AddPage(const GURL& url) { + AddPage(url, NULL, 0, GURL(), PageTransition::LINK, history::RedirectList(), + false); + } + + // Sets the title for the given page. The page should be in history. If it + // is not, this operation is ignored. This call will not update the full + // text index. 
The last title set when the page is indexed will be the + // title in the full text index. + void SetPageTitle(const GURL& url, const string16& title); + + // Indexing ------------------------------------------------------------------ + + // Notifies history of the body text of the given recently-visited URL. + // If the URL was not visited "recently enough," the history system may + // discard it. + void SetPageContents(const GURL& url, const string16& contents); + + // Querying ------------------------------------------------------------------ + + // Callback class that a client can implement to iterate over URLs. The + // callbacks WILL BE CALLED ON THE BACKGROUND THREAD! Your implementation + // should handle this appropriately. + class URLEnumerator { + public: + virtual ~URLEnumerator() {} + + // Indicates that a URL is available. There will be exactly one call for + // every URL in history. + virtual void OnURL(const GURL& url) = 0; + + // Indicates we are done iterating over URLs. Once called, there will be no + // more callbacks made. This call is guaranteed to occur, even if there are + // no URLs. If all URLs were iterated, success will be true. + virtual void OnComplete(bool success) = 0; + }; + + // Enumerate all URLs in history. The given iterator will be owned by the + // caller, so the caller should ensure it exists until OnComplete is called. + // You should not generally use this since it will be slow to slurp all URLs + // in from the database. It is designed for rebuilding the visited link + // database from history. + void IterateURLs(URLEnumerator* iterator); + + // Returns the information about the requested URL. If the URL is found, + // success will be true and the information will be in the URLRow parameter. + // On success, the visits, if requested, will be sorted by date. If they have + // not been requested, the pointer will be valid, but the vector will be + // empty. 
+ // + // If success is false, neither the row nor the vector will be valid. + typedef Callback4<Handle, + bool, // Success flag, when false, nothing else is valid. + const history::URLRow*, + history::VisitVector*>::Type + QueryURLCallback; + + // Queries the basic information about the URL in the history database. If + // the caller is interested in the visits (each time the URL is visited), + // set |want_visits| to true. If these are not needed, the function will be + // faster by setting this to false. + Handle QueryURL(const GURL& url, + bool want_visits, + CancelableRequestConsumerBase* consumer, + QueryURLCallback* callback); + + // Provides the result of a query. See QueryResults in history_types.h. + // The common use will be to use QueryResults.Swap to suck the contents of + // the results out of the passed in parameter and take ownership of them. + typedef Callback2<Handle, history::QueryResults*>::Type + QueryHistoryCallback; + + // Queries all history with the given options (see QueryOptions in + // history_types.h). If non-empty, the full-text database will be queried with + // the given |text_query|. If empty, all results matching the given options + // will be returned. + // + // This isn't totally hooked up yet, this will query the "new" full text + // database (see SetPageContents) which won't generally be set yet. + Handle QueryHistory(const string16& text_query, + const history::QueryOptions& options, + CancelableRequestConsumerBase* consumer, + QueryHistoryCallback* callback); + + // Called when the results of QueryRedirectsFrom are available. + // The given vector will contain a list of all redirects, not counting + // the original page. If A redirects to B, the vector will contain only B, + // and A will be in 'source_url'. + // + // If there is no such URL in the database or the most recent visit has no + // redirect, the vector will be empty. If the history system failed for + // some reason, success will additionally be false. 
If the given page + // has redirected to multiple destinations, this will pick a random one. + typedef Callback4<Handle, + GURL, // from_url + bool, // success + history::RedirectList*>::Type + QueryRedirectsCallback; + + // Schedules a query for the most recent redirect coming out of the given + // URL. See the RedirectQuerySource above, which is guaranteed to be called + // if the request is not canceled. + Handle QueryRedirectsFrom(const GURL& from_url, + CancelableRequestConsumerBase* consumer, + QueryRedirectsCallback* callback); + + // Schedules a query to get the most recent redirects ending at the given + // URL. + Handle QueryRedirectsTo(const GURL& to_url, + CancelableRequestConsumerBase* consumer, + QueryRedirectsCallback* callback); + + typedef Callback4<Handle, + bool, // Were we able to determine the # of visits? + int, // Number of visits. + base::Time>::Type // Time of first visit. Only first bool is + // true and int is > 0. + GetVisitCountToHostCallback; + + // Requests the number of visits to all urls on the scheme/host/post + // identified by url. This is only valid for http and https urls. + Handle GetVisitCountToHost(const GURL& url, + CancelableRequestConsumerBase* consumer, + GetVisitCountToHostCallback* callback); + + // Called when QueryTopURLsAndRedirects completes. The vector contains a list + // of the top |result_count| URLs. For each of these URLs, there is an entry + // in the map containing redirects from the URL. For example, if we have the + // redirect chain A -> B -> C and A is a top visited URL, then A will be in + // the vector and "A => {B -> C}" will be in the map. + typedef Callback4<Handle, + bool, // Did we get the top urls and redirects? + std::vector<GURL>*, // List of top URLs. + history::RedirectMap*>::Type // Redirects for top URLs. + QueryTopURLsAndRedirectsCallback; + + // Request the top |result_count| most visited URLs and the chain of redirects + // leading to each of these URLs. + // TODO(Nik): remove this. 
Use QueryMostVisitedURLs instead. + Handle QueryTopURLsAndRedirects(int result_count, + CancelableRequestConsumerBase* consumer, + QueryTopURLsAndRedirectsCallback* callback); + + typedef Callback2<Handle, history::MostVisitedURLList>::Type + QueryMostVisitedURLsCallback; + + // Request the |result_count| most visited URLs and the chain of + // redirects leading to each of these URLs. |days_back| is the + // number of days of history to use. Used by TopSites. + Handle QueryMostVisitedURLs(int result_count, int days_back, + CancelableRequestConsumerBase* consumer, + QueryMostVisitedURLsCallback* callback); + + // Thumbnails ---------------------------------------------------------------- + + // Implemented by consumers to get thumbnail data. Called when a request for + // the thumbnail data is complete. Once this callback is made, the request + // will be completed and no other calls will be made for that handle. + // + // This function will be called even on error conditions or if there is no + // thumbnail for that page. In these cases, the data pointer will be NULL. + typedef Callback2<Handle, scoped_refptr<RefCountedBytes> >::Type + ThumbnailDataCallback; + + // Sets the thumbnail for a given URL. The URL must be in the history + // database or the request will be ignored. + void SetPageThumbnail(const GURL& url, + const SkBitmap& thumbnail, + const ThumbnailScore& score); + + // Requests a page thumbnail. See ThumbnailDataCallback definition above. + Handle GetPageThumbnail(const GURL& page_url, + CancelableRequestConsumerBase* consumer, + ThumbnailDataCallback* callback); + + // Database management operations -------------------------------------------- + + // Delete all the information related to a single url. + void DeleteURL(const GURL& url); + + // Implemented by the caller of ExpireHistoryBetween, and + // is called when the history service has deleted the history. 
+ typedef Callback0::Type ExpireHistoryCallback; + + // Removes all visits in the selected time range (including the start time), + // updating the URLs accordingly. This deletes the associated data, including + // the full text index. This function also deletes the associated favicons, + // if they are no longer referenced. |callback| runs when the expiration is + // complete. You may use null Time values to do an unbounded delete in + // either direction. + // If |restrict_urls| is not empty, only visits to the URLs in this set are + // removed. + void ExpireHistoryBetween(const std::set<GURL>& restrict_urls, + base::Time begin_time, base::Time end_time, + CancelableRequestConsumerBase* consumer, + ExpireHistoryCallback* callback); + + // Downloads ----------------------------------------------------------------- + + // Implemented by the caller of 'CreateDownload' below, and is called when the + // history service has created a new entry for a download in the history db. + typedef Callback2<DownloadCreateInfo, int64>::Type DownloadCreateCallback; + + // Begins a history request to create a new persistent entry for a download. + // 'info' contains all the download's creation state, and 'callback' runs + // when the history service request is complete. + Handle CreateDownload(const DownloadCreateInfo& info, + CancelableRequestConsumerBase* consumer, + DownloadCreateCallback* callback); + + // Implemented by the caller of 'QueryDownloads' below, and is called when the + // history service has retrieved a list of all download state. The call + typedef Callback1<std::vector<DownloadCreateInfo>*>::Type + DownloadQueryCallback; + + // Begins a history request to retrieve the state of all downloads in the + // history db. 'callback' runs when the history service request is complete, + // at which point 'info' contains an array of DownloadCreateInfo, one per + // download. 
+ Handle QueryDownloads(CancelableRequestConsumerBase* consumer,
+ DownloadQueryCallback* callback);
+
+ // Begins a request to clean up entries that have been corrupted (because of
+ // the crash, for example).
+ void CleanUpInProgressEntries();
+
+ // Called to update the history service about the current state of a download.
+ // This is a 'fire and forget' query, so just pass the relevant state info to
+ // the database with no need for a callback.
+ void UpdateDownload(int64 received_bytes, int32 state, int64 db_handle);
+
+ // Called to update the history service about the path of a download.
+ // This is a 'fire and forget' query.
+ void UpdateDownloadPath(const FilePath& path, int64 db_handle);
+
+ // Permanently remove a download from the history system. This is a 'fire and
+ // forget' operation.
+ void RemoveDownload(int64 db_handle);
+
+ // Permanently removes all completed downloads from the history system within
+ // the specified range. This function does not delete downloads that are in
+ // progress or in the process of being cancelled. This is a 'fire and forget'
+ // operation. You can pass is_null times to get unbounded time in either or
+ // both directions.
+ void RemoveDownloadsBetween(base::Time remove_begin, base::Time remove_end);
+
+ // Implemented by the caller of 'SearchDownloads' below, and is called when
+ // the history system has retrieved the search results.
+ typedef Callback2<Handle, std::vector<int64>*>::Type DownloadSearchCallback;
+
+ // Search for downloads that match the search text.
+ Handle SearchDownloads(const string16& search_text,
+ CancelableRequestConsumerBase* consumer,
+ DownloadSearchCallback* callback);
+
+ // Visit Segments ------------------------------------------------------------
+
+ typedef Callback2<Handle, std::vector<PageUsageData*>*>::Type
+ SegmentQueryCallback;
+
+ // Query usage data for all visit segments since the provided time. 
+ // + // The request is performed asynchronously and can be cancelled by using the + // returned handle. + // + // The vector provided to the callback and its contents is owned by the + // history system. It will be deeply deleted after the callback is invoked. + // If you want to preserve any PageUsageData instance, simply remove them + // from the vector. + // + // The vector contains a list of PageUsageData. Each PageUsageData ID is set + // to the segment ID. The URL and all the other information is set to the page + // representing the segment. + Handle QuerySegmentUsageSince(CancelableRequestConsumerBase* consumer, + const base::Time from_time, + int max_result_count, + SegmentQueryCallback* callback); + + // Set the presentation index for the segment identified by |segment_id|. + void SetSegmentPresentationIndex(int64 segment_id, int index); + + // Keyword search terms ----------------------------------------------------- + + // Sets the search terms for the specified url and keyword. url_id gives the + // id of the url, keyword_id the id of the keyword and term the search term. + void SetKeywordSearchTermsForURL(const GURL& url, + TemplateURL::IDType keyword_id, + const string16& term); + + // Deletes all search terms for the specified keyword. + void DeleteAllSearchTermsForKeyword(TemplateURL::IDType keyword_id); + + typedef Callback2<Handle, std::vector<history::KeywordSearchTermVisit>*>::Type + GetMostRecentKeywordSearchTermsCallback; + + // Returns up to max_count of the most recent search terms starting with the + // specified text. The matching is case insensitive. The results are ordered + // in descending order up to |max_count| with the most recent search term + // first. 
+ Handle GetMostRecentKeywordSearchTerms( + TemplateURL::IDType keyword_id, + const string16& prefix, + int max_count, + CancelableRequestConsumerBase* consumer, + GetMostRecentKeywordSearchTermsCallback* callback); + + // Bookmarks ----------------------------------------------------------------- + + // Notification that a URL is no longer bookmarked. + void URLsNoLongerBookmarked(const std::set<GURL>& urls); + + // Generic Stuff ------------------------------------------------------------- + + typedef Callback0::Type HistoryDBTaskCallback; + + // Schedules a HistoryDBTask for running on the history backend thread. See + // HistoryDBTask for details on what this does. + virtual Handle ScheduleDBTask(HistoryDBTask* task, + CancelableRequestConsumerBase* consumer); + + // Testing ------------------------------------------------------------------- + + // Designed for unit tests, this passes the given task on to the history + // backend to be called once the history backend has terminated. This allows + // callers to know when the history thread is complete and the database files + // can be deleted and the next test run. Otherwise, the history thread may + // still be running, causing problems in subsequent tests. + // + // There can be only one closing task, so this will override any previously + // set task. We will take ownership of the pointer and delete it when done. + // The task will be run on the calling thread (this function is threadsafe). + void SetOnBackendDestroyTask(Task* task); + + // Used for unit testing and potentially importing to get known information + // into the database. This assumes the URL doesn't exist in the database + // + // Calling this function many times may be slow because each call will + // dispatch to the history thread and will be a separate database + // transaction. If this functionality is needed for importing many URLs, a + // version that takes an array should probably be added. 
+ void AddPageWithDetails(const GURL& url, + const string16& title, + int visit_count, + int typed_count, + base::Time last_visit, + bool hidden); + + // The same as AddPageWithDetails() but takes a vector. + void AddPagesWithDetails(const std::vector<history::URLRow>& info); + + // Starts the TopSites migration in the HistoryThread. Called by the + // BackendDelegate. + void StartTopSitesMigration(); + + // Called by TopSites after the thumbnails were read and it is safe + // to delete the thumbnails DB. + void OnTopSitesReady(); + + // Returns true if this looks like the type of URL we want to add to the + // history. We filter out some URLs such as JavaScript. + static bool CanAddURL(const GURL& url); + + protected: + ~HistoryService(); + + // These are not currently used, hopefully we can do something in the future + // to ensure that the most important things happen first. + enum SchedulePriority { + PRIORITY_UI, // The highest priority (must respond to UI events). + PRIORITY_NORMAL, // Normal stuff like adding a page. + PRIORITY_LOW, // Low priority things like indexing or expiration. + }; + + private: + class BackendDelegate; + friend class base::RefCountedThreadSafe<HistoryService>; + friend class BackendDelegate; + friend class FaviconService; + friend class history::HistoryBackend; + friend class history::HistoryQueryTest; + friend class HistoryOperation; + friend class HistoryURLProvider; + friend class HistoryURLProviderTest; + template<typename Info, typename Callback> friend class DownloadRequest; + friend class PageUsageRequest; + friend class RedirectRequest; + friend class FavIconRequest; + friend class TestingProfile; + + // Implementation of NotificationObserver. + virtual void Observe(NotificationType type, + const NotificationSource& source, + const NotificationDetails& details); + + // Low-level Init(). Same as the public version, but adds a |no_db| parameter + // that is only set by unittests which causes the backend to not init its DB. 
+ bool Init(const FilePath& history_dir, + BookmarkService* bookmark_service, + bool no_db); + + // Called by the HistoryURLProvider class to schedule an autocomplete, it + // will be called back on the internal history thread with the history + // database so it can query. See history_autocomplete.cc for a diagram. + void ScheduleAutocomplete(HistoryURLProvider* provider, + HistoryURLProviderParams* params); + + // Broadcasts the given notification. This is called by the backend so that + // the notification will be broadcast on the main thread. + // + // The |details_deleted| pointer will be sent as the "details" for the + // notification. The function takes ownership of the pointer and deletes it + // when the notification is sent (it is coming from another thread, so must + // be allocated on the heap). + void BroadcastNotifications(NotificationType type, + history::HistoryDetails* details_deleted); + + // Initializes the backend. + void LoadBackendIfNecessary(); + + // Notification from the backend that it has finished loading. Sends + // notification (NOTIFY_HISTORY_LOADED) and sets backend_loaded_ to true. + void OnDBLoaded(); + + // FavIcon ------------------------------------------------------------------- + + // These favicon methods are exposed to the FaviconService. Instead of calling + // these methods directly you should call the respective method on the + // FaviconService. + + // Used by the FaviconService to get a favicon from the history backend. + void GetFavicon(FaviconService::GetFaviconRequest* request, + const GURL& icon_url); + + // Used by the FaviconService to update the favicon mappings on the history + // backend. + void UpdateFaviconMappingAndFetch(FaviconService::GetFaviconRequest* request, + const GURL& page_url, + const GURL& icon_url); + + // Used by the FaviconService to get a favicon from the history backend. 
+ void GetFaviconForURL(FaviconService::GetFaviconRequest* request, + const GURL& page_url); + + // Used by the FaviconService to mark the favicon for the page as being out + // of date. + void SetFaviconOutOfDateForPage(const GURL& page_url); + + // Used by the FaviconService for importing many favicons for many pages at + // once. The pages must exist, any favicon sets for unknown pages will be + // discarded. Existing favicons will not be overwritten. + void SetImportedFavicons( + const std::vector<history::ImportedFavIconUsage>& favicon_usage); + + // Used by the FaviconService to set the favicon for a page on the history + // backend. + void SetFavicon(const GURL& page_url, + const GURL& icon_url, + const std::vector<unsigned char>& image_data); + + + // Sets the in-memory URL database. This is called by the backend once the + // database is loaded to make it available. + void SetInMemoryBackend(history::InMemoryHistoryBackend* mem_backend); + + // Called by our BackendDelegate when there is a problem reading the database. + // |message_id| is the relevant message in the string table to display. + void NotifyProfileError(int message_id); + + // Call to schedule a given task for running on the history thread with the + // specified priority. The task will have ownership taken. + void ScheduleTask(SchedulePriority priority, Task* task); + + // Schedule ------------------------------------------------------------------ + // + // Functions for scheduling operations on the history thread that have a + // handle and may be cancelable. For fire-and-forget operations, see + // ScheduleAndForget below. + + template<typename BackendFunc, class RequestType> + Handle Schedule(SchedulePriority priority, + BackendFunc func, // Function to call on the HistoryBackend. 
+ CancelableRequestConsumerBase* consumer,
+ RequestType* request) {
+ DCHECK(thread_) << "History service being called after cleanup";
+ LoadBackendIfNecessary();
+ if (consumer)
+ AddRequest(request, consumer);
+ ScheduleTask(priority,
+ NewRunnableMethod(history_backend_.get(), func,
+ scoped_refptr<RequestType>(request)));
+ return request->handle();
+ }
+
+ template<typename BackendFunc, class RequestType, typename ArgA>
+ Handle Schedule(SchedulePriority priority,
+ BackendFunc func, // Function to call on the HistoryBackend.
+ CancelableRequestConsumerBase* consumer,
+ RequestType* request,
+ const ArgA& a) {
+ DCHECK(thread_) << "History service being called after cleanup";
+ LoadBackendIfNecessary();
+ if (consumer)
+ AddRequest(request, consumer);
+ ScheduleTask(priority,
+ NewRunnableMethod(history_backend_.get(), func,
+ scoped_refptr<RequestType>(request),
+ a));
+ return request->handle();
+ }
+
+ template<typename BackendFunc,
+ class RequestType, // Descendant of CancelableRequestBase.
+ typename ArgA,
+ typename ArgB>
+ Handle Schedule(SchedulePriority priority,
+ BackendFunc func, // Function to call on the HistoryBackend.
+ CancelableRequestConsumerBase* consumer,
+ RequestType* request,
+ const ArgA& a,
+ const ArgB& b) {
+ DCHECK(thread_) << "History service being called after cleanup";
+ LoadBackendIfNecessary();
+ if (consumer)
+ AddRequest(request, consumer);
+ ScheduleTask(priority,
+ NewRunnableMethod(history_backend_.get(), func,
+ scoped_refptr<RequestType>(request),
+ a, b));
+ return request->handle();
+ }
+
+ template<typename BackendFunc,
+ class RequestType, // Descendant of CancelableRequestBase.
+ typename ArgA,
+ typename ArgB,
+ typename ArgC>
+ Handle Schedule(SchedulePriority priority,
+ BackendFunc func, // Function to call on the HistoryBackend. 
+ CancelableRequestConsumerBase* consumer, + RequestType* request, + const ArgA& a, + const ArgB& b, + const ArgC& c) { + DCHECK(thread_) << "History service being called after cleanup"; + LoadBackendIfNecessary(); + if (consumer) + AddRequest(request, consumer); + ScheduleTask(priority, + NewRunnableMethod(history_backend_.get(), func, + scoped_refptr<RequestType>(request), + a, b, c)); + return request->handle(); + } + + // ScheduleAndForget --------------------------------------------------------- + // + // Functions for scheduling operations on the history thread that do not need + // any callbacks and are not cancelable. + + template<typename BackendFunc> + void ScheduleAndForget(SchedulePriority priority, + BackendFunc func) { // Function to call on backend. + DCHECK(thread_) << "History service being called after cleanup"; + LoadBackendIfNecessary(); + ScheduleTask(priority, NewRunnableMethod(history_backend_.get(), func)); + } + + template<typename BackendFunc, typename ArgA> + void ScheduleAndForget(SchedulePriority priority, + BackendFunc func, // Function to call on backend. + const ArgA& a) { + DCHECK(thread_) << "History service being called after cleanup"; + LoadBackendIfNecessary(); + ScheduleTask(priority, NewRunnableMethod(history_backend_.get(), func, a)); + } + + template<typename BackendFunc, typename ArgA, typename ArgB> + void ScheduleAndForget(SchedulePriority priority, + BackendFunc func, // Function to call on backend. + const ArgA& a, + const ArgB& b) { + DCHECK(thread_) << "History service being called after cleanup"; + LoadBackendIfNecessary(); + ScheduleTask(priority, NewRunnableMethod(history_backend_.get(), func, + a, b)); + } + + template<typename BackendFunc, typename ArgA, typename ArgB, typename ArgC> + void ScheduleAndForget(SchedulePriority priority, + BackendFunc func, // Function to call on backend. 
+ const ArgA& a, + const ArgB& b, + const ArgC& c) { + DCHECK(thread_) << "History service being called after cleanup"; + LoadBackendIfNecessary(); + ScheduleTask(priority, NewRunnableMethod(history_backend_.get(), func, + a, b, c)); + } + + template<typename BackendFunc, + typename ArgA, + typename ArgB, + typename ArgC, + typename ArgD> + void ScheduleAndForget(SchedulePriority priority, + BackendFunc func, // Function to call on backend. + const ArgA& a, + const ArgB& b, + const ArgC& c, + const ArgD& d) { + DCHECK(thread_) << "History service being called after cleanup"; + LoadBackendIfNecessary(); + ScheduleTask(priority, NewRunnableMethod(history_backend_.get(), func, + a, b, c, d)); + } + + NotificationRegistrar registrar_; + + // Some void primitives require some internal processing in the main thread + // when done. We use this internal consumer for this purpose. + CancelableRequestConsumer internal_consumer_; + + // The thread used by the history service to run complicated operations + base::Thread* thread_; + + // This class has most of the implementation and runs on the 'thread_'. + // You MUST communicate with this class ONLY through the thread_'s + // message_loop(). + // + // This pointer will be NULL once Cleanup() has been called, meaning no + // more calls should be made to the history thread. + scoped_refptr<history::HistoryBackend> history_backend_; + + // A cache of the user-typed URLs kept in memory that is used by the + // autocomplete system. This will be NULL until the database has been created + // on the background thread. + scoped_ptr<history::InMemoryHistoryBackend> in_memory_backend_; + + // The profile, may be null when testing. + Profile* profile_; + + // Has the backend finished loading? The backend is loaded once Init has + // completed. + bool backend_loaded_; + + // Cached values from Init(), used whenever we need to reload the backend. 
+ FilePath history_dir_; + BookmarkService* bookmark_service_; + bool no_db_; + + DISALLOW_COPY_AND_ASSIGN(HistoryService); +}; + +#endif // CHROME_BROWSER_HISTORY_HISTORY_H_ diff --git a/chrome/browser/history/history_backend.cc b/chrome/browser/history/history_backend.cc new file mode 100644 index 0000000..0f512db --- /dev/null +++ b/chrome/browser/history/history_backend.cc @@ -0,0 +1,2164 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/history/history_backend.h" + +#include <set> + +#include "base/command_line.h" +#include "base/compiler_specific.h" +#include "base/file_util.h" +#include "base/histogram.h" +#include "base/message_loop.h" +#include "base/scoped_ptr.h" +#include "base/scoped_vector.h" +#include "base/string_util.h" +#include "base/time.h" +#include "chrome/browser/autocomplete/history_url_provider.h" +#include "chrome/browser/bookmarks/bookmark_service.h" +#include "chrome/browser/history/download_types.h" +#include "chrome/browser/history/history_notifications.h" +#include "chrome/browser/history/history_publisher.h" +#include "chrome/browser/history/in_memory_history_backend.h" +#include "chrome/browser/history/page_usage_data.h" +#include "chrome/common/chrome_constants.h" +#include "chrome/common/chrome_switches.h" +#include "chrome/common/notification_type.h" +#include "chrome/common/url_constants.h" +#include "googleurl/src/gurl.h" +#include "grit/chromium_strings.h" +#include "grit/generated_resources.h" +#include "net/base/registry_controlled_domain.h" + +using base::Time; +using base::TimeDelta; +using base::TimeTicks; + +/* The HistoryBackend consists of a number of components: + + HistoryDatabase (stores past 3 months of history) + URLDatabase (stores a list of URLs) + DownloadDatabase (stores a list of downloads) + VisitDatabase (stores a list of visits for the URLs) + 
VisitSegmentDatabase (stores groups of URLs for the most visited view). + + ArchivedDatabase (stores history older than 3 months) + URLDatabase (stores a list of URLs) + DownloadDatabase (stores a list of downloads) + VisitDatabase (stores a list of visits for the URLs) + + (this does not store visit segments as they expire after 3 mos.) + + TextDatabaseManager (manages multiple text database for different times) + TextDatabase (represents a single month of full-text index). + ...more TextDatabase objects... + + ExpireHistoryBackend (manages moving things from HistoryDatabase to + the ArchivedDatabase and deleting) +*/ + +namespace history { + +// How long we keep segment data for in days. Currently 3 months. +// This value needs to be greater or equal to +// MostVisitedModel::kMostVisitedScope but we don't want to introduce a direct +// dependency between MostVisitedModel and the history backend. +static const int kSegmentDataRetention = 90; + +// The number of milliseconds we'll wait to do a commit, so that things are +// batched together. +static const int kCommitIntervalMs = 10000; + +// The amount of time before we re-fetch the favicon. +static const int kFavIconRefetchDays = 7; + +// GetSessionTabs returns all open tabs, or tabs closed kSessionCloseTimeWindow +// seconds ago. +static const int kSessionCloseTimeWindowSecs = 10; + +// The maximum number of items we'll allow in the redirect list before +// deleting some. +static const int kMaxRedirectCount = 32; + +// The number of days old a history entry can be before it is considered "old" +// and is archived. +static const int kArchiveDaysThreshold = 90; + +// Converts from PageUsageData to MostVisitedURL. |redirects| is a +// list of redirects for this URL. Empty list means no redirects. 
+MostVisitedURL MakeMostVisitedURL(const PageUsageData& page_data, + const RedirectList& redirects) { + MostVisitedURL mv; + mv.url = page_data.GetURL(); + mv.title = page_data.GetTitle(); + if (redirects.empty()) { + // Redirects must contain at least the target url. + mv.redirects.push_back(mv.url); + } else { + mv.redirects = redirects; + if (mv.redirects[mv.redirects.size() - 1] != mv.url) { + // The last url must be the target url. + mv.redirects.push_back(mv.url); + } + } + return mv; +} + +// This task is run on a timer so that commits happen at regular intervals +// so they are batched together. The important thing about this class is that +// it supports canceling of the task so the reference to the backend will be +// freed. The problem is that when history is shutting down, there is likely +// to be one of these commits still pending and holding a reference. +// +// The backend can call Cancel to have this task release the reference. The +// task will still run (if we ever get to processing the event before +// shutdown), but it will not do anything. +// +// Note that this is a refcounted object and is not a task in itself. It should +// be assigned to a RunnableMethod. +// +// TODO(brettw): bug 1165182: This should be replaced with a +// ScopedRunnableMethodFactory which will handle everything automatically (like +// we do in ExpireHistoryBackend). +class CommitLaterTask : public base::RefCounted<CommitLaterTask> { + public: + explicit CommitLaterTask(HistoryBackend* history_backend) + : history_backend_(history_backend) { + } + + // The backend will call this function if it is being destroyed so that we + // release our reference. 
+ void Cancel() { + history_backend_ = NULL; + } + + void RunCommit() { + if (history_backend_.get()) + history_backend_->Commit(); + } + + private: + friend class base::RefCounted<CommitLaterTask>; + + ~CommitLaterTask() {} + + scoped_refptr<HistoryBackend> history_backend_; +}; + +// Handles querying first the main database, then the full text database if that +// fails. It will optionally keep track of all URLs seen so duplicates can be +// eliminated. This is used by the querying sub-functions. +// +// TODO(brettw): This class may be able to be simplified or eliminated. After +// this was written, QueryResults can efficiently look up by URL, so the need +// for this extra set of previously queried URLs is less important. +class HistoryBackend::URLQuerier { + public: + URLQuerier(URLDatabase* main_db, URLDatabase* archived_db, bool track_unique) + : main_db_(main_db), + archived_db_(archived_db), + track_unique_(track_unique) { + } + + // When we're tracking unique URLs, returns true if this URL has been + // previously queried. Only call when tracking unique URLs. + bool HasURL(const GURL& url) { + DCHECK(track_unique_); + return unique_urls_.find(url) != unique_urls_.end(); + } + + bool GetRowForURL(const GURL& url, URLRow* row) { + if (!main_db_->GetRowForURL(url, row)) { + if (!archived_db_ || !archived_db_->GetRowForURL(url, row)) { + // This row is neither in the main nor the archived DB. + return false; + } + } + + if (track_unique_) + unique_urls_.insert(url); + return true; + } + + private: + URLDatabase* main_db_; // Guaranteed non-NULL. + URLDatabase* archived_db_; // Possibly NULL. + + bool track_unique_; + + // When track_unique_ is set, this is updated with every URL seen so far. 
+ std::set<GURL> unique_urls_; + + DISALLOW_COPY_AND_ASSIGN(URLQuerier); +}; + +// HistoryBackend -------------------------------------------------------------- + +HistoryBackend::HistoryBackend(const FilePath& history_dir, + Delegate* delegate, + BookmarkService* bookmark_service) + : delegate_(delegate), + history_dir_(history_dir), + ALLOW_THIS_IN_INITIALIZER_LIST(expirer_(this, bookmark_service)), + recent_redirects_(kMaxRedirectCount), + backend_destroy_message_loop_(NULL), + backend_destroy_task_(NULL), + segment_queried_(false), + bookmark_service_(bookmark_service) { +} + +HistoryBackend::~HistoryBackend() { + DCHECK(!scheduled_commit_) << "Deleting without cleanup"; + ReleaseDBTasks(); + + // First close the databases before optionally running the "destroy" task. + if (db_.get()) { + // Commit the long-running transaction. + db_->CommitTransaction(); + db_.reset(); + } + if (thumbnail_db_.get()) { + thumbnail_db_->CommitTransaction(); + thumbnail_db_.reset(); + } + if (archived_db_.get()) { + archived_db_->CommitTransaction(); + archived_db_.reset(); + } + if (text_database_.get()) { + text_database_->CommitTransaction(); + text_database_.reset(); + } + + if (backend_destroy_task_) { + // Notify an interested party (typically a unit test) that we're done. + DCHECK(backend_destroy_message_loop_); + backend_destroy_message_loop_->PostTask(FROM_HERE, backend_destroy_task_); + } +} + +void HistoryBackend::Init(bool force_fail) { + if (!force_fail) + InitImpl(); + delegate_->DBLoaded(); +} + +void HistoryBackend::SetOnBackendDestroyTask(MessageLoop* message_loop, + Task* task) { + if (backend_destroy_task_) { + DLOG(WARNING) << "Setting more than one destroy task, overriding"; + delete backend_destroy_task_; + } + backend_destroy_message_loop_ = message_loop; + backend_destroy_task_ = task; +} + +void HistoryBackend::Closing() { + // Any scheduled commit will have a reference to us, we must make it + // release that reference before we can be destroyed. 
+ CancelScheduledCommit(); + + // Release our reference to the delegate, this reference will be keeping the + // history service alive. + delegate_.reset(); +} + +void HistoryBackend::NotifyRenderProcessHostDestruction(const void* host) { + tracker_.NotifyRenderProcessHostDestruction(host); +} + +FilePath HistoryBackend::GetThumbnailFileName() const { + return history_dir_.Append(chrome::kThumbnailsFilename); +} + +FilePath HistoryBackend::GetFaviconsFileName() const { + return history_dir_.Append(chrome::kFaviconsFilename); +} + +FilePath HistoryBackend::GetArchivedFileName() const { + return history_dir_.Append(chrome::kArchivedHistoryFilename); +} + +SegmentID HistoryBackend::GetLastSegmentID(VisitID from_visit) { + // Set is used to detect referrer loops. Should not happen, but can + // if the database is corrupt. + std::set<VisitID> visit_set; + VisitID visit_id = from_visit; + while (visit_id) { + VisitRow row; + if (!db_->GetRowForVisit(visit_id, &row)) + return 0; + if (row.segment_id) + return row.segment_id; // Found a visit in this change with a segment. + + // Check the referrer of this visit, if any. + visit_id = row.referring_visit; + + if (visit_set.find(visit_id) != visit_set.end()) { + NOTREACHED() << "Loop in referer chain, giving up"; + break; + } + visit_set.insert(visit_id); + } + return 0; +} + +SegmentID HistoryBackend::UpdateSegments(const GURL& url, + VisitID from_visit, + VisitID visit_id, + PageTransition::Type transition_type, + const Time ts) { + if (!db_.get()) + return 0; + + // We only consider main frames. + if (!PageTransition::IsMainFrame(transition_type)) + return 0; + + SegmentID segment_id = 0; + PageTransition::Type t = PageTransition::StripQualifier(transition_type); + + // Are we at the beginning of a new segment? + if (t == PageTransition::TYPED || t == PageTransition::AUTO_BOOKMARK) { + // If so, create or get the segment. 
+ std::string segment_name = db_->ComputeSegmentName(url); + URLID url_id = db_->GetRowForURL(url, NULL); + if (!url_id) + return 0; + + if (!(segment_id = db_->GetSegmentNamed(segment_name))) { + if (!(segment_id = db_->CreateSegment(url_id, segment_name))) { + NOTREACHED(); + return 0; + } + } else { + // Note: if we update an existing segment, we update the url used to + // represent that segment in order to minimize stale most visited + // images. + db_->UpdateSegmentRepresentationURL(segment_id, url_id); + } + } else { + // Note: it is possible there is no segment ID set for this visit chain. + // This can happen if the initial navigation wasn't AUTO_BOOKMARK or + // TYPED. (For example GENERATED). In this case this visit doesn't count + // toward any segment. + if (!(segment_id = GetLastSegmentID(from_visit))) + return 0; + } + + // Set the segment in the visit. + if (!db_->SetSegmentID(visit_id, segment_id)) { + NOTREACHED(); + return 0; + } + + // Finally, increase the counter for that segment / day. + if (!db_->IncreaseSegmentVisitCount(segment_id, ts, 1)) { + NOTREACHED(); + return 0; + } + return segment_id; +} + +void HistoryBackend::AddPage(scoped_refptr<HistoryAddPageArgs> request) { + DLOG(INFO) << "Adding page " << request->url.possibly_invalid_spec(); + + if (!db_.get()) + return; + + // Will be filled with the URL ID and the visit ID of the last addition. + std::pair<URLID, VisitID> last_ids(0, tracker_.GetLastVisit( + request->id_scope, request->page_id, request->referrer)); + + VisitID from_visit_id = last_ids.second; + + // If a redirect chain is given, we expect the last item in that chain to be + // the final URL. + DCHECK(request->redirects.size() == 0 || + request->redirects.back() == request->url); + + // Avoid duplicating times in the database, at least as long as pages are + // added in order. 
However, we don't want to disallow pages from recording + // times earlier than our last_recorded_time_, because someone might set + // their machine's clock back. + if (last_requested_time_ == request->time) { + last_recorded_time_ = last_recorded_time_ + TimeDelta::FromMicroseconds(1); + } else { + last_requested_time_ = request->time; + last_recorded_time_ = last_requested_time_; + } + + // If the user is adding older history, we need to make sure our times + // are correct. + if (request->time < first_recorded_time_) + first_recorded_time_ = request->time; + + PageTransition::Type transition = + PageTransition::StripQualifier(request->transition); + bool is_keyword_generated = (transition == PageTransition::KEYWORD_GENERATED); + + if (request->redirects.size() <= 1) { + // The single entry is both a chain start and end. + PageTransition::Type t = request->transition | + PageTransition::CHAIN_START | PageTransition::CHAIN_END; + + // No redirect case (one element means just the page itself). + last_ids = AddPageVisit(request->url, last_recorded_time_, + last_ids.second, t); + + // Update the segment for this visit. KEYWORD_GENERATED visits should not + // result in changing most visited, so we don't update segments (most + // visited db). + if (!is_keyword_generated) { + UpdateSegments(request->url, from_visit_id, last_ids.second, t, + last_recorded_time_); + } + } else { + // Redirect case. Add the redirect chain. + + PageTransition::Type redirect_info = PageTransition::CHAIN_START; + + if (request->redirects[0].SchemeIs(chrome::kAboutScheme)) { + // When the redirect source + referrer is "about" we skip it. This + // happens when a page opens a new frame/window to about:blank and then + // script sets the URL to somewhere else (used to hide the referrer). It + // would be nice to keep all these redirects properly but we don't ever + // see the initial about:blank load, so we don't know where the + // subsequent client redirect came from. 
+ // + // In this case, we just don't bother hooking up the source of the + // redirects, so we remove it. + request->redirects.erase(request->redirects.begin()); + } else if (request->transition & PageTransition::CLIENT_REDIRECT) { + redirect_info = PageTransition::CLIENT_REDIRECT; + // The first entry in the redirect chain initiated a client redirect. + // We don't add this to the database since the referrer is already + // there, so we skip over it but change the transition type of the first + // transition to client redirect. + // + // The referrer is invalid when restoring a session that features an + // https tab that redirects to a different host or to http. In this + // case we don't need to reconnect the new redirect with the existing + // chain. + if (request->referrer.is_valid()) { + DCHECK(request->referrer == request->redirects[0]); + request->redirects.erase(request->redirects.begin()); + + // If the navigation entry for this visit has replaced that for the + // first visit, remove the CHAIN_END marker from the first visit. This + // can be called a lot, for example, the page cycler, and most of the + // time we won't have changed anything. + VisitRow visit_row; + if (request->did_replace_entry && + db_->GetRowForVisit(last_ids.second, &visit_row) && + visit_row.transition | PageTransition::CHAIN_END) { + visit_row.transition &= ~PageTransition::CHAIN_END; + db_->UpdateVisitRow(visit_row); + } + } + } + + for (size_t redirect_index = 0; redirect_index < request->redirects.size(); + redirect_index++) { + PageTransition::Type t = transition | redirect_info; + + // If this is the last transition, add a CHAIN_END marker + if (redirect_index == (request->redirects.size() - 1)) + t = t | PageTransition::CHAIN_END; + + // Record all redirect visits with the same timestamp. We don't display + // them anyway, and if we ever decide to, we can reconstruct their order + // from the redirect chain. 
+ last_ids = AddPageVisit(request->redirects[redirect_index], + last_recorded_time_, last_ids.second, t); + if (t & PageTransition::CHAIN_START) { + // Update the segment for this visit. + UpdateSegments(request->redirects[redirect_index], + from_visit_id, last_ids.second, t, last_recorded_time_); + } + + // Subsequent transitions in the redirect list must all be sever + // redirects. + redirect_info = PageTransition::SERVER_REDIRECT; + } + + // Last, save this redirect chain for later so we can set titles & favicons + // on the redirected pages properly. It is indexed by the destination page. + recent_redirects_.Put(request->url, request->redirects); + } + + // TODO(brettw) bug 1140015: Add an "add page" notification so the history + // views can keep in sync. + + // Add the last visit to the tracker so we can get outgoing transitions. + // TODO(evanm): Due to http://b/1194536 we lose the referrers of a subframe + // navigation anyway, so last_visit_id is always zero for them. But adding + // them here confuses main frame history, so we skip them for now. + if (transition != PageTransition::AUTO_SUBFRAME && + transition != PageTransition::MANUAL_SUBFRAME && !is_keyword_generated) { + tracker_.AddVisit(request->id_scope, request->page_id, request->url, + last_ids.second); + } + + if (text_database_.get()) { + text_database_->AddPageURL(request->url, last_ids.first, last_ids.second, + last_recorded_time_); + } + + ScheduleCommit(); +} + +void HistoryBackend::InitImpl() { + DCHECK(!db_.get()) << "Initializing HistoryBackend twice"; + // In the rare case where the db fails to initialize a dialog may get shown + // the blocks the caller, yet allows other messages through. For this reason + // we only set db_ to the created database if creation is successful. That + // way other methods won't do anything as db_ is still NULL. + + TimeTicks beginning_time = TimeTicks::Now(); + + // Compute the file names. 
Note that the index file can be removed when the + // text db manager is finished being hooked up. + FilePath history_name = history_dir_.Append(chrome::kHistoryFilename); + FilePath thumbnail_name = GetThumbnailFileName(); + FilePath archived_name = GetArchivedFileName(); + FilePath tmp_bookmarks_file = history_dir_.Append( + chrome::kHistoryBookmarksFileName); + + // History database. + db_.reset(new HistoryDatabase()); + switch (db_->Init(history_name, tmp_bookmarks_file)) { + case sql::INIT_OK: + break; + case sql::INIT_FAILURE: + // A NULL db_ will cause all calls on this object to notice this error + // and to not continue. + delegate_->NotifyProfileError(IDS_COULDNT_OPEN_PROFILE_ERROR); + db_.reset(); + return; + case sql::INIT_TOO_NEW: + delegate_->NotifyProfileError(IDS_PROFILE_TOO_NEW_ERROR); + db_.reset(); + return; + default: + NOTREACHED(); + } + + // Fill the in-memory database and send it back to the history service on the + // main thread. + InMemoryHistoryBackend* mem_backend = new InMemoryHistoryBackend; + if (mem_backend->Init(history_name)) + delegate_->SetInMemoryBackend(mem_backend); // Takes ownership of pointer. + else + delete mem_backend; // Error case, run without the in-memory DB. + db_->BeginExclusiveMode(); // Must be after the mem backend read the data. + + // Create the history publisher which needs to be passed on to the text and + // thumbnail databases for publishing history. + history_publisher_.reset(new HistoryPublisher()); + if (!history_publisher_->Init()) { + // The init may fail when there are no indexers wanting our history. + // Hence no need to log the failure. + history_publisher_.reset(); + } + + // Full-text database. This has to be first so we can pass it to the + // HistoryDatabase for migration. 
+ text_database_.reset(new TextDatabaseManager(history_dir_, + db_.get(), db_.get())); + if (!text_database_->Init(history_publisher_.get())) { + LOG(WARNING) << "Text database initialization failed, running without it."; + text_database_.reset(); + } + if (db_->needs_version_17_migration()) { + // See needs_version_17_migration() decl for more. In this case, we want + // to erase all the text database files. This must be done after the text + // database manager has been initialized, since it knows about all the + // files it manages. + text_database_->DeleteAll(); + } + + // Thumbnail database. + thumbnail_db_.reset(new ThumbnailDatabase()); + if (CommandLine::ForCurrentProcess()->HasSwitch(switches::kTopSites)) { + if (!db_->needs_version_18_migration()) { + // No convertion needed - use new filename right away. + thumbnail_name = GetFaviconsFileName(); + } + } + if (thumbnail_db_->Init(thumbnail_name, + history_publisher_.get()) != sql::INIT_OK) { + // Unlike the main database, we don't error out when the database is too + // new because this error is much less severe. Generally, this shouldn't + // happen since the thumbnail and main datbase versions should be in sync. + // We'll just continue without thumbnails & favicons in this case or any + // other error. + LOG(WARNING) << "Could not initialize the thumbnail database."; + thumbnail_db_.reset(); + } + + if (CommandLine::ForCurrentProcess()->HasSwitch(switches::kTopSites)) { + if (db_->needs_version_18_migration()) { + LOG(INFO) << "Starting TopSites migration"; + delegate_->StartTopSitesMigration(); + } + } + + // Archived database. + if (db_->needs_version_17_migration()) { + // See needs_version_17_migration() decl for more. In this case, we want + // to delete the archived database and need to do so before we try to + // open the file. We can ignore any error (maybe the file doesn't exist). 
+ file_util::Delete(archived_name, false); + } + archived_db_.reset(new ArchivedDatabase()); + if (!archived_db_->Init(archived_name)) { + LOG(WARNING) << "Could not initialize the archived database."; + archived_db_.reset(); + } + + // Tell the expiration module about all the nice databases we made. This must + // happen before db_->Init() is called since the callback ForceArchiveHistory + // may need to expire stuff. + // + // *sigh*, this can all be cleaned up when that migration code is removed. + // The main DB initialization should intuitively be first (not that it + // actually matters) and the expirer should be set last. + expirer_.SetDatabases(db_.get(), archived_db_.get(), + thumbnail_db_.get(), text_database_.get()); + + // Open the long-running transaction. + db_->BeginTransaction(); + if (thumbnail_db_.get()) + thumbnail_db_->BeginTransaction(); + if (archived_db_.get()) + archived_db_->BeginTransaction(); + if (text_database_.get()) + text_database_->BeginTransaction(); + + // Get the first item in our database. + db_->GetStartDate(&first_recorded_time_); + + // Start expiring old stuff. + expirer_.StartArchivingOldStuff(TimeDelta::FromDays(kArchiveDaysThreshold)); + + HISTOGRAM_TIMES("History.InitTime", + TimeTicks::Now() - beginning_time); +} + +std::pair<URLID, VisitID> HistoryBackend::AddPageVisit( + const GURL& url, + Time time, + VisitID referring_visit, + PageTransition::Type transition) { + // Top-level frame navigations are visible, everything else is hidden + bool new_hidden = !PageTransition::IsMainFrame(transition); + + // NOTE: This code must stay in sync with + // ExpireHistoryBackend::ExpireURLsForVisits(). + // TODO(pkasting): http://b/1148304 We shouldn't be marking so many URLs as + // typed, which would eliminate the need for this code. 
+ int typed_increment = 0; + PageTransition::Type transition_type = + PageTransition::StripQualifier(transition); + if ((transition_type == PageTransition::TYPED && + !PageTransition::IsRedirect(transition)) || + transition_type == PageTransition::KEYWORD_GENERATED) + typed_increment = 1; + + // See if this URL is already in the DB. + URLRow url_info(url); + URLID url_id = db_->GetRowForURL(url, &url_info); + if (url_id) { + // Update of an existing row. + if (PageTransition::StripQualifier(transition) != PageTransition::RELOAD) + url_info.set_visit_count(url_info.visit_count() + 1); + if (typed_increment) + url_info.set_typed_count(url_info.typed_count() + typed_increment); + url_info.set_last_visit(time); + + // Only allow un-hiding of pages, never hiding. + if (!new_hidden) + url_info.set_hidden(false); + + db_->UpdateURLRow(url_id, url_info); + } else { + // Addition of a new row. + url_info.set_visit_count(1); + url_info.set_typed_count(typed_increment); + url_info.set_last_visit(time); + url_info.set_hidden(new_hidden); + + url_id = db_->AddURL(url_info); + if (!url_id) { + NOTREACHED() << "Adding URL failed."; + return std::make_pair(0, 0); + } + url_info.id_ = url_id; + + // We don't actually add the URL to the full text index at this point. It + // might be nice to do this so that even if we get no title or body, the + // user can search for URL components and get the page. + // + // However, in most cases, we'll get at least a title and usually contents, + // and this add will be redundant, slowing everything down. As a result, + // we ignore this edge case. + } + + // Add the visit with the time to the database. + VisitRow visit_info(url_id, time, referring_visit, transition, 0); + VisitID visit_id = db_->AddVisit(&visit_info); + + if (visit_info.visit_time < first_recorded_time_) + first_recorded_time_ = visit_info.visit_time; + + // Broadcast a notification of the visit. 
+ if (visit_id) { + URLVisitedDetails* details = new URLVisitedDetails; + details->transition = transition; + details->row = url_info; + // TODO(meelapshah) Disabled due to potential PageCycler regression. + // Re-enable this. + // GetMostRecentRedirectsTo(url, &details->redirects); + BroadcastNotifications(NotificationType::HISTORY_URL_VISITED, details); + } + + return std::make_pair(url_id, visit_id); +} + +void HistoryBackend::AddPagesWithDetails(const std::vector<URLRow>& urls) { + if (!db_.get()) + return; + + scoped_ptr<URLsModifiedDetails> modified(new URLsModifiedDetails); + for (std::vector<URLRow>::const_iterator i = urls.begin(); + i != urls.end(); ++i) { + DCHECK(!i->last_visit().is_null()); + + // We will add to either the archived database or the main one depending on + // the date of the added visit. + URLDatabase* url_database; + VisitDatabase* visit_database; + if (i->last_visit() < expirer_.GetCurrentArchiveTime()) { + if (!archived_db_.get()) + return; // No archived database to save it to, just forget this. + url_database = archived_db_.get(); + visit_database = archived_db_.get(); + } else { + url_database = db_.get(); + visit_database = db_.get(); + } + + URLRow existing_url; + URLID url_id = url_database->GetRowForURL(i->url(), &existing_url); + if (!url_id) { + // Add the page if it doesn't exist. + url_id = url_database->AddURL(*i); + if (!url_id) { + NOTREACHED() << "Could not add row to DB"; + return; + } + + if (i->typed_count() > 0) + modified->changed_urls.push_back(*i); + } + + // Add the page to the full text index. This function is also used for + // importing. Even though we don't have page contents, we can at least + // add the title and URL to the index so they can be searched. We don't + // bother to delete any already-existing FTS entries for the URL, since + // this is normally called on import. 
+ // + // If you ever import *after* first run (selecting import from the menu), + // then these additional entries will "shadow" the originals when querying + // for the most recent match only, and the user won't get snippets. This is + // a very minor issue, and fixing it will make import slower, so we don't + // bother. + bool has_indexed = false; + if (text_database_.get()) { + // We do not have to make it update the visit database, below, we will + // create the visit entry with the indexed flag set. + has_indexed = text_database_->AddPageData(i->url(), url_id, 0, + i->last_visit(), + i->title(), string16()); + } + + // Make up a visit to correspond to that page. + VisitRow visit_info(url_id, i->last_visit(), 0, + PageTransition::LINK | PageTransition::CHAIN_START | + PageTransition::CHAIN_END, 0); + visit_info.is_indexed = has_indexed; + if (!visit_database->AddVisit(&visit_info)) { + NOTREACHED() << "Adding visit failed."; + return; + } + + if (visit_info.visit_time < first_recorded_time_) + first_recorded_time_ = visit_info.visit_time; + } + + // Broadcast a notification for typed URLs that have been modified. This + // will be picked up by the in-memory URL database on the main thread. + // + // TODO(brettw) bug 1140015: Add an "add page" notification so the history + // views can keep in sync. + BroadcastNotifications(NotificationType::HISTORY_TYPED_URLS_MODIFIED, + modified.release()); + + ScheduleCommit(); +} + +void HistoryBackend::SetPageTitle(const GURL& url, + const string16& title) { + if (!db_.get()) + return; + + // Search for recent redirects which should get the same title. We make a + // dummy list containing the exact URL visited if there are no redirects so + // the processing below can be the same. 
+ history::RedirectList dummy_list; + history::RedirectList* redirects; + RedirectCache::iterator iter = recent_redirects_.Get(url); + if (iter != recent_redirects_.end()) { + redirects = &iter->second; + + // This redirect chain should have the destination URL as the last item. + DCHECK(!redirects->empty()); + DCHECK(redirects->back() == url); + } else { + // No redirect chain stored, make up one containing the URL we want so we + // can use the same logic below. + dummy_list.push_back(url); + redirects = &dummy_list; + } + + bool typed_url_changed = false; + std::vector<URLRow> changed_urls; + for (size_t i = 0; i < redirects->size(); i++) { + URLRow row; + URLID row_id = db_->GetRowForURL(redirects->at(i), &row); + if (row_id && row.title() != title) { + row.set_title(title); + db_->UpdateURLRow(row_id, row); + changed_urls.push_back(row); + if (row.typed_count() > 0) + typed_url_changed = true; + } + } + + // Broadcast notifications for typed URLs that have changed. This will + // update the in-memory database. + // + // TODO(brettw) bug 1140020: Broadcast for all changes (not just typed), + // in which case some logic can be removed. + if (typed_url_changed) { + URLsModifiedDetails* modified = + new URLsModifiedDetails; + for (size_t i = 0; i < changed_urls.size(); i++) { + if (changed_urls[i].typed_count() > 0) + modified->changed_urls.push_back(changed_urls[i]); + } + BroadcastNotifications(NotificationType::HISTORY_TYPED_URLS_MODIFIED, + modified); + } + + // Update the full text index. + if (text_database_.get()) + text_database_->AddPageTitle(url, title); + + // Only bother committing if things changed. + if (!changed_urls.empty()) + ScheduleCommit(); +} + +void HistoryBackend::IterateURLs(HistoryService::URLEnumerator* iterator) { + if (db_.get()) { + HistoryDatabase::URLEnumerator e; + if (db_->InitURLEnumeratorForEverything(&e)) { + URLRow info; + while (e.GetNextURL(&info)) { + iterator->OnURL(info.url()); + } + iterator->OnComplete(true); // Success. 
+ return; + } + } + iterator->OnComplete(false); // Failure. +} + +bool HistoryBackend::GetAllTypedURLs(std::vector<history::URLRow>* urls) { + if (db_.get()) + return db_->GetAllTypedUrls(urls); + return false; +} + +bool HistoryBackend::GetVisitsForURL(URLID id, VisitVector* visits) { + if (db_.get()) + return db_->GetVisitsForURL(id, visits); + return false; +} + +bool HistoryBackend::UpdateURL(URLID id, const history::URLRow& url) { + if (db_.get()) + return db_->UpdateURLRow(id, url); + return false; +} + +bool HistoryBackend::AddVisits(const GURL& url, + const std::vector<base::Time>& visits) { + if (db_.get()) { + for (std::vector<base::Time>::const_iterator visit = visits.begin(); + visit != visits.end(); ++visit) { + if (!AddPageVisit(url, *visit, 0, 0).first) { + return false; + } + } + ScheduleCommit(); + return true; + } + return false; +} + +bool HistoryBackend::RemoveVisits(const VisitVector& visits) { + if (db_.get()) { + std::map<URLID, int> url_visits_removed; + for (VisitVector::const_iterator visit = visits.begin(); + visit != visits.end(); ++visit) { + db_->DeleteVisit(*visit); + std::map<URLID, int>::iterator visit_count = + url_visits_removed.find(visit->url_id); + if (visit_count == url_visits_removed.end()) { + url_visits_removed[visit->url_id] = 1; + } else { + ++visit_count->second; + } + } + for (std::map<URLID, int>::iterator count = url_visits_removed.begin(); + count != url_visits_removed.end(); ++count) { + history::URLRow url_row; + if (!db_->GetURLRow(count->first, &url_row)) { + return false; + } + DCHECK(count->second <= url_row.visit_count()); + url_row.set_visit_count(url_row.visit_count() - count->second); + if (!db_->UpdateURLRow(url_row.id(), url_row)) { + return false; + } + } + ScheduleCommit(); + return true; + } + return false; +} + +bool HistoryBackend::GetURL(const GURL& url, history::URLRow* url_row) { + if (db_.get()) + return db_->GetRowForURL(url, url_row) != 0; + return false; +} + +void 
HistoryBackend::QueryURL(scoped_refptr<QueryURLRequest> request, + const GURL& url, + bool want_visits) { + if (request->canceled()) + return; + + bool success = false; + URLRow* row = &request->value.a; + VisitVector* visits = &request->value.b; + if (db_.get()) { + if (db_->GetRowForURL(url, row)) { + // Have a row. + success = true; + + // Optionally query the visits. + if (want_visits) + db_->GetVisitsForURL(row->id(), visits); + } + } + request->ForwardResult(QueryURLRequest::TupleType(request->handle(), success, + row, visits)); +} + +// Segment usage --------------------------------------------------------------- + +void HistoryBackend::DeleteOldSegmentData() { + if (db_.get()) + db_->DeleteSegmentData(Time::Now() - + TimeDelta::FromDays(kSegmentDataRetention)); +} + +void HistoryBackend::SetSegmentPresentationIndex(SegmentID segment_id, + int index) { + if (db_.get()) + db_->SetSegmentPresentationIndex(segment_id, index); +} + +void HistoryBackend::QuerySegmentUsage( + scoped_refptr<QuerySegmentUsageRequest> request, + const Time from_time, + int max_result_count) { + if (request->canceled()) + return; + + if (db_.get()) { + db_->QuerySegmentUsage(from_time, max_result_count, &request->value.get()); + + // If this is the first time we query segments, invoke + // DeleteOldSegmentData asynchronously. We do this to cleanup old + // entries. + if (!segment_queried_) { + segment_queried_ = true; + MessageLoop::current()->PostTask(FROM_HERE, + NewRunnableMethod(this, &HistoryBackend::DeleteOldSegmentData)); + } + } + request->ForwardResult( + QuerySegmentUsageRequest::TupleType(request->handle(), + &request->value.get())); +} + +// Keyword visits -------------------------------------------------------------- + +void HistoryBackend::SetKeywordSearchTermsForURL(const GURL& url, + TemplateURL::IDType keyword_id, + const string16& term) { + if (!db_.get()) + return; + + // Get the ID for this URL. 
+ URLRow url_row; + if (!db_->GetRowForURL(url, &url_row)) { + // There is a small possibility the url was deleted before the keyword + // was added. Ignore the request. + return; + } + + db_->SetKeywordSearchTermsForURL(url_row.id(), keyword_id, term); + ScheduleCommit(); +} + +void HistoryBackend::DeleteAllSearchTermsForKeyword( + TemplateURL::IDType keyword_id) { + if (!db_.get()) + return; + + db_->DeleteAllSearchTermsForKeyword(keyword_id); + // TODO(sky): bug 1168470. Need to move from archive dbs too. + ScheduleCommit(); +} + +void HistoryBackend::GetMostRecentKeywordSearchTerms( + scoped_refptr<GetMostRecentKeywordSearchTermsRequest> request, + TemplateURL::IDType keyword_id, + const string16& prefix, + int max_count) { + if (request->canceled()) + return; + + if (db_.get()) { + db_->GetMostRecentKeywordSearchTerms(keyword_id, prefix, max_count, + &(request->value)); + } + request->ForwardResult( + GetMostRecentKeywordSearchTermsRequest::TupleType(request->handle(), + &request->value)); +} + +// Downloads ------------------------------------------------------------------- + +// Get all the download entries from the database. +void HistoryBackend::QueryDownloads( + scoped_refptr<DownloadQueryRequest> request) { + if (request->canceled()) + return; + if (db_.get()) + db_->QueryDownloads(&request->value); + request->ForwardResult(DownloadQueryRequest::TupleType(&request->value)); +} + +// Clean up entries that has been corrupted (because of the crash, for example). +void HistoryBackend::CleanUpInProgressEntries() { + if (db_.get()) { + // If some "in progress" entries were not updated when Chrome exited, they + // need to be cleaned up. + db_->CleanUpInProgressEntries(); + } +} + +// Update a particular download entry. +void HistoryBackend::UpdateDownload(int64 received_bytes, + int32 state, + int64 db_handle) { + if (db_.get()) + db_->UpdateDownload(received_bytes, state, db_handle); +} + +// Update the path of a particular download entry. 
+void HistoryBackend::UpdateDownloadPath(const FilePath& path, + int64 db_handle) { + if (db_.get()) + db_->UpdateDownloadPath(path, db_handle); +} + +// Create a new download entry and pass back the db_handle to it. +void HistoryBackend::CreateDownload( + scoped_refptr<DownloadCreateRequest> request, + const DownloadCreateInfo& create_info) { + int64 db_handle = 0; + if (!request->canceled()) { + if (db_.get()) + db_handle = db_->CreateDownload(create_info); + request->ForwardResult(DownloadCreateRequest::TupleType(create_info, + db_handle)); + } +} + +void HistoryBackend::RemoveDownload(int64 db_handle) { + if (db_.get()) + db_->RemoveDownload(db_handle); +} + +void HistoryBackend::RemoveDownloadsBetween(const Time remove_begin, + const Time remove_end) { + if (db_.get()) + db_->RemoveDownloadsBetween(remove_begin, remove_end); +} + +void HistoryBackend::SearchDownloads( + scoped_refptr<DownloadSearchRequest> request, + const string16& search_text) { + if (request->canceled()) + return; + if (db_.get()) + db_->SearchDownloads(&request->value, search_text); + request->ForwardResult(DownloadSearchRequest::TupleType(request->handle(), + &request->value)); +} + +void HistoryBackend::QueryHistory(scoped_refptr<QueryHistoryRequest> request, + const string16& text_query, + const QueryOptions& options) { + if (request->canceled()) + return; + + TimeTicks beginning_time = TimeTicks::Now(); + + if (db_.get()) { + if (text_query.empty()) { + // Basic history query for the main database. + QueryHistoryBasic(db_.get(), db_.get(), options, &request->value); + + // Now query the archived database. This is a bit tricky because we don't + // want to query it if the queried time range isn't going to find anything + // in it. + // TODO(brettw) bug 1171036: do blimpie querying for the archived database + // as well. + // if (archived_db_.get() && + // expirer_.GetCurrentArchiveTime() - TimeDelta::FromDays(7)) { + } else { + // Full text history query. 
+ QueryHistoryFTS(text_query, options, &request->value); + } + } + + request->ForwardResult(QueryHistoryRequest::TupleType(request->handle(), + &request->value)); + + UMA_HISTOGRAM_TIMES("History.QueryHistory", + TimeTicks::Now() - beginning_time); +} + +// Basic time-based querying of history. +void HistoryBackend::QueryHistoryBasic(URLDatabase* url_db, + VisitDatabase* visit_db, + const QueryOptions& options, + QueryResults* result) { + // First get all visits. + VisitVector visits; + visit_db->GetVisibleVisitsInRange(options.begin_time, options.end_time, + options.max_count, &visits); + DCHECK(options.max_count == 0 || + static_cast<int>(visits.size()) <= options.max_count); + + // Now add them and the URL rows to the results. + URLResult url_result; + for (size_t i = 0; i < visits.size(); i++) { + const VisitRow visit = visits[i]; + + // Add a result row for this visit, get the URL info from the DB. + if (!url_db->GetURLRow(visit.url_id, &url_result)) + continue; // DB out of sync and URL doesn't exist, try to recover. + if (!url_result.url().is_valid()) + continue; // Don't report invalid URLs in case of corruption. + + // The archived database may be out of sync with respect to starring, + // titles, last visit date, etc. Therefore, we query the main DB if the + // current URL database is not the main one. + if (url_db == db_.get()) { + // Currently querying the archived DB, update with the main database to + // catch any interesting stuff. This will update it if it exists in the + // main DB, and do nothing otherwise. + db_->GetRowForURL(url_result.url(), &url_result); + } + + url_result.set_visit_time(visit.visit_time); + + // We don't set any of the query-specific parts of the URLResult, since + // snippets and stuff don't apply to basic querying. 
+ result->AppendURLBySwapping(&url_result); + } + + if (options.begin_time <= first_recorded_time_) + result->set_reached_beginning(true); +} + +void HistoryBackend::QueryHistoryFTS(const string16& text_query, + const QueryOptions& options, + QueryResults* result) { + if (!text_database_.get()) + return; + + // Full text query, first get all the FTS results in the time range. + std::vector<TextDatabase::Match> fts_matches; + Time first_time_searched; + text_database_->GetTextMatches(text_query, options, + &fts_matches, &first_time_searched); + + URLQuerier querier(db_.get(), archived_db_.get(), true); + + // Now get the row and visit information for each one. + URLResult url_result; // Declare outside loop to prevent re-construction. + for (size_t i = 0; i < fts_matches.size(); i++) { + if (options.max_count != 0 && + static_cast<int>(result->size()) >= options.max_count) + break; // Got too many items. + + // Get the URL, querying the main and archived databases as necessary. If + // this is not found, the history and full text search databases are out + // of sync and we give up with this result. + if (!querier.GetRowForURL(fts_matches[i].url, &url_result)) + continue; + + if (!url_result.url().is_valid()) + continue; // Don't report invalid URLs in case of corruption. + + // Copy over the FTS stuff that the URLDatabase doesn't know about. + // We do this with swap() to avoid copying, since we know we don't + // need the original any more. Note that we override the title with the + // one from FTS, since that will match the title_match_positions (the + // FTS title and the history DB title may differ). + url_result.set_title(fts_matches[i].title); + url_result.title_match_positions_.swap( + fts_matches[i].title_match_positions); + url_result.snippet_.Swap(&fts_matches[i].snippet); + + // The visit time also comes from the full text search database. Since it + // has the time, we can avoid an extra query of the visits table. 
+ url_result.set_visit_time(fts_matches[i].time); + + // Add it to the vector, this will clear our |url_row| object as a + // result of the swap. + result->AppendURLBySwapping(&url_result); + } + + if (options.begin_time <= first_recorded_time_) + result->set_reached_beginning(true); +} + +// Frontend to GetMostRecentRedirectsFrom from the history thread. +void HistoryBackend::QueryRedirectsFrom( + scoped_refptr<QueryRedirectsRequest> request, + const GURL& url) { + if (request->canceled()) + return; + bool success = GetMostRecentRedirectsFrom(url, &request->value); + request->ForwardResult(QueryRedirectsRequest::TupleType( + request->handle(), url, success, &request->value)); +} + +void HistoryBackend::QueryRedirectsTo( + scoped_refptr<QueryRedirectsRequest> request, + const GURL& url) { + if (request->canceled()) + return; + bool success = GetMostRecentRedirectsTo(url, &request->value); + request->ForwardResult(QueryRedirectsRequest::TupleType( + request->handle(), url, success, &request->value)); +} + +void HistoryBackend::GetVisitCountToHost( + scoped_refptr<GetVisitCountToHostRequest> request, + const GURL& url) { + if (request->canceled()) + return; + int count = 0; + Time first_visit; + const bool success = (db_.get() && db_->GetVisitCountToHost(url, &count, + &first_visit)); + request->ForwardResult(GetVisitCountToHostRequest::TupleType( + request->handle(), success, count, first_visit)); +} + +void HistoryBackend::QueryTopURLsAndRedirects( + scoped_refptr<QueryTopURLsAndRedirectsRequest> request, + int result_count) { + if (request->canceled()) + return; + + if (!db_.get()) { + request->ForwardResult(QueryTopURLsAndRedirectsRequest::TupleType( + request->handle(), false, NULL, NULL)); + return; + } + + std::vector<GURL>* top_urls = &request->value.a; + history::RedirectMap* redirects = &request->value.b; + + ScopedVector<PageUsageData> data; + db_->QuerySegmentUsage(base::Time::Now() - base::TimeDelta::FromDays(90), + result_count, &data.get()); + + for 
(size_t i = 0; i < data.size(); ++i) { + top_urls->push_back(data[i]->GetURL()); + RefCountedVector<GURL>* list = new RefCountedVector<GURL>; + GetMostRecentRedirectsFrom(top_urls->back(), &list->data); + (*redirects)[top_urls->back()] = list; + } + + request->ForwardResult(QueryTopURLsAndRedirectsRequest::TupleType( + request->handle(), true, top_urls, redirects)); +} + +// Will replace QueryTopURLsAndRedirectsRequest. +void HistoryBackend::QueryMostVisitedURLs( + scoped_refptr<QueryMostVisitedURLsRequest> request, + int result_count, + int days_back) { + if (request->canceled()) + return; + + if (!db_.get()) { + // No History Database - return an empty list. + request->ForwardResult(QueryMostVisitedURLsRequest::TupleType( + request->handle(), MostVisitedURLList())); + return; + } + + MostVisitedURLList* result = &request->value; + + ScopedVector<PageUsageData> data; + db_->QuerySegmentUsage(base::Time::Now() - + base::TimeDelta::FromDays(days_back), + result_count, &data.get()); + + for (size_t i = 0; i < data.size(); ++i) { + PageUsageData* current_data = data[i]; + RedirectList redirects; + GetMostRecentRedirectsFrom(current_data->GetURL(), &redirects); + MostVisitedURL url = MakeMostVisitedURL(*current_data, redirects); + result->push_back(url); + } + + request->ForwardResult(QueryMostVisitedURLsRequest::TupleType( + request->handle(), *result)); +} + +void HistoryBackend::GetRedirectsFromSpecificVisit( + VisitID cur_visit, history::RedirectList* redirects) { + // Follow any redirects from the given visit and add them to the list. + // It *should* be impossible to get a circular chain here, but we check + // just in case to avoid infinite loops. 
+ GURL cur_url; + std::set<VisitID> visit_set; + visit_set.insert(cur_visit); + while (db_->GetRedirectFromVisit(cur_visit, &cur_visit, &cur_url)) { + if (visit_set.find(cur_visit) != visit_set.end()) { + NOTREACHED() << "Loop in visit chain, giving up"; + return; + } + visit_set.insert(cur_visit); + redirects->push_back(cur_url); + } +} + +void HistoryBackend::GetRedirectsToSpecificVisit( + VisitID cur_visit, + history::RedirectList* redirects) { + // Follow redirects going to cur_visit. These are added to |redirects| in + // the order they are found. If a redirect chain looks like A -> B -> C and + // |cur_visit| = C, redirects will be {B, A} in that order. + if (!db_.get()) + return; + + GURL cur_url; + std::set<VisitID> visit_set; + visit_set.insert(cur_visit); + while (db_->GetRedirectToVisit(cur_visit, &cur_visit, &cur_url)) { + if (visit_set.find(cur_visit) != visit_set.end()) { + NOTREACHED() << "Loop in visit chain, giving up"; + return; + } + visit_set.insert(cur_visit); + redirects->push_back(cur_url); + } +} + +bool HistoryBackend::GetMostRecentRedirectsFrom( + const GURL& from_url, + history::RedirectList* redirects) { + redirects->clear(); + if (!db_.get()) + return false; + + URLID from_url_id = db_->GetRowForURL(from_url, NULL); + VisitID cur_visit = db_->GetMostRecentVisitForURL(from_url_id, NULL); + if (!cur_visit) + return false; // No visits for URL. + + GetRedirectsFromSpecificVisit(cur_visit, redirects); + return true; +} + +bool HistoryBackend::GetMostRecentRedirectsTo( + const GURL& to_url, + history::RedirectList* redirects) { + redirects->clear(); + if (!db_.get()) + return false; + + URLID to_url_id = db_->GetRowForURL(to_url, NULL); + VisitID cur_visit = db_->GetMostRecentVisitForURL(to_url_id, NULL); + if (!cur_visit) + return false; // No visits for URL. 
+ + GetRedirectsToSpecificVisit(cur_visit, redirects); + return true; +} + +void HistoryBackend::ScheduleAutocomplete(HistoryURLProvider* provider, + HistoryURLProviderParams* params) { + // ExecuteWithDB should handle the NULL database case. + provider->ExecuteWithDB(this, db_.get(), params); +} + +void HistoryBackend::SetPageContents(const GURL& url, + const string16& contents) { + // This is histogrammed in the text database manager. + if (!text_database_.get()) + return; + text_database_->AddPageContents(url, contents); +} + +void HistoryBackend::SetPageThumbnail( + const GURL& url, + const SkBitmap& thumbnail, + const ThumbnailScore& score) { + if (!db_.get() || !thumbnail_db_.get()) + return; + + URLRow url_row; + URLID url_id = db_->GetRowForURL(url, &url_row); + if (url_id) { + thumbnail_db_->SetPageThumbnail(url, url_id, thumbnail, score, + url_row.last_visit()); + } + + ScheduleCommit(); +} + +void HistoryBackend::GetPageThumbnail( + scoped_refptr<GetPageThumbnailRequest> request, + const GURL& page_url) { + if (request->canceled()) + return; + + scoped_refptr<RefCountedBytes> data; + GetPageThumbnailDirectly(page_url, &data); + + request->ForwardResult(GetPageThumbnailRequest::TupleType( + request->handle(), data)); +} + +void HistoryBackend::GetPageThumbnailDirectly( + const GURL& page_url, + scoped_refptr<RefCountedBytes>* data) { + if (thumbnail_db_.get()) { + *data = new RefCountedBytes; + + // Time the result. + TimeTicks beginning_time = TimeTicks::Now(); + + history::RedirectList redirects; + URLID url_id; + bool success = false; + + // If there are some redirects, try to get a thumbnail from the last + // redirect destination. + if (GetMostRecentRedirectsFrom(page_url, &redirects) && + !redirects.empty()) { + if ((url_id = db_->GetRowForURL(redirects.back(), NULL))) + success = thumbnail_db_->GetPageThumbnail(url_id, &(*data)->data); + } + + // If we don't have a thumbnail from redirects, try the URL directly. 
+ if (!success) { + if ((url_id = db_->GetRowForURL(page_url, NULL))) + success = thumbnail_db_->GetPageThumbnail(url_id, &(*data)->data); + } + + // In this rare case, we start to mine the older redirect sessions + // from the visit table to try to find a thumbnail. + if (!success) { + success = GetThumbnailFromOlderRedirect(page_url, &(*data)->data); + } + + if (!success) + *data = NULL; // This will tell the callback there was an error. + + UMA_HISTOGRAM_TIMES("History.GetPageThumbnail", + TimeTicks::Now() - beginning_time); + } +} + +bool HistoryBackend::GetThumbnailFromOlderRedirect( + const GURL& page_url, + std::vector<unsigned char>* data) { + // Look at a few previous visit sessions. + VisitVector older_sessions; + URLID page_url_id = db_->GetRowForURL(page_url, NULL); + static const int kVisitsToSearchForThumbnail = 4; + db_->GetMostRecentVisitsForURL( + page_url_id, kVisitsToSearchForThumbnail, &older_sessions); + + // Iterate across all those previous visits, and see if any of the + // final destinations of those redirect chains have a good thumbnail + // for us. 
+ bool success = false; + for (VisitVector::const_iterator it = older_sessions.begin(); + !success && it != older_sessions.end(); ++it) { + history::RedirectList redirects; + if (it->visit_id) { + GetRedirectsFromSpecificVisit(it->visit_id, &redirects); + + if (!redirects.empty()) { + URLID url_id; + if ((url_id = db_->GetRowForURL(redirects.back(), NULL))) + success = thumbnail_db_->GetPageThumbnail(url_id, data); + } + } + } + + return success; +} + +void HistoryBackend::GetFavIcon(scoped_refptr<GetFavIconRequest> request, + const GURL& icon_url) { + UpdateFavIconMappingAndFetchImpl(NULL, icon_url, request); +} + +void HistoryBackend::UpdateFavIconMappingAndFetch( + scoped_refptr<GetFavIconRequest> request, + const GURL& page_url, + const GURL& icon_url) { + UpdateFavIconMappingAndFetchImpl(&page_url, icon_url, request); +} + +void HistoryBackend::SetFavIconOutOfDateForPage(const GURL& page_url) { + if (!thumbnail_db_.get() || !db_.get()) + return; + + URLRow url_row; + URLID url_id = db_->GetRowForURL(page_url, &url_row); + if (!url_id || !url_row.favicon_id()) + return; + + thumbnail_db_->SetFavIconLastUpdateTime(url_row.favicon_id(), Time()); + ScheduleCommit(); +} + +void HistoryBackend::SetImportedFavicons( + const std::vector<ImportedFavIconUsage>& favicon_usage) { + if (!db_.get() || !thumbnail_db_.get()) + return; + + Time now = Time::Now(); + + // Track all URLs that had their favicons set or updated. + std::set<GURL> favicons_changed; + + for (size_t i = 0; i < favicon_usage.size(); i++) { + FavIconID favicon_id = thumbnail_db_->GetFavIconIDForFavIconURL( + favicon_usage[i].favicon_url); + if (!favicon_id) { + // This favicon doesn't exist yet, so we create it using the given data. + favicon_id = thumbnail_db_->AddFavIcon(favicon_usage[i].favicon_url); + if (!favicon_id) + continue; // Unable to add the favicon. 
+ thumbnail_db_->SetFavIcon(favicon_id, + new RefCountedBytes(favicon_usage[i].png_data), now); + } + + // Save the mapping from all the URLs to the favicon. + BookmarkService* bookmark_service = GetBookmarkService(); + for (std::set<GURL>::const_iterator url = favicon_usage[i].urls.begin(); + url != favicon_usage[i].urls.end(); ++url) { + URLRow url_row; + if (!db_->GetRowForURL(*url, &url_row)) { + // If the URL is present as a bookmark, add the url in history to + // save the favicon mapping. This will match with what history db does + // for regular bookmarked URLs with favicons - when history db is + // cleaned, we keep an entry in the db with 0 visits as long as that + // url is bookmarked. + if (bookmark_service && bookmark_service_->IsBookmarked(*url)) { + URLRow url_info(*url); + url_info.set_visit_count(0); + url_info.set_typed_count(0); + url_info.set_last_visit(base::Time()); + url_info.set_hidden(false); + url_info.set_favicon_id(favicon_id); + db_->AddURL(url_info); + favicons_changed.insert(*url); + } + } else if (url_row.favicon_id() == 0) { + // URL is present in history, update the favicon *only* if it + // is not set already. + url_row.set_favicon_id(favicon_id); + db_->UpdateURLRow(url_row.id(), url_row); + favicons_changed.insert(*url); + } + } + } + + if (!favicons_changed.empty()) { + // Send the notification about the changed favicon URLs. 
+ FavIconChangeDetails* changed_details = new FavIconChangeDetails; + changed_details->urls.swap(favicons_changed); + BroadcastNotifications(NotificationType::FAVICON_CHANGED, changed_details); + } +} + +void HistoryBackend::UpdateFavIconMappingAndFetchImpl( + const GURL* page_url, + const GURL& icon_url, + scoped_refptr<GetFavIconRequest> request) { + if (request->canceled()) + return; + + bool know_favicon = false; + bool expired = true; + scoped_refptr<RefCountedBytes> data; + + if (thumbnail_db_.get()) { + const FavIconID favicon_id = + thumbnail_db_->GetFavIconIDForFavIconURL(icon_url); + if (favicon_id) { + data = new RefCountedBytes; + know_favicon = true; + Time last_updated; + if (thumbnail_db_->GetFavIcon(favicon_id, &last_updated, &data->data, + NULL)) { + expired = (Time::Now() - last_updated) > + TimeDelta::FromDays(kFavIconRefetchDays); + } + + if (page_url) + SetFavIconMapping(*page_url, favicon_id); + } + // else case, haven't cached entry yet. Caller is responsible for + // downloading the favicon and invoking SetFavIcon. + } + request->ForwardResult(GetFavIconRequest::TupleType( + request->handle(), know_favicon, data, expired, + icon_url)); +} + +void HistoryBackend::GetFavIconForURL( + scoped_refptr<GetFavIconRequest> request, + const GURL& page_url) { + if (request->canceled()) + return; + + bool know_favicon = false; + bool expired = false; + GURL icon_url; + + scoped_refptr<RefCountedBytes> data; + + if (db_.get() && thumbnail_db_.get()) { + // Time the query. 
+ TimeTicks beginning_time = TimeTicks::Now(); + + URLRow url_info; + data = new RefCountedBytes; + Time last_updated; + if (db_->GetRowForURL(page_url, &url_info) && url_info.favicon_id() && + thumbnail_db_->GetFavIcon(url_info.favicon_id(), &last_updated, + &data->data, &icon_url)) { + know_favicon = true; + expired = (Time::Now() - last_updated) > + TimeDelta::FromDays(kFavIconRefetchDays); + } + + UMA_HISTOGRAM_TIMES("History.GetFavIconForURL", + TimeTicks::Now() - beginning_time); + } + + request->ForwardResult( + GetFavIconRequest::TupleType(request->handle(), know_favicon, data, + expired, icon_url)); +} + +void HistoryBackend::SetFavIcon( + const GURL& page_url, + const GURL& icon_url, + scoped_refptr<RefCountedMemory> data) { + DCHECK(data.get()); + if (!thumbnail_db_.get() || !db_.get()) + return; + + FavIconID id = thumbnail_db_->GetFavIconIDForFavIconURL(icon_url); + if (!id) + id = thumbnail_db_->AddFavIcon(icon_url); + + // Set the image data. + thumbnail_db_->SetFavIcon(id, data, Time::Now()); + + SetFavIconMapping(page_url, id); +} + +void HistoryBackend::SetFavIconMapping(const GURL& page_url, + FavIconID id) { + // Find all the pages whose favicons we should set, we want to set it for + // all the pages in the redirect chain if it redirected. + history::RedirectList dummy_list; + history::RedirectList* redirects; + RedirectCache::iterator iter = recent_redirects_.Get(page_url); + if (iter != recent_redirects_.end()) { + redirects = &iter->second; + + // This redirect chain should have the destination URL as the last item. + DCHECK(!redirects->empty()); + DCHECK(redirects->back() == page_url); + } else { + // No redirect chain stored, make up one containing the URL we want to we + // can use the same logic below. + dummy_list.push_back(page_url); + redirects = &dummy_list; + } + + std::set<GURL> favicons_changed; + + // Save page <-> favicon association. 
+ for (history::RedirectList::const_iterator i(redirects->begin()); + i != redirects->end(); ++i) { + URLRow row; + if (!db_->GetRowForURL(*i, &row) || row.favicon_id() == id) + continue; + + FavIconID old_id = row.favicon_id(); + if (old_id == id) + continue; + row.set_favicon_id(id); + db_->UpdateURLRow(row.id(), row); + + if (old_id) { + // The page's favicon ID changed. This means that the one we just + // changed from could have been orphaned, and we need to re-check it. + // This is not super fast, but this case will get triggered rarely, + // since normally a page will always map to the same favicon ID. It + // will mostly happen for favicons we import. + if (!db_->IsFavIconUsed(old_id) && thumbnail_db_.get()) + thumbnail_db_->DeleteFavIcon(old_id); + } + + favicons_changed.insert(row.url()); + } + + // Send the notification about the changed favicons. + FavIconChangeDetails* changed_details = new FavIconChangeDetails; + changed_details->urls.swap(favicons_changed); + BroadcastNotifications(NotificationType::FAVICON_CHANGED, changed_details); + + ScheduleCommit(); +} + +void HistoryBackend::Commit() { + if (!db_.get()) + return; + + // Note that a commit may not actually have been scheduled if a caller + // explicitly calls this instead of using ScheduleCommit. Likewise, we + // may reset the flag written by a pending commit. But this is OK! It + // will merely cause extra commits (which is kind of the idea). We + // could optimize more for this case (we may get two extra commits in + // some cases) but it hasn't been important yet. 
+ CancelScheduledCommit(); + + db_->CommitTransaction(); + DCHECK(db_->transaction_nesting() == 0) << "Somebody left a transaction open"; + db_->BeginTransaction(); + + if (thumbnail_db_.get()) { + thumbnail_db_->CommitTransaction(); + DCHECK(thumbnail_db_->transaction_nesting() == 0) << + "Somebody left a transaction open"; + thumbnail_db_->BeginTransaction(); + } + + if (archived_db_.get()) { + archived_db_->CommitTransaction(); + archived_db_->BeginTransaction(); + } + + if (text_database_.get()) { + text_database_->CommitTransaction(); + text_database_->BeginTransaction(); + } +} + +void HistoryBackend::ScheduleCommit() { + if (scheduled_commit_.get()) + return; + scheduled_commit_ = new CommitLaterTask(this); + MessageLoop::current()->PostDelayedTask(FROM_HERE, + NewRunnableMethod(scheduled_commit_.get(), + &CommitLaterTask::RunCommit), + kCommitIntervalMs); +} + +void HistoryBackend::CancelScheduledCommit() { + if (scheduled_commit_) { + scheduled_commit_->Cancel(); + scheduled_commit_ = NULL; + } +} + +void HistoryBackend::ProcessDBTaskImpl() { + if (!db_.get()) { + // db went away, release all the refs. + ReleaseDBTasks(); + return; + } + + // Remove any canceled tasks. + while (!db_task_requests_.empty() && db_task_requests_.front()->canceled()) { + db_task_requests_.front()->Release(); + db_task_requests_.pop_front(); + } + if (db_task_requests_.empty()) + return; + + // Run the first task. + HistoryDBTaskRequest* request = db_task_requests_.front(); + db_task_requests_.pop_front(); + if (request->value->RunOnDBThread(this, db_.get())) { + // The task is done. Notify the callback. + request->ForwardResult(HistoryDBTaskRequest::TupleType()); + // We AddRef'd the request before adding, need to release it now. + request->Release(); + } else { + // Tasks wants to run some more. Schedule it at the end of current tasks. + db_task_requests_.push_back(request); + // And process it after an invoke later. 
+ MessageLoop::current()->PostTask(FROM_HERE, NewRunnableMethod( + this, &HistoryBackend::ProcessDBTaskImpl)); + } +} + +void HistoryBackend::ReleaseDBTasks() { + for (std::list<HistoryDBTaskRequest*>::iterator i = + db_task_requests_.begin(); i != db_task_requests_.end(); ++i) { + (*i)->Release(); + } + db_task_requests_.clear(); +} + +//////////////////////////////////////////////////////////////////////////////// +// +// Generic operations +// +//////////////////////////////////////////////////////////////////////////////// + +void HistoryBackend::DeleteURLs(const std::vector<GURL>& urls) { + for (std::vector<GURL>::const_iterator url = urls.begin(); url != urls.end(); + ++url) { + expirer_.DeleteURL(*url); + } + + db_->GetStartDate(&first_recorded_time_); + // Force a commit, if the user is deleting something for privacy reasons, we + // want to get it on disk ASAP. + Commit(); +} + +void HistoryBackend::DeleteURL(const GURL& url) { + expirer_.DeleteURL(url); + + db_->GetStartDate(&first_recorded_time_); + // Force a commit, if the user is deleting something for privacy reasons, we + // want to get it on disk ASAP. + Commit(); +} + +void HistoryBackend::ExpireHistoryBetween( + scoped_refptr<ExpireHistoryRequest> request, + const std::set<GURL>& restrict_urls, + Time begin_time, + Time end_time) { + if (request->canceled()) + return; + + if (db_.get()) { + if (begin_time.is_null() && end_time.is_null() && restrict_urls.empty()) { + // Special case deleting all history so it can be faster and to reduce the + // possibility of an information leak. + DeleteAllHistory(); + } else { + // Clearing parts of history, have the expirer do the depend + expirer_.ExpireHistoryBetween(restrict_urls, begin_time, end_time); + + // Force a commit, if the user is deleting something for privacy reasons, + // we want to get it on disk ASAP. 
+ Commit(); + } + } + + if (begin_time <= first_recorded_time_) + db_->GetStartDate(&first_recorded_time_); + + request->ForwardResult(ExpireHistoryRequest::TupleType()); + + if (history_publisher_.get() && restrict_urls.empty()) + history_publisher_->DeleteUserHistoryBetween(begin_time, end_time); +} + +void HistoryBackend::URLsNoLongerBookmarked(const std::set<GURL>& urls) { + if (!db_.get()) + return; + + for (std::set<GURL>::const_iterator i = urls.begin(); i != urls.end(); ++i) { + URLRow url_row; + if (!db_->GetRowForURL(*i, &url_row)) + continue; // The URL isn't in the db; nothing to do. + + VisitVector visits; + db_->GetVisitsForURL(url_row.id(), &visits); + + if (visits.empty()) + expirer_.DeleteURL(*i); // There are no more visits; nuke the URL. + } +} + +void HistoryBackend::ProcessDBTask( + scoped_refptr<HistoryDBTaskRequest> request) { + DCHECK(request.get()); + if (request->canceled()) + return; + + bool task_scheduled = !db_task_requests_.empty(); + // Make sure we up the refcount of the request. ProcessDBTaskImpl will + // release when done with the task. + request->AddRef(); + db_task_requests_.push_back(request.get()); + if (!task_scheduled) { + // No other tasks are scheduled. Process request now. + ProcessDBTaskImpl(); + } +} + +void HistoryBackend::BroadcastNotifications( + NotificationType type, + HistoryDetails* details_deleted) { + DCHECK(delegate_.get()); + delegate_->BroadcastNotifications(type, details_deleted); +} + +// Deleting -------------------------------------------------------------------- + +void HistoryBackend::DeleteAllHistory() { + // Our approach to deleting all history is: + // 1. Copy the bookmarks and their dependencies to new tables with temporary + // names. + // 2. Delete the original tables. Since tables can not share pages, we know + // that any data we don't want to keep is now in an unused page. + // 3. Renaming the temporary tables to match the original. + // 4. Vacuuming the database to delete the unused pages. 
+ // + // Since we are likely to have very few bookmarks and their dependencies + // compared to all history, this is also much faster than just deleting from + // the original tables directly. + + // Get the bookmarked URLs. + std::vector<GURL> starred_urls; + BookmarkService* bookmark_service = GetBookmarkService(); + if (bookmark_service) + bookmark_service_->GetBookmarks(&starred_urls); + + std::vector<URLRow> kept_urls; + for (size_t i = 0; i < starred_urls.size(); i++) { + URLRow row; + if (!db_->GetRowForURL(starred_urls[i], &row)) + continue; + + // Clear the last visit time so when we write these rows they are "clean." + row.set_last_visit(Time()); + row.set_visit_count(0); + row.set_typed_count(0); + kept_urls.push_back(row); + } + + // Clear thumbnail and favicon history. The favicons for the given URLs will + // be kept. + if (!ClearAllThumbnailHistory(&kept_urls)) { + LOG(ERROR) << "Thumbnail history could not be cleared"; + // We continue in this error case. If the user wants to delete their + // history, we should delete as much as we can. + } + + // ClearAllMainHistory will change the IDs of the URLs in kept_urls. Therfore, + // we clear the list afterwards to make sure nobody uses this invalid data. + if (!ClearAllMainHistory(kept_urls)) + LOG(ERROR) << "Main history could not be cleared"; + kept_urls.clear(); + + // Delete FTS files & archived history. + if (text_database_.get()) { + // We assume that the text database has one transaction on them that we need + // to close & restart (the long-running history transaction). + text_database_->CommitTransaction(); + text_database_->DeleteAll(); + text_database_->BeginTransaction(); + } + + if (archived_db_.get()) { + // Close the database and delete the file. + archived_db_.reset(); + FilePath archived_file_name = GetArchivedFileName(); + file_util::Delete(archived_file_name, false); + + // Now re-initialize the database (which may fail). 
+ archived_db_.reset(new ArchivedDatabase()); + if (!archived_db_->Init(archived_file_name)) { + LOG(WARNING) << "Could not initialize the archived database."; + archived_db_.reset(); + } else { + // Open our long-running transaction on this database. + archived_db_->BeginTransaction(); + } + } + + db_->GetStartDate(&first_recorded_time_); + + // Send out the notfication that history is cleared. The in-memory datdabase + // will pick this up and clear itself. + URLsDeletedDetails* details = new URLsDeletedDetails; + details->all_history = true; + BroadcastNotifications(NotificationType::HISTORY_URLS_DELETED, details); +} + +bool HistoryBackend::ClearAllThumbnailHistory( + std::vector<URLRow>* kept_urls) { + if (!thumbnail_db_.get()) { + // When we have no reference to the thumbnail database, maybe there was an + // error opening it. In this case, we just try to blow it away to try to + // fix the error if it exists. This may fail, in which case either the + // file doesn't exist or there's no more we can do. + file_util::Delete(GetThumbnailFileName(), false); + return true; + } + + // Create the duplicate favicon table, this is where the favicons we want + // to keep will be stored. + if (!thumbnail_db_->InitTemporaryFavIconsTable()) + return false; + + // This maps existing favicon IDs to the ones in the temporary table. + typedef std::map<FavIconID, FavIconID> FavIconMap; + FavIconMap copied_favicons; + + // Copy all unique favicons to the temporary table, and update all the + // URLs to have the new IDs. + for (std::vector<URLRow>::iterator i = kept_urls->begin(); + i != kept_urls->end(); ++i) { + FavIconID old_id = i->favicon_id(); + if (!old_id) + continue; // URL has no favicon. 
+ FavIconID new_id; + + FavIconMap::const_iterator found = copied_favicons.find(old_id); + if (found == copied_favicons.end()) { + new_id = thumbnail_db_->CopyToTemporaryFavIconTable(old_id); + copied_favicons[old_id] = new_id; + } else { + // We already encountered a URL that used this favicon, use the ID we + // previously got. + new_id = found->second; + } + i->set_favicon_id(new_id); + } + + // Rename the duplicate favicon table back and recreate the other tables. + // This will make the database consistent again. + thumbnail_db_->CommitTemporaryFavIconTable(); + thumbnail_db_->RecreateThumbnailTable(); + + // Vacuum to remove all the pages associated with the dropped tables. There + // must be no transaction open on the table when we do this. We assume that + // our long-running transaction is open, so we complete it and start it again. + DCHECK(thumbnail_db_->transaction_nesting() == 1); + thumbnail_db_->CommitTransaction(); + thumbnail_db_->Vacuum(); + thumbnail_db_->BeginTransaction(); + return true; +} + +bool HistoryBackend::ClearAllMainHistory( + const std::vector<URLRow>& kept_urls) { + // Create the duplicate URL table. We will copy the kept URLs into this. + if (!db_->CreateTemporaryURLTable()) + return false; + + // Insert the URLs into the temporary table, we need to keep a map of changed + // IDs since the ID will be different in the new table. + typedef std::map<URLID, URLID> URLIDMap; + URLIDMap old_to_new; // Maps original ID to new one. + for (std::vector<URLRow>::const_iterator i = kept_urls.begin(); + i != kept_urls.end(); + ++i) { + URLID new_id = db_->AddTemporaryURL(*i); + old_to_new[i->id()] = new_id; + } + + // Replace the original URL table with the temporary one. + if (!db_->CommitTemporaryURLTable()) + return false; + + // Delete the old tables and recreate them empty. + db_->RecreateAllTablesButURL(); + + // Vacuum to reclaim the space from the dropped tables. 
This must be done + // when there is no transaction open, and we assume that our long-running + // transaction is currently open. + db_->CommitTransaction(); + db_->Vacuum(); + db_->BeginTransaction(); + db_->GetStartDate(&first_recorded_time_); + + return true; +} + +BookmarkService* HistoryBackend::GetBookmarkService() { + if (bookmark_service_) + bookmark_service_->BlockTillLoaded(); + return bookmark_service_; +} + +void HistoryBackend::MigrateThumbnailsDatabase() { + thumbnail_db_->RenameAndDropThumbnails(GetThumbnailFileName(), + GetFaviconsFileName()); + db_->MigrationToTopSitesDone(); +} + +} // namespace history diff --git a/chrome/browser/history/history_backend.h b/chrome/browser/history/history_backend.h new file mode 100644 index 0000000..cbda2e9 --- /dev/null +++ b/chrome/browser/history/history_backend.h @@ -0,0 +1,560 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef CHROME_BROWSER_HISTORY_HISTORY_BACKEND_H_ +#define CHROME_BROWSER_HISTORY_HISTORY_BACKEND_H_ + +#include <utility> + +#include "base/file_path.h" +#include "base/gtest_prod_util.h" +#include "base/scoped_ptr.h" +#include "chrome/browser/history/archived_database.h" +#include "chrome/browser/history/download_types.h" +#include "chrome/browser/history/expire_history_backend.h" +#include "chrome/browser/history/history_database.h" +#include "chrome/browser/history/history_marshaling.h" +#include "chrome/browser/history/history_types.h" +#include "chrome/browser/history/text_database_manager.h" +#include "chrome/browser/history/thumbnail_database.h" +#include "chrome/browser/history/visit_tracker.h" +#include "chrome/common/mru_cache.h" + +class BookmarkService; +class TestingProfile; +struct ThumbnailScore; + +namespace history { + +class CommitLaterTask; +class HistoryPublisher; + +// *See the .cc file for more information on the design.* +// +// Internal history implementation which does most of the work of the history +// system. This runs on a background thread (to not block the browser when we +// do expensive operations) and is NOT threadsafe, so it must only be called +// from message handlers on the background thread. Invoking on another thread +// requires threadsafe refcounting. +// +// Most functions here are just the implementations of the corresponding +// functions in the history service. These functions are not documented +// here, see the history service for behavior. +class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>, + public BroadcastNotificationDelegate { + public: + // Interface implemented by the owner of the HistoryBackend object. Normally, + // the history service implements this to send stuff back to the main thread. + // The unit tests can provide a different implementation if they don't have + // a history service object. 
+ class Delegate { + public: + virtual ~Delegate() {} + + // Called when the database cannot be read correctly for some reason. + virtual void NotifyProfileError(int message_id) = 0; + + // Sets the in-memory history backend. The in-memory backend is created by + // the main backend. For non-unit tests, this happens on the background + // thread. It is to be used on the main thread, so this would transfer + // it to the history service. Unit tests can override this behavior. + // + // This function is NOT guaranteed to be called. If there is an error, + // there may be no in-memory database. + // + // Ownership of the backend pointer is transferred to this function. + virtual void SetInMemoryBackend(InMemoryHistoryBackend* backend) = 0; + + // Broadcasts the specified notification to the notification service. + // This is implemented here because notifications must only be sent from + // the main thread. + // + // Ownership of the HistoryDetails is transferred to this function. + virtual void BroadcastNotifications(NotificationType type, + HistoryDetails* details) = 0; + + // Invoked when the backend has finished loading the db. + virtual void DBLoaded() = 0; + + // Tell TopSites to start reading thumbnails from the ThumbnailsDB. + virtual void StartTopSitesMigration() = 0; + }; + + // Init must be called to complete object creation. This object can be + // constructed on any thread, but all other functions including Init() must + // be called on the history thread. + // + // |history_dir| is the directory where the history files will be placed. + // See the definition of BroadcastNotificationsCallback above. This function + // takes ownership of the callback pointer. + // + // |bookmark_service| is used to determine bookmarked URLs when deleting and + // may be NULL. + // + // This constructor is fast and does no I/O, so can be called at any time. 
+ HistoryBackend(const FilePath& history_dir, + Delegate* delegate, + BookmarkService* bookmark_service); + + // Must be called after creation but before any objects are created. If this + // fails, all other functions will fail as well. (Since this runs on another + // thread, we don't bother returning failure.) + // + // |force_fail| can be set during unittests to unconditionally fail to init. + void Init(bool force_fail); + + // Notification that the history system is shutting down. This will break + // the refs owned by the delegate and any pending transaction so it will + // actually be deleted. + void Closing(); + + // See NotifyRenderProcessHostDestruction. + void NotifyRenderProcessHostDestruction(const void* host); + + // Navigation ---------------------------------------------------------------- + + void AddPage(scoped_refptr<HistoryAddPageArgs> request); + virtual void SetPageTitle(const GURL& url, const string16& title); + + // Indexing ------------------------------------------------------------------ + + void SetPageContents(const GURL& url, const string16& contents); + + // Querying ------------------------------------------------------------------ + + // ScheduleAutocomplete() never frees |provider| (which is globally live). + // It passes |params| on to the autocomplete system which will eventually + // free it. 
+ void ScheduleAutocomplete(HistoryURLProvider* provider, + HistoryURLProviderParams* params); + + void IterateURLs(HistoryService::URLEnumerator* enumerator); + void QueryURL(scoped_refptr<QueryURLRequest> request, + const GURL& url, + bool want_visits); + void QueryHistory(scoped_refptr<QueryHistoryRequest> request, + const string16& text_query, + const QueryOptions& options); + void QueryRedirectsFrom(scoped_refptr<QueryRedirectsRequest> request, + const GURL& url); + void QueryRedirectsTo(scoped_refptr<QueryRedirectsRequest> request, + const GURL& url); + + void GetVisitCountToHost(scoped_refptr<GetVisitCountToHostRequest> request, + const GURL& url); + + // TODO(Nik): remove. Use QueryMostVisitedURLs instead. + void QueryTopURLsAndRedirects( + scoped_refptr<QueryTopURLsAndRedirectsRequest> request, + int result_count); + + // Request the |result_count| most visited URLs and the chain of + // redirects leading to each of these URLs. |days_back| is the + // number of days of history to use. Used by TopSites. + void QueryMostVisitedURLs( + scoped_refptr<QueryMostVisitedURLsRequest> request, + int result_count, + int days_back); + + // Computes the most recent URL(s) that the given canonical URL has + // redirected to and returns true on success. There may be more than one + // redirect in a row, so this function will fill the given array with the + // entire chain. If there are no redirects for the most recent visit of the + // URL, or the URL is not in history, returns false. + // + // Backend for QueryRedirectsFrom. + bool GetMostRecentRedirectsFrom(const GURL& url, + history::RedirectList* redirects); + + // Similar to above function except computes a chain of redirects to the + // given URL. Stores the most recent list of redirects ending at |url| in the + // given RedirectList. For example, if we have the redirect list A -> B -> C, + // then calling this function with url=C would fill redirects with {B, A}. 
+ bool GetMostRecentRedirectsTo(const GURL& url, + history::RedirectList* redirects); + + // Thumbnails ---------------------------------------------------------------- + + void SetPageThumbnail(const GURL& url, + const SkBitmap& thumbnail, + const ThumbnailScore& score); + + // Retrieves a thumbnail, passing it across thread boundaries + // via. the included callback. + void GetPageThumbnail(scoped_refptr<GetPageThumbnailRequest> request, + const GURL& page_url); + + // Backend implementation of GetPageThumbnail. Unlike + // GetPageThumbnail(), this method has way to transport data across + // thread boundaries. + // + // Exposed for testing reasons. + void GetPageThumbnailDirectly( + const GURL& page_url, + scoped_refptr<RefCountedBytes>* data); + + void MigrateThumbnailsDatabase(); + + // Favicon ------------------------------------------------------------------- + + void GetFavIcon(scoped_refptr<GetFavIconRequest> request, + const GURL& icon_url); + void GetFavIconForURL(scoped_refptr<GetFavIconRequest> request, + const GURL& page_url); + void SetFavIcon(const GURL& page_url, + const GURL& icon_url, + scoped_refptr<RefCountedMemory> data); + void UpdateFavIconMappingAndFetch(scoped_refptr<GetFavIconRequest> request, + const GURL& page_url, + const GURL& icon_url); + void SetFavIconOutOfDateForPage(const GURL& page_url); + void SetImportedFavicons( + const std::vector<ImportedFavIconUsage>& favicon_usage); + + // Downloads ----------------------------------------------------------------- + + void QueryDownloads(scoped_refptr<DownloadQueryRequest> request); + void CleanUpInProgressEntries(); + void UpdateDownload(int64 received_bytes, int32 state, int64 db_handle); + void UpdateDownloadPath(const FilePath& path, int64 db_handle); + void CreateDownload(scoped_refptr<DownloadCreateRequest> request, + const DownloadCreateInfo& info); + void RemoveDownload(int64 db_handle); + void RemoveDownloadsBetween(const base::Time remove_begin, + const base::Time remove_end); + 
void RemoveDownloads(const base::Time remove_end); + void SearchDownloads(scoped_refptr<DownloadSearchRequest>, + const string16& search_text); + + // Segment usage ------------------------------------------------------------- + + void QuerySegmentUsage(scoped_refptr<QuerySegmentUsageRequest> request, + const base::Time from_time, + int max_result_count); + void DeleteOldSegmentData(); + void SetSegmentPresentationIndex(SegmentID segment_id, int index); + + // Keyword search terms ------------------------------------------------------ + + void SetKeywordSearchTermsForURL(const GURL& url, + TemplateURL::IDType keyword_id, + const string16& term); + + void DeleteAllSearchTermsForKeyword(TemplateURL::IDType keyword_id); + + void GetMostRecentKeywordSearchTerms( + scoped_refptr<GetMostRecentKeywordSearchTermsRequest> request, + TemplateURL::IDType keyword_id, + const string16& prefix, + int max_count); + + // Generic operations -------------------------------------------------------- + + void ProcessDBTask(scoped_refptr<HistoryDBTaskRequest> request); + + virtual bool GetAllTypedURLs(std::vector<history::URLRow>* urls); + + virtual bool GetVisitsForURL(URLID id, VisitVector* visits); + + virtual bool UpdateURL(URLID id, const history::URLRow& url); + + virtual bool AddVisits(const GURL& url, + const std::vector<base::Time>& visits); + + virtual bool RemoveVisits(const VisitVector& visits); + + virtual bool GetURL(const GURL& url, history::URLRow* url_row); + + // Deleting ------------------------------------------------------------------ + + virtual void DeleteURLs(const std::vector<GURL>& urls); + + virtual void DeleteURL(const GURL& url); + + // Calls ExpireHistoryBackend::ExpireHistoryBetween and commits the change. 
+ void ExpireHistoryBetween(scoped_refptr<ExpireHistoryRequest> request, + const std::set<GURL>& restrict_urls, + base::Time begin_time, + base::Time end_time); + + // Bookmarks ----------------------------------------------------------------- + + // Notification that a URL is no longer bookmarked. If there are no visits + // for the specified url, it is deleted. + void URLsNoLongerBookmarked(const std::set<GURL>& urls); + + // Testing ------------------------------------------------------------------- + + // Sets the task to run and the message loop to run it on when this object + // is destroyed. See HistoryService::SetOnBackendDestroyTask for a more + // complete description. + void SetOnBackendDestroyTask(MessageLoop* message_loop, Task* task); + + // Adds the given rows to the database if it doesn't exist. A visit will be + // added for each given URL at the last visit time in the URLRow. + void AddPagesWithDetails(const std::vector<URLRow>& info); + +#if defined(UNIT_TEST) + HistoryDatabase* db() const { return db_.get(); } + + ExpireHistoryBackend* expire_backend() { return &expirer_; } +#endif + + protected: + virtual ~HistoryBackend(); + + private: + friend class base::RefCountedThreadSafe<HistoryBackend>; + friend class CommitLaterTask; // The commit task needs to call Commit(). + friend class HistoryTest; // So the unit tests can poke our innards. + FRIEND_TEST_ALL_PREFIXES(HistoryBackendTest, DeleteAll); + FRIEND_TEST_ALL_PREFIXES(HistoryBackendTest, ImportedFaviconsTest); + FRIEND_TEST_ALL_PREFIXES(HistoryBackendTest, URLsNoLongerBookmarked); + FRIEND_TEST_ALL_PREFIXES(HistoryBackendTest, StripUsernamePasswordTest); + FRIEND_TEST_ALL_PREFIXES(HistoryBackendTest, DeleteThumbnailsDatabaseTest); + friend class ::TestingProfile; + + // Computes the name of the specified database on disk. + FilePath GetThumbnailFileName() const; + + // Returns the name of the Favicons database. This is the new name + // of the Thumbnails database. 
+ // See ThumbnailDatabase::RenameAndDropThumbnails. + FilePath GetFaviconsFileName() const; + FilePath GetArchivedFileName() const; + + class URLQuerier; + friend class URLQuerier; + + // Does the work of Init. + void InitImpl(); + + // Adds a single visit to the database, updating the URL information such + // as visit and typed count. The visit ID of the added visit and the URL ID + // of the associated URL (whether added or not) is returned. Both values will + // be 0 on failure. + // + // This does not schedule database commits, it is intended to be used as a + // subroutine for AddPage only. It also assumes the database is valid. + std::pair<URLID, VisitID> AddPageVisit(const GURL& url, + base::Time time, + VisitID referring_visit, + PageTransition::Type transition); + + // Returns a redirect chain in |redirects| for the VisitID + // |cur_visit|. |cur_visit| is assumed to be valid. Assumes that + // this HistoryBackend object has been Init()ed successfully. + void GetRedirectsFromSpecificVisit( + VisitID cur_visit, history::RedirectList* redirects); + + // Similar to the above function except returns a redirect list ending + // at |cur_visit|. + void GetRedirectsToSpecificVisit( + VisitID cur_visit, history::RedirectList* redirects); + + // Thumbnail Helpers --------------------------------------------------------- + + // When a simple GetMostRecentRedirectsFrom() fails, this method is + // called which searches the last N visit sessions instead of just + // the current one. Returns true and puts thumbnail data in |data| + // if a proper thumbnail was found. Returns false otherwise. Assumes + // that this HistoryBackend object has been Init()ed successfully. + bool GetThumbnailFromOlderRedirect( + const GURL& page_url, std::vector<unsigned char>* data); + + // Querying ------------------------------------------------------------------ + + // Backends for QueryHistory. 
*Basic() handles queries that are not FTS (full + // text search) queries and can just be given directly to the history DB). + // The FTS version queries the text_database, then merges with the history DB. + // Both functions assume QueryHistory already checked the DB for validity. + void QueryHistoryBasic(URLDatabase* url_db, VisitDatabase* visit_db, + const QueryOptions& options, QueryResults* result); + void QueryHistoryFTS(const string16& text_query, + const QueryOptions& options, + QueryResults* result); + + // Committing ---------------------------------------------------------------- + + // We always keep a transaction open on the history database so that multiple + // transactions can be batched. Periodically, these are flushed (use + // ScheduleCommit). This function does the commit to write any new changes to + // disk and opens a new transaction. This will be called automatically by + // ScheduleCommit, or it can be called explicitly if a caller really wants + // to write something to disk. + void Commit(); + + // Schedules a commit to happen in the future. We do this so that many + // operations over a period of time will be batched together. If there is + // already a commit scheduled for the future, this will do nothing. + void ScheduleCommit(); + + // Cancels the scheduled commit, if any. If there is no scheduled commit, + // does nothing. + void CancelScheduledCommit(); + + // Segments ------------------------------------------------------------------ + + // Walks back a segment chain to find the last visit with a non null segment + // id and returns it. If there is none found, returns 0. + SegmentID GetLastSegmentID(VisitID from_visit); + + // Update the segment information. This is called internally when a page is + // added. Return the segment id of the segment that has been updated. 
+ SegmentID UpdateSegments(const GURL& url, + VisitID from_visit, + VisitID visit_id, + PageTransition::Type transition_type, + const base::Time ts); + + // Favicons ------------------------------------------------------------------ + + // Used by both UpdateFavIconMappingAndFetch and GetFavIcon. + // If page_url is non-null and SetFavIcon has previously been invoked for + // icon_url the favicon url for page_url (and all redirects) is set to + // icon_url. + void UpdateFavIconMappingAndFetchImpl( + const GURL* page_url, + const GURL& icon_url, + scoped_refptr<GetFavIconRequest> request); + + // Sets the favicon url id for page_url to id. This will also broadcast + // notifications as necessary. + void SetFavIconMapping(const GURL& page_url, FavIconID id); + + // Generic stuff ------------------------------------------------------------- + + // Processes the next scheduled HistoryDBTask, scheduling this method + // to be invoked again if there are more tasks that need to run. + void ProcessDBTaskImpl(); + + // Release all tasks in history_db_tasks_ and clears it. + void ReleaseDBTasks(); + + // Schedules a broadcast of the given notification on the main thread. The + // details argument will have ownership taken by this function (it will be + // sent to the main thread and deleted there). + void BroadcastNotifications(NotificationType type, + HistoryDetails* details_deleted); + + // Deleting all history ------------------------------------------------------ + + // Deletes all history. This is a special case of deleting that is separated + // from our normal dependency-following method for performance reasons. The + // logic lives here instead of ExpireHistoryBackend since it will cause + // re-initialization of some databases such as Thumbnails or Archived that + // could fail. When these databases are not valid, our pointers must be NULL, + // so we need to handle this type of operation to keep the pointers in sync. 
+ void DeleteAllHistory(); + + // Given a vector of all URLs that we will keep, removes all thumbnails + // referenced by any URL, and also all favicons that aren't used by those + // URLs. The favicon IDs will change, so this will update the url rows in the + // vector to reference the new IDs. + bool ClearAllThumbnailHistory(std::vector<URLRow>* kept_urls); + + // Deletes all information in the history database, except for the supplied + // set of URLs in the URL table (these should correspond to the bookmarked + // URLs). + // + // The IDs of the URLs may change. + bool ClearAllMainHistory(const std::vector<URLRow>& kept_urls); + + // Returns the BookmarkService, blocking until it is loaded. This may return + // NULL during testing. + BookmarkService* GetBookmarkService(); + + // Data ---------------------------------------------------------------------- + + // Delegate. See the class definition above for more information. This will + // be NULL before Init is called and after Cleanup, but is guaranteed + // non-NULL in between. + scoped_ptr<Delegate> delegate_; + + // Directory where database files will be stored. + FilePath history_dir_; + + // The history/thumbnail databases. Either MAY BE NULL if the database could + // not be opened, all users must first check for NULL and return immediately + // if it is. The thumbnail DB may be NULL when the history one isn't, but not + // vice-versa. + scoped_ptr<HistoryDatabase> db_; + scoped_ptr<ThumbnailDatabase> thumbnail_db_; + + // Stores old history in a larger, slower database. + scoped_ptr<ArchivedDatabase> archived_db_; + + // Full text database manager, possibly NULL if the database could not be + // created. + scoped_ptr<TextDatabaseManager> text_database_; + + // Manages expiration between the various databases. + ExpireHistoryBackend expirer_; + + // A commit has been scheduled to occur sometime in the future. 
We can check + // non-null-ness to see if there is a commit scheduled in the future, and we + // can use the pointer to cancel the scheduled commit. There can be only one + // scheduled commit at a time (see ScheduleCommit). + scoped_refptr<CommitLaterTask> scheduled_commit_; + + // Maps recent redirect destination pages to the chain of redirects that + // brought us to there. Pages that did not have redirects or were not the + // final redirect in a chain will not be in this list, as well as pages that + // redirected "too long" ago (as determined by ExpireOldRedirects above). + // It is used to set titles & favicons for redirects to that of the + // destination. + // + // As with AddPage, the last item in the redirect chain will be the + // destination of the redirect (i.e., the key into recent_redirects_); + typedef MRUCache<GURL, history::RedirectList> RedirectCache; + RedirectCache recent_redirects_; + + // Timestamp of the last page addition request. We use this to detect when + // multiple additions are requested at the same time (within the resolution + // of the timer), so we can try to ensure they're unique when they're added + // to the database by using the last_recorded_time_ (q.v.). We still can't + // enforce or guarantee uniqueness, since the user might set his clock back. + base::Time last_requested_time_; + + // Timestamp of the last page addition, as it was recorded in the database. + // If two or more requests come in at the same time, we increment that time + // by 1 us between them so it's more likely to be unique in the database. + // This keeps track of that higher-resolution timestamp. + base::Time last_recorded_time_; + + // Timestamp of the first entry in our database. + base::Time first_recorded_time_; + + // When non-NULL, this is the task that should be invoked on + MessageLoop* backend_destroy_message_loop_; + Task* backend_destroy_task_; + + // Tracks page transition types. 
+ VisitTracker tracker_; + + // A boolean variable to track whether we have already purged obsolete segment + // data. + bool segment_queried_; + + // HistoryDBTasks to run. Be sure to AddRef when adding, and Release when + // done. + std::list<HistoryDBTaskRequest*> db_task_requests_; + + // Used to determine if a URL is bookmarked. This is owned by the Profile and + // may be NULL (during testing). + // + // Use GetBookmarkService to access this, which makes sure the service is + // loaded. + BookmarkService* bookmark_service_; + + // Publishes the history to all indexers which are registered to receive + // history data from us. Can be NULL if there are no listeners. + scoped_ptr<HistoryPublisher> history_publisher_; + + DISALLOW_COPY_AND_ASSIGN(HistoryBackend); +}; + +} // namespace history + +#endif // CHROME_BROWSER_HISTORY_HISTORY_BACKEND_H_ diff --git a/chrome/browser/history/history_backend_unittest.cc b/chrome/browser/history/history_backend_unittest.cc new file mode 100644 index 0000000..24cc1cd --- /dev/null +++ b/chrome/browser/history/history_backend_unittest.cc @@ -0,0 +1,606 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "base/file_path.h" +#include "base/file_util.h" +#include "base/path_service.h" +#include "base/ref_counted.h" +#include "base/scoped_ptr.h" +#include "base/utf_string_conversions.h" +#include "chrome/browser/bookmarks/bookmark_model.h" +#include "chrome/browser/history/history_backend.h" +#include "chrome/browser/history/history_notifications.h" +#include "chrome/browser/history/in_memory_history_backend.h" +#include "chrome/browser/history/in_memory_database.h" +#include "chrome/common/notification_service.h" +#include "chrome/common/thumbnail_score.h" +#include "chrome/tools/profiles/thumbnail-inl.h" +#include "gfx/codec/jpeg_codec.h" +#include "googleurl/src/gurl.h" +#include "testing/gtest/include/gtest/gtest.h" + +using base::Time; + +// This file only tests functionality where it is most convenient to call the +// backend directly. Most of the history backend functions are tested by the +// history unit test. Because of the elaborate callbacks involved, this is no +// harder than calling it directly for many things. + +namespace history { + +class HistoryBackendTest; + +// This must be a separate object since HistoryBackend manages its lifetime. +// This just forwards the messages we're interested in to the test object. +class HistoryBackendTestDelegate : public HistoryBackend::Delegate { + public: + explicit HistoryBackendTestDelegate(HistoryBackendTest* test) : test_(test) {} + + virtual void NotifyProfileError(int message_id) {} + virtual void SetInMemoryBackend(InMemoryHistoryBackend* backend); + virtual void BroadcastNotifications(NotificationType type, + HistoryDetails* details); + virtual void DBLoaded(); + virtual void StartTopSitesMigration(); + + private: + // Not owned by us. 
+ HistoryBackendTest* test_; + + DISALLOW_COPY_AND_ASSIGN(HistoryBackendTestDelegate); +}; + +class HistoryBackendTest : public testing::Test { + public: + HistoryBackendTest() : bookmark_model_(NULL), loaded_(false) {} + virtual ~HistoryBackendTest() { + } + + protected: + scoped_refptr<HistoryBackend> backend_; // Will be NULL on init failure. + scoped_ptr<InMemoryHistoryBackend> mem_backend_; + + void AddRedirectChain(const char* sequence[], int page_id) { + history::RedirectList redirects; + for (int i = 0; sequence[i] != NULL; ++i) + redirects.push_back(GURL(sequence[i])); + + int int_scope = 1; + void* scope = 0; + memcpy(&scope, &int_scope, sizeof(int_scope)); + scoped_refptr<history::HistoryAddPageArgs> request( + new history::HistoryAddPageArgs( + redirects.back(), Time::Now(), scope, page_id, GURL(), + redirects, PageTransition::LINK, true)); + backend_->AddPage(request); + } + + // Adds CLIENT_REDIRECT page transition. + // |url1| is the source URL and |url2| is the destination. + // |did_replace| is true if the transition is non-user initiated and the + // navigation entry for |url2| has replaced that for |url1|. The possibly + // updated transition code of the visit records for |url1| and |url2| is + // returned by filling in |*transition1| and |*transition2|, respectively. 
+ void AddClientRedirect(const GURL& url1, const GURL& url2, bool did_replace, + int* transition1, int* transition2) { + void* const dummy_scope = reinterpret_cast<void*>(0x87654321); + history::RedirectList redirects; + if (url1.is_valid()) + redirects.push_back(url1); + if (url2.is_valid()) + redirects.push_back(url2); + scoped_refptr<HistoryAddPageArgs> request( + new HistoryAddPageArgs(url2, base::Time(), dummy_scope, 0, url1, + redirects, PageTransition::CLIENT_REDIRECT, did_replace)); + backend_->AddPage(request); + + *transition1 = getTransition(url1); + *transition2 = getTransition(url2); + } + + int getTransition(const GURL& url) { + if (!url.is_valid()) + return 0; + URLRow row; + URLID id = backend_->db()->GetRowForURL(url, &row); + VisitVector visits; + EXPECT_TRUE(backend_->db()->GetVisitsForURL(id, &visits)); + return visits[0].transition; + } + + BookmarkModel bookmark_model_; + + protected: + bool loaded_; + + private: + friend class HistoryBackendTestDelegate; + + // testing::Test + virtual void SetUp() { + if (!file_util::CreateNewTempDirectory(FILE_PATH_LITERAL("BackendTest"), + &test_dir_)) + return; + backend_ = new HistoryBackend(test_dir_, + new HistoryBackendTestDelegate(this), + &bookmark_model_); + backend_->Init(false); + } + virtual void TearDown() { + backend_->Closing(); + backend_ = NULL; + mem_backend_.reset(); + file_util::Delete(test_dir_, true); + } + + void SetInMemoryBackend(InMemoryHistoryBackend* backend) { + mem_backend_.reset(backend); + } + + void BroadcastNotifications(NotificationType type, + HistoryDetails* details) { + // Send the notifications directly to the in-memory database. + Details<HistoryDetails> det(details); + mem_backend_->Observe(type, Source<HistoryBackendTest>(NULL), det); + + // The backend passes ownership of the details pointer to us. 
+ delete details; + } + + MessageLoop message_loop_; + FilePath test_dir_; +}; + +void HistoryBackendTestDelegate::SetInMemoryBackend( + InMemoryHistoryBackend* backend) { + test_->SetInMemoryBackend(backend); +} + +void HistoryBackendTestDelegate::BroadcastNotifications( + NotificationType type, + HistoryDetails* details) { + test_->BroadcastNotifications(type, details); +} + +void HistoryBackendTestDelegate::DBLoaded() { + test_->loaded_ = true; +} + +void HistoryBackendTestDelegate::StartTopSitesMigration() { + test_->backend_->MigrateThumbnailsDatabase(); +} + +TEST_F(HistoryBackendTest, Loaded) { + ASSERT_TRUE(backend_.get()); + ASSERT_TRUE(loaded_); +} + +TEST_F(HistoryBackendTest, DeleteAll) { + ASSERT_TRUE(backend_.get()); + + // Add two favicons, use the characters '1' and '2' for the image data. Note + // that we do these in the opposite order. This is so the first one gets ID + // 2 autoassigned to the database, which will change when the other one is + // deleted. This way we can test that updating works properly. + GURL favicon_url1("http://www.google.com/favicon.ico"); + GURL favicon_url2("http://news.google.com/favicon.ico"); + FavIconID favicon2 = backend_->thumbnail_db_->AddFavIcon(favicon_url2); + FavIconID favicon1 = backend_->thumbnail_db_->AddFavIcon(favicon_url1); + + std::vector<unsigned char> data; + data.push_back('1'); + EXPECT_TRUE(backend_->thumbnail_db_->SetFavIcon(favicon1, + new RefCountedBytes(data), Time::Now())); + + data[0] = '2'; + EXPECT_TRUE(backend_->thumbnail_db_->SetFavIcon( + favicon2, new RefCountedBytes(data), Time::Now())); + + // First visit two URLs. 
+ URLRow row1(GURL("http://www.google.com/")); + row1.set_visit_count(2); + row1.set_typed_count(1); + row1.set_last_visit(Time::Now()); + row1.set_favicon_id(favicon1); + + URLRow row2(GURL("http://news.google.com/")); + row2.set_visit_count(1); + row2.set_last_visit(Time::Now()); + row2.set_favicon_id(favicon2); + + std::vector<URLRow> rows; + rows.push_back(row2); // Reversed order for the same reason as favicons. + rows.push_back(row1); + backend_->AddPagesWithDetails(rows); + + URLID row1_id = backend_->db_->GetRowForURL(row1.url(), NULL); + URLID row2_id = backend_->db_->GetRowForURL(row2.url(), NULL); + + // Get the two visits for the URLs we just added. + VisitVector visits; + backend_->db_->GetVisitsForURL(row1_id, &visits); + ASSERT_EQ(1U, visits.size()); + VisitID visit1_id = visits[0].visit_id; + + visits.clear(); + backend_->db_->GetVisitsForURL(row2_id, &visits); + ASSERT_EQ(1U, visits.size()); + VisitID visit2_id = visits[0].visit_id; + + // The in-memory backend should have been set and it should have gotten the + // typed URL. + ASSERT_TRUE(mem_backend_.get()); + URLRow outrow1; + EXPECT_TRUE(mem_backend_->db_->GetRowForURL(row1.url(), NULL)); + + // Add thumbnails for each page. + ThumbnailScore score(0.25, true, true); + scoped_ptr<SkBitmap> google_bitmap( + gfx::JPEGCodec::Decode(kGoogleThumbnail, sizeof(kGoogleThumbnail))); + + Time time; + GURL gurl; + backend_->thumbnail_db_->SetPageThumbnail(gurl, row1_id, *google_bitmap, + score, time); + scoped_ptr<SkBitmap> weewar_bitmap( + gfx::JPEGCodec::Decode(kWeewarThumbnail, sizeof(kWeewarThumbnail))); + backend_->thumbnail_db_->SetPageThumbnail(gurl, row2_id, *weewar_bitmap, + score, time); + + // Star row1. + bookmark_model_.AddURL( + bookmark_model_.GetBookmarkBarNode(), 0, std::wstring(), row1.url()); + + // Set full text index for each one. 
+ backend_->text_database_->AddPageData(row1.url(), row1_id, visit1_id, + row1.last_visit(), + UTF8ToUTF16("Title 1"), + UTF8ToUTF16("Body 1")); + backend_->text_database_->AddPageData(row2.url(), row2_id, visit2_id, + row2.last_visit(), + UTF8ToUTF16("Title 2"), + UTF8ToUTF16("Body 2")); + + // Now finally clear all history. + backend_->DeleteAllHistory(); + + // The first URL should be preserved but the time should be cleared. + EXPECT_TRUE(backend_->db_->GetRowForURL(row1.url(), &outrow1)); + EXPECT_EQ(0, outrow1.visit_count()); + EXPECT_EQ(0, outrow1.typed_count()); + EXPECT_TRUE(Time() == outrow1.last_visit()); + + // The second row should be deleted. + URLRow outrow2; + EXPECT_FALSE(backend_->db_->GetRowForURL(row2.url(), &outrow2)); + + // All visits should be deleted for both URLs. + VisitVector all_visits; + backend_->db_->GetAllVisitsInRange(Time(), Time(), 0, &all_visits); + ASSERT_EQ(0U, all_visits.size()); + + // All thumbnails should be deleted. + std::vector<unsigned char> out_data; + EXPECT_FALSE(backend_->thumbnail_db_->GetPageThumbnail(outrow1.id(), + &out_data)); + EXPECT_FALSE(backend_->thumbnail_db_->GetPageThumbnail(row2_id, &out_data)); + + // We should have a favicon for the first URL only. We look them up by favicon + // URL since the IDs may hav changed. + FavIconID out_favicon1 = backend_->thumbnail_db_-> + GetFavIconIDForFavIconURL(favicon_url1); + EXPECT_TRUE(out_favicon1); + FavIconID out_favicon2 = backend_->thumbnail_db_-> + GetFavIconIDForFavIconURL(favicon_url2); + EXPECT_FALSE(out_favicon2) << "Favicon not deleted"; + + // The remaining URL should still reference the same favicon, even if its + // ID has changed. + EXPECT_EQ(out_favicon1, outrow1.favicon_id()); + + // The first URL should still be bookmarked. + EXPECT_TRUE(bookmark_model_.IsBookmarked(row1.url())); + + // The full text database should have no data. 
+ std::vector<TextDatabase::Match> text_matches; + Time first_time_searched; + backend_->text_database_->GetTextMatches(UTF8ToUTF16("Body"), + QueryOptions(), + &text_matches, + &first_time_searched); + EXPECT_EQ(0U, text_matches.size()); +} + +TEST_F(HistoryBackendTest, URLsNoLongerBookmarked) { + GURL favicon_url1("http://www.google.com/favicon.ico"); + GURL favicon_url2("http://news.google.com/favicon.ico"); + FavIconID favicon2 = backend_->thumbnail_db_->AddFavIcon(favicon_url2); + FavIconID favicon1 = backend_->thumbnail_db_->AddFavIcon(favicon_url1); + + std::vector<unsigned char> data; + data.push_back('1'); + EXPECT_TRUE(backend_->thumbnail_db_->SetFavIcon( + favicon1, new RefCountedBytes(data), Time::Now())); + + data[0] = '2'; + EXPECT_TRUE(backend_->thumbnail_db_->SetFavIcon( + favicon2, new RefCountedBytes(data), Time::Now())); + + // First visit two URLs. + URLRow row1(GURL("http://www.google.com/")); + row1.set_visit_count(2); + row1.set_typed_count(1); + row1.set_last_visit(Time::Now()); + row1.set_favicon_id(favicon1); + + URLRow row2(GURL("http://news.google.com/")); + row2.set_visit_count(1); + row2.set_last_visit(Time::Now()); + row2.set_favicon_id(favicon2); + + std::vector<URLRow> rows; + rows.push_back(row2); // Reversed order for the same reason as favicons. + rows.push_back(row1); + backend_->AddPagesWithDetails(rows); + + URLID row1_id = backend_->db_->GetRowForURL(row1.url(), NULL); + URLID row2_id = backend_->db_->GetRowForURL(row2.url(), NULL); + + // Star the two URLs. + bookmark_model_.SetURLStarred(row1.url(), std::wstring(), true); + bookmark_model_.SetURLStarred(row2.url(), std::wstring(), true); + + // Delete url 2. Because url 2 is starred this won't delete the URL, only + // the visits. + backend_->expirer_.DeleteURL(row2.url()); + + // Make sure url 2 is still valid, but has no visits. 
+ URLRow tmp_url_row; + EXPECT_EQ(row2_id, backend_->db_->GetRowForURL(row2.url(), NULL)); + VisitVector visits; + backend_->db_->GetVisitsForURL(row2_id, &visits); + EXPECT_EQ(0U, visits.size()); + // The favicon should still be valid. + EXPECT_EQ(favicon2, + backend_->thumbnail_db_->GetFavIconIDForFavIconURL(favicon_url2)); + + // Unstar row2. + bookmark_model_.SetURLStarred(row2.url(), std::wstring(), false); + // Tell the backend it was unstarred. We have to explicitly do this as + // BookmarkModel isn't wired up to the backend during testing. + std::set<GURL> unstarred_urls; + unstarred_urls.insert(row2.url()); + backend_->URLsNoLongerBookmarked(unstarred_urls); + + // The URL should no longer exist. + EXPECT_FALSE(backend_->db_->GetRowForURL(row2.url(), &tmp_url_row)); + // And the favicon should be deleted. + EXPECT_EQ(0, + backend_->thumbnail_db_->GetFavIconIDForFavIconURL(favicon_url2)); + + // Unstar row 1. + bookmark_model_.SetURLStarred(row1.url(), std::wstring(), false); + // Tell the backend it was unstarred. We have to explicitly do this as + // BookmarkModel isn't wired up to the backend during testing. + unstarred_urls.clear(); + unstarred_urls.insert(row1.url()); + backend_->URLsNoLongerBookmarked(unstarred_urls); + + // The URL should still exist (because there were visits). + EXPECT_EQ(row1_id, backend_->db_->GetRowForURL(row1.url(), NULL)); + + // There should still be visits. + visits.clear(); + backend_->db_->GetVisitsForURL(row1_id, &visits); + EXPECT_EQ(1U, visits.size()); + + // The favicon should still be valid. 
+ EXPECT_EQ(favicon1, + backend_->thumbnail_db_->GetFavIconIDForFavIconURL(favicon_url1)); +} + +TEST_F(HistoryBackendTest, GetPageThumbnailAfterRedirects) { + ASSERT_TRUE(backend_.get()); + + const char* base_url = "http://mail"; + const char* thumbnail_url = "http://mail.google.com"; + const char* first_chain[] = { + base_url, + thumbnail_url, + NULL + }; + AddRedirectChain(first_chain, 0); + + // Add a thumbnail for the end of that redirect chain. + scoped_ptr<SkBitmap> thumbnail( + gfx::JPEGCodec::Decode(kGoogleThumbnail, sizeof(kGoogleThumbnail))); + backend_->SetPageThumbnail(GURL(thumbnail_url), *thumbnail, + ThumbnailScore(0.25, true, true)); + + // Write a second URL chain so that if you were to simply check what + // "http://mail" redirects to, you wouldn't see the URL that has + // contains the thumbnail. + const char* second_chain[] = { + base_url, + "http://mail.google.com/somewhere/else", + NULL + }; + AddRedirectChain(second_chain, 1); + + // Now try to get the thumbnail for the base url. It shouldn't be + // distracted by the second chain and should return the thumbnail + // attached to thumbnail_url_. + scoped_refptr<RefCountedBytes> data; + backend_->GetPageThumbnailDirectly(GURL(base_url), &data); + + EXPECT_TRUE(data.get()); +} + +// Tests a handful of assertions for a navigation with a type of +// KEYWORD_GENERATED. +TEST_F(HistoryBackendTest, KeywordGenerated) { + ASSERT_TRUE(backend_.get()); + + GURL url("http://google.com"); + + Time visit_time = Time::Now() - base::TimeDelta::FromDays(1); + scoped_refptr<HistoryAddPageArgs> request( + new HistoryAddPageArgs(url, visit_time, NULL, 0, GURL(), + history::RedirectList(), + PageTransition::KEYWORD_GENERATED, false)); + backend_->AddPage(request); + + // A row should have been added for the url. + URLRow row; + URLID url_id = backend_->db()->GetRowForURL(url, &row); + ASSERT_NE(0, url_id); + + // The typed count should be 1. 
+ ASSERT_EQ(1, row.typed_count()); + + // KEYWORD_GENERATED urls should not be added to the segment db. + std::string segment_name = VisitSegmentDatabase::ComputeSegmentName(url); + EXPECT_EQ(0, backend_->db()->GetSegmentNamed(segment_name)); + + // One visit should be added. + VisitVector visits; + EXPECT_TRUE(backend_->db()->GetVisitsForURL(url_id, &visits)); + EXPECT_EQ(1U, visits.size()); + + // But no visible visits. + visits.clear(); + backend_->db()->GetVisibleVisitsInRange(base::Time(), base::Time(), 1, + &visits); + EXPECT_TRUE(visits.empty()); + + // Expire the visits. + std::set<GURL> restrict_urls; + backend_->expire_backend()->ExpireHistoryBetween(restrict_urls, + visit_time, Time::Now()); + + // The visit should have been nuked. + visits.clear(); + EXPECT_TRUE(backend_->db()->GetVisitsForURL(url_id, &visits)); + EXPECT_TRUE(visits.empty()); + + // As well as the url. + ASSERT_EQ(0, backend_->db()->GetRowForURL(url, &row)); +} + +TEST_F(HistoryBackendTest, ClientRedirect) { + ASSERT_TRUE(backend_.get()); + + int transition1; + int transition2; + + // Initial transition to page A. + GURL url_a("http://google.com/a"); + AddClientRedirect(GURL(), url_a, false, &transition1, &transition2); + EXPECT_TRUE(transition2 & PageTransition::CHAIN_END); + + // User initiated redirect to page B. + GURL url_b("http://google.com/b"); + AddClientRedirect(url_a, url_b, false, &transition1, &transition2); + EXPECT_TRUE(transition1 & PageTransition::CHAIN_END); + EXPECT_TRUE(transition2 & PageTransition::CHAIN_END); + + // Non-user initiated redirect to page C. + GURL url_c("http://google.com/c"); + AddClientRedirect(url_b, url_c, true, &transition1, &transition2); + EXPECT_FALSE(transition1 & PageTransition::CHAIN_END); + EXPECT_TRUE(transition2 & PageTransition::CHAIN_END); +} + +TEST_F(HistoryBackendTest, ImportedFaviconsTest) { + // Setup test data - two Urls in the history, one with favicon assigned and + // one without. 
+ GURL favicon_url1("http://www.google.com/favicon.ico"); + FavIconID favicon1 = backend_->thumbnail_db_->AddFavIcon(favicon_url1); + std::vector<unsigned char> data; + data.push_back('1'); + EXPECT_TRUE(backend_->thumbnail_db_->SetFavIcon(favicon1, + RefCountedBytes::TakeVector(&data), Time::Now())); + URLRow row1(GURL("http://www.google.com/")); + row1.set_favicon_id(favicon1); + row1.set_visit_count(1); + row1.set_last_visit(Time::Now()); + URLRow row2(GURL("http://news.google.com/")); + row2.set_visit_count(1); + row2.set_last_visit(Time::Now()); + std::vector<URLRow> rows; + rows.push_back(row1); + rows.push_back(row2); + backend_->AddPagesWithDetails(rows); + URLRow url_row1, url_row2; + EXPECT_FALSE(backend_->db_->GetRowForURL(row1.url(), &url_row1) == 0); + EXPECT_FALSE(backend_->db_->GetRowForURL(row2.url(), &url_row2) == 0); + EXPECT_FALSE(url_row1.favicon_id() == 0); + EXPECT_TRUE(url_row2.favicon_id() == 0); + + // Now provide one imported favicon for both URLs already in the registry. + // The new favicon should only be used with the URL that doesn't already have + // a favicon. + std::vector<history::ImportedFavIconUsage> favicons; + history::ImportedFavIconUsage favicon; + favicon.favicon_url = GURL("http://news.google.com/favicon.ico"); + favicon.png_data.push_back('2'); + favicon.urls.insert(row1.url()); + favicon.urls.insert(row2.url()); + favicons.push_back(favicon); + backend_->SetImportedFavicons(favicons); + EXPECT_FALSE(backend_->db_->GetRowForURL(row1.url(), &url_row1) == 0); + EXPECT_FALSE(backend_->db_->GetRowForURL(row2.url(), &url_row2) == 0); + EXPECT_FALSE(url_row1.favicon_id() == 0); + EXPECT_FALSE(url_row2.favicon_id() == 0); + EXPECT_FALSE(url_row1.favicon_id() == url_row2.favicon_id()); + + // A URL should not be added to history (to store favicon), if + // the URL is not bookmarked. 
+ GURL url3("http://mail.google.com"); + favicons.clear(); + favicon.favicon_url = GURL("http://mail.google.com/favicon.ico"); + favicon.png_data.push_back('3'); + favicon.urls.insert(url3); + favicons.push_back(favicon); + backend_->SetImportedFavicons(favicons); + URLRow url_row3; + EXPECT_TRUE(backend_->db_->GetRowForURL(url3, &url_row3) == 0); + + // If the URL is bookmarked, it should get added to history with 0 visits. + bookmark_model_.AddURL(bookmark_model_.GetBookmarkBarNode(), 0, + std::wstring(), url3); + backend_->SetImportedFavicons(favicons); + EXPECT_FALSE(backend_->db_->GetRowForURL(url3, &url_row3) == 0); + EXPECT_TRUE(url_row3.visit_count() == 0); +} + +TEST_F(HistoryBackendTest, StripUsernamePasswordTest) { + ASSERT_TRUE(backend_.get()); + + GURL url("http://anyuser:anypass@www.google.com"); + GURL stripped_url("http://www.google.com"); + + // Clear all history. + backend_->DeleteAllHistory(); + + // Visit the url with username, password. + backend_->AddPageVisit(url, base::Time::Now(), 0, + PageTransition::GetQualifier(PageTransition::TYPED)); + + // Fetch the row information about stripped url from history db. + VisitVector visits; + URLID row_id = backend_->db_->GetRowForURL(stripped_url, NULL); + backend_->db_->GetVisitsForURL(row_id, &visits); + + // Check if stripped url is stored in database. + ASSERT_EQ(1U, visits.size()); +} + +TEST_F(HistoryBackendTest, DeleteThumbnailsDatabaseTest) { + EXPECT_TRUE(backend_->thumbnail_db_->NeedsMigrationToTopSites()); + backend_->delegate_->StartTopSitesMigration(); + EXPECT_FALSE(backend_->thumbnail_db_->NeedsMigrationToTopSites()); +} + +} // namespace history diff --git a/chrome/browser/history/history_database.cc b/chrome/browser/history/history_database.cc new file mode 100644 index 0000000..26bb81a --- /dev/null +++ b/chrome/browser/history/history_database.cc @@ -0,0 +1,337 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/history/history_database.h" + +#include <algorithm> +#include <set> +#include <string> +#include "app/sql/transaction.h" +#include "base/command_line.h" +#include "base/file_util.h" +#if defined(OS_MACOSX) +#include "base/mac_util.h" +#endif +#include "base/histogram.h" +#include "base/rand_util.h" +#include "base/string_util.h" +#include "chrome/browser/diagnostics/sqlite_diagnostics.h" +#include "chrome/common/chrome_switches.h" + +namespace history { + +namespace { + +// Current version number. We write databases at the "current" version number, +// but any previous version that can read the "compatible" one can make do with +// or database without *too* many bad effects. +static const int kCurrentVersionNumber = 18; +static const int kCompatibleVersionNumber = 16; +static const char kEarlyExpirationThresholdKey[] = "early_expiration_threshold"; + +void ComputeDatabaseMetrics(const FilePath& history_name, + sql::Connection& db) { + if (base::RandInt(1, 100) != 50) + return; // Only do this computation sometimes since it can be expensive. 
+ + int64 file_size = 0; + if (!file_util::GetFileSize(history_name, &file_size)) + return; + int file_mb = static_cast<int>(file_size / (1024 * 1024)); + UMA_HISTOGRAM_MEMORY_MB("History.DatabaseFileMB", file_mb); + + sql::Statement url_count(db.GetUniqueStatement("SELECT count(*) FROM urls")); + if (!url_count || !url_count.Step()) + return; + UMA_HISTOGRAM_COUNTS("History.URLTableCount", url_count.ColumnInt(0)); + + sql::Statement visit_count(db.GetUniqueStatement( + "SELECT count(*) FROM visits")); + if (!visit_count || !visit_count.Step()) + return; + UMA_HISTOGRAM_COUNTS("History.VisitTableCount", visit_count.ColumnInt(0)); +} + +} // namespace + +HistoryDatabase::HistoryDatabase() + : needs_version_17_migration_(false), + needs_version_18_migration_(false) { +} + +HistoryDatabase::~HistoryDatabase() { +} + +sql::InitStatus HistoryDatabase::Init(const FilePath& history_name, + const FilePath& bookmarks_path) { + // Set the exceptional sqlite error handler. + db_.set_error_delegate(GetErrorHandlerForHistoryDb()); + + // Set the database page size to something a little larger to give us + // better performance (we're typically seek rather than bandwidth limited). + // This only has an effect before any tables have been created, otherwise + // this is a NOP. Must be a power of 2 and a max of 8192. + db_.set_page_size(4096); + + // Increase the cache size. The page size, plus a little extra, times this + // value, tells us how much memory the cache will use maximum. + // 6000 * 4MB = 24MB + // TODO(brettw) scale this value to the amount of available memory. + db_.set_cache_size(6000); + + // Note that we don't set exclusive locking here. That's done by + // BeginExclusiveMode below which is called later (we have to be in shared + // mode to start out for the in-memory backend to read the data). + + if (!db_.Open(history_name)) + return sql::INIT_FAILURE; + + // Wrap the rest of init in a tranaction. 
This will prevent the database from + // getting corrupted if we crash in the middle of initialization or migration. + sql::Transaction committer(&db_); + if (!committer.Begin()) + return sql::INIT_FAILURE; + +#if defined(OS_MACOSX) + // Exclude the history file and its journal from backups. + mac_util::SetFileBackupExclusion(history_name, true); + FilePath::StringType history_name_string(history_name.value()); + history_name_string += "-journal"; + FilePath history_journal_name(history_name_string); + mac_util::SetFileBackupExclusion(history_journal_name, true); +#endif + + // Prime the cache. + db_.Preload(); + + // Create the tables and indices. + // NOTE: If you add something here, also add it to + // RecreateAllButStarAndURLTables. + if (!meta_table_.Init(&db_, GetCurrentVersion(), kCompatibleVersionNumber)) + return sql::INIT_FAILURE; + if (!CreateURLTable(false) || !InitVisitTable() || + !InitKeywordSearchTermsTable() || !InitDownloadTable() || + !InitSegmentTables()) + return sql::INIT_FAILURE; + CreateMainURLIndex(); + CreateSupplimentaryURLIndices(); + + // Version check. + sql::InitStatus version_status = EnsureCurrentVersion(bookmarks_path); + if (version_status != sql::INIT_OK) + return version_status; + + ComputeDatabaseMetrics(history_name, db_); + return committer.Commit() ? sql::INIT_OK : sql::INIT_FAILURE; +} + +void HistoryDatabase::BeginExclusiveMode() { + // We can't use set_exclusive_locking() since that only has an effect before + // the DB is opened. + db_.Execute("PRAGMA locking_mode=EXCLUSIVE"); +} + +// static +int HistoryDatabase::GetCurrentVersion() { + // Temporary solution while TopSites is behind a flag. If there is + // no flag, we are still using the Thumbnails file, i.e. we are at + // version 17. 
+ if (CommandLine::ForCurrentProcess()->HasSwitch(switches::kTopSites)) { + return kCurrentVersionNumber; + } else { + return kCurrentVersionNumber - 1; + } +} + +void HistoryDatabase::BeginTransaction() { + db_.BeginTransaction(); +} + +void HistoryDatabase::CommitTransaction() { + db_.CommitTransaction(); +} + +bool HistoryDatabase::RecreateAllTablesButURL() { + if (!DropVisitTable()) + return false; + if (!InitVisitTable()) + return false; + + if (!DropKeywordSearchTermsTable()) + return false; + if (!InitKeywordSearchTermsTable()) + return false; + + if (!DropSegmentTables()) + return false; + if (!InitSegmentTables()) + return false; + + // We also add the supplementary URL indices at this point. This index is + // over parts of the URL table that weren't automatically created when the + // temporary URL table was + CreateSupplimentaryURLIndices(); + return true; +} + +void HistoryDatabase::Vacuum() { + DCHECK_EQ(0, db_.transaction_nesting()) << + "Can not have a transaction when vacuuming."; + db_.Execute("VACUUM"); +} + +bool HistoryDatabase::SetSegmentID(VisitID visit_id, SegmentID segment_id) { + sql::Statement s(db_.GetCachedStatement(SQL_FROM_HERE, + "UPDATE visits SET segment_id = ? 
WHERE id = ?")); + if (!s) { + NOTREACHED() << db_.GetErrorMessage(); + return false; + } + s.BindInt64(0, segment_id); + s.BindInt64(1, visit_id); + DCHECK(db_.GetLastChangeCount() == 1); + return s.Run(); +} + +SegmentID HistoryDatabase::GetSegmentID(VisitID visit_id) { + sql::Statement s(db_.GetCachedStatement(SQL_FROM_HERE, + "SELECT segment_id FROM visits WHERE id = ?")); + if (!s) { + NOTREACHED() << db_.GetErrorMessage(); + return 0; + } + + s.BindInt64(0, visit_id); + if (s.Step()) { + if (s.ColumnType(0) == sql::COLUMN_TYPE_NULL) + return 0; + else + return s.ColumnInt64(0); + } + return 0; +} + +base::Time HistoryDatabase::GetEarlyExpirationThreshold() { + if (!cached_early_expiration_threshold_.is_null()) + return cached_early_expiration_threshold_; + + int64 threshold; + if (!meta_table_.GetValue(kEarlyExpirationThresholdKey, &threshold)) { + // Set to a very early non-zero time, so it's before all history, but not + // zero to avoid re-retrieval. + threshold = 1L; + } + + cached_early_expiration_threshold_ = base::Time::FromInternalValue(threshold); + return cached_early_expiration_threshold_; +} + +void HistoryDatabase::UpdateEarlyExpirationThreshold(base::Time threshold) { + meta_table_.SetValue(kEarlyExpirationThresholdKey, + threshold.ToInternalValue()); + cached_early_expiration_threshold_ = threshold; +} + +sql::Connection& HistoryDatabase::GetDB() { + return db_; +} + +// Migration ------------------------------------------------------------------- + +sql::InitStatus HistoryDatabase::EnsureCurrentVersion( + const FilePath& tmp_bookmarks_path) { + // We can't read databases newer than we were designed for. + if (meta_table_.GetCompatibleVersionNumber() > kCurrentVersionNumber) { + LOG(WARNING) << "History database is too new."; + return sql::INIT_TOO_NEW; + } + + // NOTICE: If you are changing structures for things shared with the archived + // history file like URLs, visits, or downloads, that will need migration as + // well. 
Instead of putting such migration code in this class, it should be + // in the corresponding file (url_database.cc, etc.) and called from here and + // from the archived_database.cc. + + int cur_version = meta_table_.GetVersionNumber(); + + // Put migration code here + + if (cur_version == 15) { + if (!MigrateBookmarksToFile(tmp_bookmarks_path) || + !DropStarredIDFromURLs()) { + LOG(WARNING) << "Unable to update history database to version 16."; + return sql::INIT_FAILURE; + } + ++cur_version; + meta_table_.SetVersionNumber(cur_version); + meta_table_.SetCompatibleVersionNumber( + std::min(cur_version, kCompatibleVersionNumber)); + } + + if (cur_version == 16) { +#if !defined(OS_WIN) + // In this version we bring the time format on Mac & Linux in sync with the + // Windows version so that profiles can be moved between computers. + MigrateTimeEpoch(); +#endif + // On all platforms we bump the version number, so on Windows this + // migration is a NOP. We keep the compatible version at 16 since things + // will basically still work, just history will be in the future if an + // old version reads it. + ++cur_version; + meta_table_.SetVersionNumber(cur_version); + } + + if (cur_version == 17) + needs_version_18_migration_ = true; + + if (!CommandLine::ForCurrentProcess()->HasSwitch(switches::kTopSites) && + cur_version == 18) { + // Set DB version back to pre-top sites. + cur_version = 17; + meta_table_.SetVersionNumber(cur_version); + } + + // When the version is too old, we just try to continue anyway, there should + // not be a released product that makes a database too old for us to handle. + LOG_IF(WARNING, (cur_version < GetCurrentVersion() && + !needs_version_18_migration_)) << + "History database version " << cur_version << " is too old to handle."; + + return sql::INIT_OK; +} + +#if !defined(OS_WIN) +void HistoryDatabase::MigrateTimeEpoch() { + // Update all the times in the URLs and visits table in the main database. 
+ // For visits, clear the indexed flag since we'll delete the FTS databases in + // the next step. + db_.Execute( + "UPDATE urls " + "SET last_visit_time = last_visit_time + 11644473600000000 " + "WHERE id IN (SELECT id FROM urls WHERE last_visit_time > 0);"); + db_.Execute( + "UPDATE visits " + "SET visit_time = visit_time + 11644473600000000, is_indexed = 0 " + "WHERE id IN (SELECT id FROM visits WHERE visit_time > 0);"); + db_.Execute( + "UPDATE segment_usage " + "SET time_slot = time_slot + 11644473600000000 " + "WHERE id IN (SELECT id FROM segment_usage WHERE time_slot > 0);"); + + // Erase all the full text index files. These will take a while to update and + // are less important, so we just blow them away. Same with the archived + // database. + needs_version_17_migration_ = true; +} +#endif + +void HistoryDatabase::MigrationToTopSitesDone() { + // We should be migrating from 17 to 18. + DCHECK_EQ(17, meta_table_.GetVersionNumber()); + meta_table_.SetVersionNumber(18); + needs_version_18_migration_ = false; +} + +} // namespace history diff --git a/chrome/browser/history/history_database.h b/chrome/browser/history/history_database.h new file mode 100644 index 0000000..3b3414a --- /dev/null +++ b/chrome/browser/history/history_database.h @@ -0,0 +1,188 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef CHROME_BROWSER_HISTORY_HISTORY_DATABASE_H_ +#define CHROME_BROWSER_HISTORY_HISTORY_DATABASE_H_ + +#include "app/sql/connection.h" +#include "app/sql/init_status.h" +#include "app/sql/meta_table.h" +#include "build/build_config.h" +#include "chrome/browser/history/download_database.h" +#include "chrome/browser/history/history_types.h" +#include "chrome/browser/history/starred_url_database.h" +#include "chrome/browser/history/url_database.h" +#include "chrome/browser/history/visit_database.h" +#include "chrome/browser/history/visitsegment_database.h" + +class FilePath; + +namespace history { + +// Forward declaration for the temporary migration code in Init(). +class TextDatabaseManager; + +// Encapsulates the SQL connection for the history database. This class holds +// the database connection and has methods the history system (including full +// text search) uses for writing and retrieving information. +// +// We try to keep most logic out of the history database; this should be seen +// as the storage interface. Logic for manipulating this storage layer should +// be in HistoryBackend.cc. +class HistoryDatabase : public DownloadDatabase, + // TODO(sky): See if we can nuke StarredURLDatabase and just create on the + // stack for migration. Then HistoryDatabase would directly descend from + // URLDatabase. + public StarredURLDatabase, + public VisitDatabase, + public VisitSegmentDatabase { + public: + // A simple class for scoping a history database transaction. This does not + // support rollback since the history database doesn't, either. + class TransactionScoper { + public: + explicit TransactionScoper(HistoryDatabase* db) : db_(db) { + db_->BeginTransaction(); + } + ~TransactionScoper() { + db_->CommitTransaction(); + } + private: + HistoryDatabase* db_; + }; + + // Must call Init() to complete construction. Although it can be created on + // any thread, it must be destructed on the history thread for proper + // database cleanup. 
+ HistoryDatabase(); + + virtual ~HistoryDatabase(); + + // Must call this function to complete initialization. Will return true on + // success. On false, no other function should be called. You may want to call + // BeginExclusiveMode after this when you are ready. + sql::InitStatus Init(const FilePath& history_name, + const FilePath& tmp_bookmarks_path); + + // Call to set the mode on the database to exclusive. The default locking mode + // is "normal" but we want to run in exclusive mode for slightly better + // performance since we know nobody else is using the database. This is + // separate from Init() since the in-memory database attaches to slurp the + // data out, and this can't happen in exclusive mode. + void BeginExclusiveMode(); + + // Returns the current version that we will generate history databases with. + static int GetCurrentVersion(); + + // Transactions on the history database. Use the Transaction object above + // for most work instead of these directly. We support nested transactions + // and only commit when the outermost transaction is committed. This means + // that it is impossible to rollback a specific transaction. We could roll + // back the outermost transaction if any inner one is rolled back, but it + // turns out we don't really need this type of integrity for the history + // database, so we just don't support it. + void BeginTransaction(); + void CommitTransaction(); + int transaction_nesting() const { // for debugging and assertion purposes + return db_.transaction_nesting(); + } + + // Drops all tables except the URL, and download tables, and recreates them + // from scratch. This is done to rapidly clean up stuff when deleting all + // history. It is faster and less likely to have problems that deleting all + // rows in the tables. + // + // We don't delete the downloads table, since there may be in progress + // downloads. 
We handle the download history clean up separately in: + // DownloadManager::RemoveDownloadsFromHistoryBetween. + // + // Returns true on success. On failure, the caller should assume that the + // database is invalid. There could have been an error recreating a table. + // This should be treated the same as an init failure, and the database + // should not be used any more. + // + // This will also recreate the supplementary URL indices, since these + // indices won't be created automatically when using the temporary URL + // table (what the caller does right before calling this). + bool RecreateAllTablesButURL(); + + // Vacuums the database. This will cause sqlite to defragment and collect + // unused space in the file. It can be VERY SLOW. + void Vacuum(); + + // Returns true if the history backend should erase the full text search + // and archived history files as part of version 16 -> 17 migration. The + // time format changed in this revision, and these files would be much slower + // to migrate. Since the data is less important, they should be deleted. + // + // This flag will be valid after Init() is called. It will always be false + // when running on Windows. + bool needs_version_17_migration() const { + return needs_version_17_migration_; + } + + // Returns true if the Thumbnails database should be renamed to + // Favicons database. 17 -> 18 is migration to TopSites. ThumbnailsDatabase + // doesn't store the thumbnails any more, only the favicons. Hence, its file + // is renamed from Thumbnails to Favicons. + bool needs_version_18_migration() const { + return needs_version_18_migration_; + } + + // Update the database version after the TopSites migration. + void MigrationToTopSitesDone(); + + // Visit table functions ---------------------------------------------------- + + // Update the segment id of a visit. Return true on success. + bool SetSegmentID(VisitID visit_id, SegmentID segment_id); + + // Query the segment ID for the provided visit. 
Return 0 on failure or if the + // visit id wasn't found. + SegmentID GetSegmentID(VisitID visit_id); + + // Retrieves/Updates early expiration threshold, which specifies the earliest + // known point in history that may possibly to contain visits suitable for + // early expiration (AUTO_SUBFRAMES). + virtual base::Time GetEarlyExpirationThreshold(); + virtual void UpdateEarlyExpirationThreshold(base::Time threshold); + + private: + // Implemented for URLDatabase. + virtual sql::Connection& GetDB(); + + // Migration ----------------------------------------------------------------- + + // Makes sure the version is up-to-date, updating if necessary. If the + // database is too old to migrate, the user will be notified. In this case, or + // for other errors, false will be returned. True means it is up-to-date and + // ready for use. + // + // This assumes it is called from the init function inside a transaction. It + // may commit the transaction and start a new one if migration requires it. + sql::InitStatus EnsureCurrentVersion(const FilePath& tmp_bookmarks_path); + +#if !defined(OS_WIN) + // Converts the time epoch in the database from being 1970-based to being + // 1601-based which corresponds to the change in Time.internal_value_. + void MigrateTimeEpoch(); +#endif + + // --------------------------------------------------------------------------- + + sql::Connection db_; + sql::MetaTable meta_table_; + + base::Time cached_early_expiration_threshold_; + + // See the getters above. + bool needs_version_17_migration_; + bool needs_version_18_migration_; + + DISALLOW_COPY_AND_ASSIGN(HistoryDatabase); +}; + +} // history + +#endif // CHROME_BROWSER_HISTORY_HISTORY_DATABASE_H_ diff --git a/chrome/browser/history/history_indexer.idl b/chrome/browser/history/history_indexer.idl new file mode 100644 index 0000000..8fdbf36 --- /dev/null +++ b/chrome/browser/history/history_indexer.idl @@ -0,0 +1,57 @@ +// Copyright (c) 2008 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +import "oaidl.idl"; +import "ocidl.idl"; + +[ + object, + uuid(9C1100DD-51D4-4827-AE9F-3B8FAC4AED72), + oleautomation, + nonextensible, + pointer_default(unique) +] +interface IChromeHistoryIndexer : IUnknown { + // This is the method called by Chrome to send content and thumbnail of the + // page to be indexed. The html content and thumbnail for the same url + // are sent at different points in time. The thumbnail_format and + // thumbnail parameters will be NULL when sending only the content. + // |time| - The last time at which user visited the page. The time is in UTC. + // |url| - The url of the page being published for indexing. + // |html| - The html content of the page being published for indexing. + // |title| - The url of the page being published for indexing. + // |thumbnail_format| - The format of the thumbnail image. It is currently + // "image/jpeg", indicating that the thumbail is in jpeg + // format. + // |thumbnail| - This is an array of bytes that represents the thumbnail in + // the format specified by the "thumbnail_format" parameter. + HRESULT SendPageData([in] VARIANT time, + [in] BSTR url, + [in] BSTR html, + [in] BSTR title, + [in] BSTR thumbnail_format, + [in] VARIANT thumbnail); + + // This method is called by Chrome when the users delete their history. + // |begin_time| - Represents the start time from which the history needs to be + // deleted. It is given in UTC. + // |end_time| - Represents the end time until when the history needs to be + // deleted. It is given in UTC + // If both begin_time and end_time are '0', full user history needs to be + // deleted. + HRESULT DeleteUserHistoryBetween([in] VARIANT begin_time, + [in] VARIANT end_time); +}; + + +// This dummy library statement enforces the creation of a history_indexer.tlb. +// This is necessary since MSVC assumes a .idl always creates a .tlb. 
Otherwise, +// this .idl is always recompiled, giving many engs a headache. +[ + uuid(A5C5B8BE-E7E5-4cb9-A13B-B063361E7B6D), + helpstring("Dummy library") +] +library history_indexerLib +{ +}; diff --git a/chrome/browser/history/history_marshaling.h b/chrome/browser/history/history_marshaling.h new file mode 100644 index 0000000..39b8983 --- /dev/null +++ b/chrome/browser/history/history_marshaling.h @@ -0,0 +1,140 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Data structures for communication between the history service on the main +// thread and the backend on the history thread. + +#ifndef CHROME_BROWSER_HISTORY_HISTORY_MARSHALING_H__ +#define CHROME_BROWSER_HISTORY_HISTORY_MARSHALING_H__ + +#include "base/scoped_vector.h" +#include "chrome/browser/cancelable_request.h" +#include "chrome/browser/favicon_service.h" +#include "chrome/browser/history/history.h" +#include "chrome/browser/history/page_usage_data.h" + +namespace history { + +// Navigation ----------------------------------------------------------------- + +// Marshalling structure for AddPage. 
+class HistoryAddPageArgs + : public base::RefCountedThreadSafe<HistoryAddPageArgs> { + public: + HistoryAddPageArgs(const GURL& arg_url, + base::Time arg_time, + const void* arg_id_scope, + int32 arg_page_id, + const GURL& arg_referrer, + const history::RedirectList& arg_redirects, + PageTransition::Type arg_transition, + bool arg_did_replace_entry) + : url(arg_url), + time(arg_time), + id_scope(arg_id_scope), + page_id(arg_page_id), + referrer(arg_referrer), + redirects(arg_redirects), + transition(arg_transition), + did_replace_entry(arg_did_replace_entry) { + } + + GURL url; + base::Time time; + + const void* id_scope; + int32 page_id; + + GURL referrer; + history::RedirectList redirects; + PageTransition::Type transition; + bool did_replace_entry; + + private: + friend class base::RefCountedThreadSafe<HistoryAddPageArgs>; + + ~HistoryAddPageArgs() {} + + DISALLOW_COPY_AND_ASSIGN(HistoryAddPageArgs); +}; + +// Querying ------------------------------------------------------------------- + +typedef CancelableRequest1<HistoryService::QueryURLCallback, + Tuple2<URLRow, VisitVector> > + QueryURLRequest; + +typedef CancelableRequest1<HistoryService::QueryHistoryCallback, + QueryResults> + QueryHistoryRequest; + +typedef CancelableRequest1<HistoryService::QueryRedirectsCallback, + history::RedirectList> + QueryRedirectsRequest; + +typedef CancelableRequest<HistoryService::GetVisitCountToHostCallback> + GetVisitCountToHostRequest; + +typedef CancelableRequest1<HistoryService::QueryTopURLsAndRedirectsCallback, + Tuple2<std::vector<GURL>, + history::RedirectMap> > + QueryTopURLsAndRedirectsRequest; + +typedef CancelableRequest1<HistoryService::QueryMostVisitedURLsCallback, + history::MostVisitedURLList> + QueryMostVisitedURLsRequest; + +// Thumbnails ----------------------------------------------------------------- + +typedef CancelableRequest<HistoryService::ThumbnailDataCallback> + GetPageThumbnailRequest; + +// Favicons 
------------------------------------------------------------------- + +typedef CancelableRequest<FaviconService::FaviconDataCallback> + GetFavIconRequest; + +// Downloads ------------------------------------------------------------------ + +typedef CancelableRequest1<HistoryService::DownloadQueryCallback, + std::vector<DownloadCreateInfo> > + DownloadQueryRequest; + +typedef CancelableRequest<HistoryService::DownloadCreateCallback> + DownloadCreateRequest; + +typedef CancelableRequest1<HistoryService::DownloadSearchCallback, + std::vector<int64> > + DownloadSearchRequest; + +// Deletion -------------------------------------------------------------------- + +typedef CancelableRequest<HistoryService::ExpireHistoryCallback> + ExpireHistoryRequest; + +// Segment usage -------------------------------------------------------------- + +typedef CancelableRequest1<HistoryService::SegmentQueryCallback, + ScopedVector<PageUsageData> > + QuerySegmentUsageRequest; + +// Keyword search terms ------------------------------------------------------- + +typedef + CancelableRequest1<HistoryService::GetMostRecentKeywordSearchTermsCallback, + std::vector<KeywordSearchTermVisit> > + GetMostRecentKeywordSearchTermsRequest; + +// Generic operations --------------------------------------------------------- + +// The argument here is an input value, which is the task to run on the +// background thread. The callback is used to execute the portion of the task +// that executes on the main thread. +typedef CancelableRequest1<HistoryService::HistoryDBTaskCallback, + scoped_refptr<HistoryDBTask> > + HistoryDBTaskRequest; + +} // namespace history + +#endif // CHROME_BROWSER_HISTORY_HISTORY_MARSHALING_H__ diff --git a/chrome/browser/history/history_notifications.h b/chrome/browser/history/history_notifications.h new file mode 100644 index 0000000..80fc9d5 --- /dev/null +++ b/chrome/browser/history/history_notifications.h @@ -0,0 +1,74 @@ +// Copyright (c) 2006-2008 The Chromium Authors. 
All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Structs that hold data used in broadcasting notifications. + +#ifndef CHROME_BROWSER_HISTORY_HISTORY_NOTIFICATIONS_H__ +#define CHROME_BROWSER_HISTORY_HISTORY_NOTIFICATIONS_H__ + +#include <set> +#include <vector> + +#include "googleurl/src/gurl.h" +#include "chrome/browser/history/history_types.h" + +namespace history { + +// Base class for history notifications. This needs only a virtual destructor +// so that the history service's broadcaster can delete it when the request +// is complete. +struct HistoryDetails { + public: + virtual ~HistoryDetails() {} +}; + +// Details for HISTORY_URL_VISITED. +struct URLVisitedDetails : public HistoryDetails { + PageTransition::Type transition; + URLRow row; + + // A list of redirects leading up to the URL represented by this struct. If + // we have the redirect chain A -> B -> C and this struct represents visiting + // C, then redirects[0]=B and redirects[1]=A. If there are no redirects, + // this will be an empty vector. + history::RedirectList redirects; +}; + +// Details for NOTIFY_HISTORY_TYPED_URLS_MODIFIED. +struct URLsModifiedDetails : public HistoryDetails { + // Lists the information for each of the URLs affected. + std::vector<URLRow> changed_urls; +}; + +// Details for NOTIFY_HISTORY_URLS_DELETED. +struct URLsDeletedDetails : public HistoryDetails { + // Set when all history was deleted. False means just a subset was deleted. + bool all_history; + + // The list of unique URLs affected. This is valid only when a subset of + // history is deleted. When all of it is deleted, this will be empty, since + // we do not bother to list all URLs. + std::set<GURL> urls; +}; + +// Details for NOTIFY_URLS_STARRED. +struct URLsStarredDetails : public HistoryDetails { + explicit URLsStarredDetails(bool being_starred) : starred(being_starred) {} + + // The new starred state of the list of URLs. 
True when they are being + // starred, false when they are being unstarred. + bool starred; + + // The list of URLs that are changing. + std::set<GURL> changed_urls; +}; + +// Details for NOTIFY_FAVICON_CHANGED. +struct FavIconChangeDetails : public HistoryDetails { + std::set<GURL> urls; +}; + +} // namespace history + +#endif // CHROME_BROWSER_HISTORY_HISTORY_NOTIFICATIONS_H__ diff --git a/chrome/browser/history/history_publisher.cc b/chrome/browser/history/history_publisher.cc new file mode 100644 index 0000000..0392632 --- /dev/null +++ b/chrome/browser/history/history_publisher.cc @@ -0,0 +1,46 @@ +// Copyright (c) 2008-2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/history/history_publisher.h" + +#include "base/utf_string_conversions.h" + +namespace history { + +const char* const HistoryPublisher::kThumbnailImageFormat = "image/jpeg"; + +void HistoryPublisher::PublishPageThumbnail( + const std::vector<unsigned char>& thumbnail, const GURL& url, + const base::Time& time) const { + PageData page_data = { + time, + url, + NULL, + NULL, + kThumbnailImageFormat, + &thumbnail, + }; + + PublishDataToIndexers(page_data); +} + +void HistoryPublisher::PublishPageContent(const base::Time& time, + const GURL& url, + const string16& title, + const string16& contents) const { + std::wstring wide_title = UTF16ToWide(title); + std::wstring wide_contents = UTF16ToWide(contents); + PageData page_data = { + time, + url, + wide_contents.c_str(), + wide_title.c_str(), + NULL, + NULL, + }; + + PublishDataToIndexers(page_data); +} + +} // namespace history diff --git a/chrome/browser/history/history_publisher.h b/chrome/browser/history/history_publisher.h new file mode 100644 index 0000000..5fafc3e --- /dev/null +++ b/chrome/browser/history/history_publisher.h @@ -0,0 +1,84 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CHROME_BROWSER_HISTORY_HISTORY_PUBLISHER_H_ +#define CHROME_BROWSER_HISTORY_HISTORY_PUBLISHER_H_ + +#include <vector> +#include <string> + +#include "base/basictypes.h" +#include "base/string16.h" + +#if defined(OS_WIN) +#include "base/scoped_comptr_win.h" +#include "history_indexer.h" +#endif + +class GURL; + +namespace base { +class Time; +} + +namespace history { + +class HistoryPublisher { + public: + HistoryPublisher(); + ~HistoryPublisher(); + + // Must call this function to complete initialization. Returns true if we + // need to publish data to any indexers registered with us. Returns false if + // there are none registered. On false, no other function should be called. + bool Init(); + + void PublishPageThumbnail(const std::vector<unsigned char>& thumbnail, + const GURL& url, const base::Time& time) const; + void PublishPageContent(const base::Time& time, const GURL& url, + const string16& title, + const string16& contents) const; + void DeleteUserHistoryBetween(const base::Time& begin_time, + const base::Time& end_time) const; + + private: + struct PageData { + const base::Time& time; + const GURL& url; + const wchar_t* html; + const wchar_t* title; + const char* thumbnail_format; + const std::vector<unsigned char>* thumbnail; + }; + + void PublishDataToIndexers(const PageData& page_data) const; + +#if defined(OS_WIN) + // Initializes the indexer_list_ with the list of indexers that registered + // with us to index history. Returns true if there are any registered. + bool ReadRegisteredIndexersFromRegistry(); + + // Converts time represented by the Time class object to variant time in UTC. + // Returns '0' if the time object is NULL. + static double TimeToUTCVariantTime(const base::Time& time); + + typedef std::vector< ScopedComPtr<IChromeHistoryIndexer> > IndexerList; + + // The list of indexers registered to receive history data from us. 
+ IndexerList indexers_; + + // The Registry key under HKCU where the indexers need to register their + // CLSID. + static const wchar_t* const kRegKeyRegisteredIndexersInfo; +#endif + + // The format of the thumbnail we pass to indexers. + static const char* const kThumbnailImageFormat; + + DISALLOW_COPY_AND_ASSIGN(HistoryPublisher); +}; + +} // namespace history + +#endif // CHROME_BROWSER_HISTORY_HISTORY_PUBLISHER_H_ diff --git a/chrome/browser/history/history_publisher_none.cc b/chrome/browser/history/history_publisher_none.cc new file mode 100644 index 0000000..2a164bf --- /dev/null +++ b/chrome/browser/history/history_publisher_none.cc @@ -0,0 +1,34 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// A stub implementation of HistoryPublisher used to provide needed symbols. +// For now there is no equivalent of this functionality on systems other than +// Windows. + +#include "chrome/browser/history/history_publisher.h" + +#include "base/time.h" + +namespace history { + +HistoryPublisher::HistoryPublisher() { +} + +HistoryPublisher::~HistoryPublisher() { +} + +bool HistoryPublisher::Init() { + return false; +} + +void HistoryPublisher::PublishDataToIndexers(const PageData& page_data) + const { +} + +void HistoryPublisher::DeleteUserHistoryBetween(const base::Time& begin_time, + const base::Time& end_time) + const { +} + +} // namespace history diff --git a/chrome/browser/history/history_publisher_win.cc b/chrome/browser/history/history_publisher_win.cc new file mode 100644 index 0000000..cbde619 --- /dev/null +++ b/chrome/browser/history/history_publisher_win.cc @@ -0,0 +1,139 @@ +// Copyright (c) 2008-2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "chrome/browser/history/history_publisher.h" + +#include <atlsafe.h> +#include <objbase.h> +#include <oleauto.h> +#include <wtypes.h> + +#include "base/registry.h" +#include "base/scoped_bstr_win.h" +#include "base/scoped_comptr_win.h" +#include "base/scoped_variant_win.h" +#include "base/string_util.h" +#include "base/time.h" +#include "googleurl/src/gurl.h" + +namespace { + +// Instantiates a IChromeHistoryIndexer COM object. Takes a COM class id +// in |name| and returns the object in |indexer|. Returns false if the +// operation fails. +bool CoCreateIndexerFromName(const wchar_t* name, + IChromeHistoryIndexer** indexer) { + CLSID clsid; + HRESULT hr = CLSIDFromString(const_cast<wchar_t*>(name), &clsid); + if (FAILED(hr)) + return false; + hr = CoCreateInstance(clsid, NULL, CLSCTX_INPROC, + __uuidof(IChromeHistoryIndexer), + reinterpret_cast<void**>(indexer)); + if (FAILED(hr)) + return false; + return true; +} + +// Instantiates the registered indexers from the registry |root| + |path| key +// and adds them to the |indexers| list. +void AddRegisteredIndexers(HKEY root, const wchar_t* path, + std::vector< ScopedComPtr<IChromeHistoryIndexer> >* indexers) { + IChromeHistoryIndexer* indexer; + RegistryKeyIterator r_iter(root, path); + while (r_iter.Valid()) { + if (CoCreateIndexerFromName(r_iter.Name(), &indexer)) { + indexers->push_back(ScopedComPtr<IChromeHistoryIndexer>(indexer)); + indexer->Release(); + } + ++r_iter; + } +} + +} // namespace + +namespace history { + +const wchar_t* const HistoryPublisher::kRegKeyRegisteredIndexersInfo = + L"Software\\Google\\Google Chrome\\IndexerPlugins"; + +// static +double HistoryPublisher::TimeToUTCVariantTime(const base::Time& time) { + double var_time = 0; + if (!time.is_null()) { + base::Time::Exploded exploded; + time.UTCExplode(&exploded); + + // Create the system time struct representing our exploded time. 
+ SYSTEMTIME system_time; + system_time.wYear = exploded.year; + system_time.wMonth = exploded.month; + system_time.wDayOfWeek = exploded.day_of_week; + system_time.wDay = exploded.day_of_month; + system_time.wHour = exploded.hour; + system_time.wMinute = exploded.minute; + system_time.wSecond = exploded.second; + system_time.wMilliseconds = exploded.millisecond; + SystemTimeToVariantTime(&system_time, &var_time); + } + + return var_time; +} + +HistoryPublisher::HistoryPublisher() { + CoInitialize(NULL); +} + +HistoryPublisher::~HistoryPublisher() { + CoUninitialize(); +} + +bool HistoryPublisher::Init() { + return ReadRegisteredIndexersFromRegistry(); +} + +// Peruse the registry for Indexer to instantiate and store in |indexers_|. +// Return true if we found at least one indexer object. We look both in HKCU +// and HKLM. +bool HistoryPublisher::ReadRegisteredIndexersFromRegistry() { + AddRegisteredIndexers(HKEY_CURRENT_USER, + kRegKeyRegisteredIndexersInfo, &indexers_); + AddRegisteredIndexers(HKEY_LOCAL_MACHINE, + kRegKeyRegisteredIndexersInfo, &indexers_); + return indexers_.size() > 0; +} + +void HistoryPublisher::PublishDataToIndexers(const PageData& page_data) + const { + double var_time = TimeToUTCVariantTime(page_data.time); + + CComSafeArray<unsigned char> thumbnail_arr; + if (page_data.thumbnail) { + for (size_t i = 0; i < page_data.thumbnail->size(); ++i) + thumbnail_arr.Add((*page_data.thumbnail)[i]); + } + + // Send data to registered indexers. 
+ ScopedVariant time(var_time, VT_DATE); + ScopedBstr url(ASCIIToWide(page_data.url.spec()).c_str()); + ScopedBstr html(page_data.html); + ScopedBstr title(page_data.title); + ScopedBstr format(ASCIIToWide(page_data.thumbnail_format).c_str()); + ScopedVariant psa(thumbnail_arr.m_psa); + for (size_t i = 0; i < indexers_.size(); ++i) { + indexers_[i]->SendPageData(time, url, html, title, format, psa); + } +} + +void HistoryPublisher::DeleteUserHistoryBetween(const base::Time& begin_time, + const base::Time& end_time) + const { + ScopedVariant var_begin_time(TimeToUTCVariantTime(begin_time), VT_DATE); + ScopedVariant var_end_time(TimeToUTCVariantTime(end_time), VT_DATE); + for (size_t i = 0; i < indexers_.size(); ++i) { + indexers_[i]->DeleteUserHistoryBetween(var_begin_time, var_end_time); + } +} + +} // namespace history diff --git a/chrome/browser/history/history_querying_unittest.cc b/chrome/browser/history/history_querying_unittest.cc new file mode 100644 index 0000000..7512786 --- /dev/null +++ b/chrome/browser/history/history_querying_unittest.cc @@ -0,0 +1,350 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/basictypes.h" +#include "base/callback.h" +#include "base/file_path.h" +#include "base/file_util.h" +#include "base/path_service.h" +#include "base/utf_string_conversions.h" +#include "chrome/browser/history/history.h" +#include "testing/gtest/include/gtest/gtest.h" + +using base::Time; +using base::TimeDelta; + +// Tests the history service for querying functionality. + +namespace history { + +namespace { + +struct TestEntry { + const char* url; + const char* title; + const int days_ago; + const char* body; + Time time; // Filled by SetUp. +} test_entries[] = { + // This one is visited super long ago so it will be in a different database + // from the next appearance of it at the end. 
+ {"http://example.com/", "Other", 180, "Other"}, + + // These are deliberately added out of chronological order. The history + // service should sort them by visit time when returning query results. + // The correct index sort order is 4 2 3 1 0. + {"http://www.google.com/1", "Title 1", 10, + "PAGEONE FOO some body text"}, + {"http://www.google.com/3", "Title 3", 8, + "PAGETHREE BAR some hello world for you"}, + {"http://www.google.com/2", "Title 2", 9, + "PAGETWO FOO some more blah blah blah"}, + + // A more recent visit of the first one. + {"http://example.com/", "Other", 6, "Other"}, +}; + +// Returns true if the nth result in the given results set matches. It will +// return false on a non-match or if there aren't enough results. +bool NthResultIs(const QueryResults& results, + int n, // Result index to check. + int test_entry_index) { // Index of test_entries to compare. + if (static_cast<int>(results.size()) <= n) + return false; + + const URLResult& result = results[n]; + + // Check the visit time. + if (result.visit_time() != test_entries[test_entry_index].time) + return false; + + // Now check the URL & title. + return result.url() == GURL(test_entries[test_entry_index].url) && + result.title() == UTF8ToUTF16(test_entries[test_entry_index].title); +} + +} // namespace + +class HistoryQueryTest : public testing::Test { + public: + HistoryQueryTest() { + } + + // Acts like a synchronous call to history's QueryHistory. + void QueryHistory(const std::string& text_query, + const QueryOptions& options, + QueryResults* results) { + history_->QueryHistory(UTF8ToUTF16(text_query), options, &consumer_, + NewCallback(this, &HistoryQueryTest::QueryHistoryComplete)); + MessageLoop::current()->Run(); // Will go until ...Complete calls Quit. 
+ results->Swap(&last_query_results_); + } + + protected: + scoped_refptr<HistoryService> history_; + + private: + virtual void SetUp() { + FilePath temp_dir; + PathService::Get(base::DIR_TEMP, &temp_dir); + history_dir_ = temp_dir.AppendASCII("HistoryTest"); + file_util::Delete(history_dir_, true); + file_util::CreateDirectory(history_dir_); + + history_ = new HistoryService; + if (!history_->Init(history_dir_, NULL)) { + history_ = NULL; // Tests should notice this NULL ptr & fail. + return; + } + + // Fill the test data. + Time now = Time::Now().LocalMidnight(); + for (size_t i = 0; i < arraysize(test_entries); i++) { + test_entries[i].time = + now - (test_entries[i].days_ago * TimeDelta::FromDays(1)); + + // We need the ID scope and page ID so that the visit tracker can find it. + const void* id_scope = reinterpret_cast<void*>(1); + int32 page_id = i; + GURL url(test_entries[i].url); + + history_->AddPage(url, test_entries[i].time, id_scope, page_id, GURL(), + PageTransition::LINK, history::RedirectList(), + false); + history_->SetPageTitle(url, UTF8ToUTF16(test_entries[i].title)); + history_->SetPageContents(url, UTF8ToUTF16(test_entries[i].body)); + } + } + + virtual void TearDown() { + if (history_.get()) { + history_->SetOnBackendDestroyTask(new MessageLoop::QuitTask); + history_->Cleanup(); + history_ = NULL; + MessageLoop::current()->Run(); // Wait for the other thread. + } + file_util::Delete(history_dir_, true); + } + + void QueryHistoryComplete(HistoryService::Handle, QueryResults* results) { + results->Swap(&last_query_results_); + MessageLoop::current()->Quit(); // Will return out to QueryHistory. + } + + MessageLoop message_loop_; + + FilePath history_dir_; + + CancelableRequestConsumer consumer_; + + // The QueryHistoryComplete callback will put the results here so QueryHistory + // can return them. 
+ QueryResults last_query_results_; + + DISALLOW_COPY_AND_ASSIGN(HistoryQueryTest); +}; + +TEST_F(HistoryQueryTest, Basic) { + ASSERT_TRUE(history_.get()); + + QueryOptions options; + QueryResults results; + + // Test duplicate collapsing. + QueryHistory(std::string(), options, &results); + EXPECT_EQ(4U, results.size()); + EXPECT_TRUE(NthResultIs(results, 0, 4)); + EXPECT_TRUE(NthResultIs(results, 1, 2)); + EXPECT_TRUE(NthResultIs(results, 2, 3)); + EXPECT_TRUE(NthResultIs(results, 3, 1)); + + // Next query a time range. The beginning should be inclusive, the ending + // should be exclusive. + options.begin_time = test_entries[3].time; + options.end_time = test_entries[2].time; + QueryHistory(std::string(), options, &results); + EXPECT_EQ(1U, results.size()); + EXPECT_TRUE(NthResultIs(results, 0, 3)); +} + +// Tests max_count feature for basic (non-Full Text Search) queries. +TEST_F(HistoryQueryTest, BasicCount) { + ASSERT_TRUE(history_.get()); + + QueryOptions options; + QueryResults results; + + // Query all time but with a limit on the number of entries. We should + // get the N most recent entries. 
+ options.max_count = 2; + QueryHistory(std::string(), options, &results); + EXPECT_EQ(2U, results.size()); + EXPECT_TRUE(NthResultIs(results, 0, 4)); + EXPECT_TRUE(NthResultIs(results, 1, 2)); +} + +TEST_F(HistoryQueryTest, ReachedBeginning) { + ASSERT_TRUE(history_.get()); + + QueryOptions options; + QueryResults results; + + QueryHistory(std::string(), options, &results); + EXPECT_TRUE(results.reached_beginning()); + + options.begin_time = test_entries[1].time; + QueryHistory(std::string(), options, &results); + EXPECT_FALSE(results.reached_beginning()); + + options.begin_time = test_entries[0].time + TimeDelta::FromMicroseconds(1); + QueryHistory(std::string(), options, &results); + EXPECT_FALSE(results.reached_beginning()); + + options.begin_time = test_entries[0].time; + QueryHistory(std::string(), options, &results); + EXPECT_TRUE(results.reached_beginning()); + + options.begin_time = test_entries[0].time - TimeDelta::FromMicroseconds(1); + QueryHistory(std::string(), options, &results); + EXPECT_TRUE(results.reached_beginning()); +} + +// This does most of the same tests above, but searches for a FTS string that +// will match the pages in question. This will trigger a different code path. +TEST_F(HistoryQueryTest, FTS) { + ASSERT_TRUE(history_.get()); + + QueryOptions options; + QueryResults results; + + // Query all of them to make sure they are there and in order. Note that + // this query will return the starred item twice since we requested all + // starred entries and no de-duping. + QueryHistory("some", options, &results); + EXPECT_EQ(3U, results.size()); + EXPECT_TRUE(NthResultIs(results, 0, 2)); + EXPECT_TRUE(NthResultIs(results, 1, 3)); + EXPECT_TRUE(NthResultIs(results, 2, 1)); + + // Do a query that should only match one of them. + QueryHistory("PAGETWO", options, &results); + EXPECT_EQ(1U, results.size()); + EXPECT_TRUE(NthResultIs(results, 0, 3)); + + // Next query a time range. 
The beginning should be inclusive, the ending + // should be exclusive. + options.begin_time = test_entries[1].time; + options.end_time = test_entries[3].time; + QueryHistory("some", options, &results); + EXPECT_EQ(1U, results.size()); + EXPECT_TRUE(NthResultIs(results, 0, 1)); +} + +// Searches titles. +TEST_F(HistoryQueryTest, FTSTitle) { + ASSERT_TRUE(history_.get()); + + QueryOptions options; + QueryResults results; + + // Query all time but with a limit on the number of entries. We should + // get the N most recent entries. + QueryHistory("title", options, &results); + EXPECT_EQ(3U, results.size()); + EXPECT_TRUE(NthResultIs(results, 0, 2)); + EXPECT_TRUE(NthResultIs(results, 1, 3)); + EXPECT_TRUE(NthResultIs(results, 2, 1)); +} + +// Tests prefix searching for Full Text Search queries. +TEST_F(HistoryQueryTest, FTSPrefix) { + ASSERT_TRUE(history_.get()); + + QueryOptions options; + QueryResults results; + + // Query with a prefix search. Should return matches for "PAGETWO" and + // "PAGETHREE". + QueryHistory("PAGET", options, &results); + EXPECT_EQ(2U, results.size()); + EXPECT_TRUE(NthResultIs(results, 0, 2)); + EXPECT_TRUE(NthResultIs(results, 1, 3)); +} + +// Tests max_count feature for Full Text Search queries. +TEST_F(HistoryQueryTest, FTSCount) { + ASSERT_TRUE(history_.get()); + + QueryOptions options; + QueryResults results; + + // Query all time but with a limit on the number of entries. We should + // get the N most recent entries. + options.max_count = 2; + QueryHistory("some", options, &results); + EXPECT_EQ(2U, results.size()); + EXPECT_TRUE(NthResultIs(results, 0, 2)); + EXPECT_TRUE(NthResultIs(results, 1, 3)); + + // Now query a subset of the pages and limit by N items. "FOO" should match + // the 2nd & 3rd pages, but we should only get the 3rd one because of the one + // page max restriction. 
+ options.max_count = 1; + QueryHistory("FOO", options, &results); + EXPECT_EQ(1U, results.size()); + EXPECT_TRUE(NthResultIs(results, 0, 3)); +} + +// Tests that FTS queries can find URLs when they exist only in the archived +// database. This also tests that imported URLs can be found, since we use +// AddPageWithDetails just like the importer. +TEST_F(HistoryQueryTest, FTSArchived) { + ASSERT_TRUE(history_.get()); + + std::vector<URLRow> urls_to_add; + + URLRow row1(GURL("http://foo.bar/")); + row1.set_title(UTF8ToUTF16("archived title")); + row1.set_last_visit(Time::Now() - TimeDelta::FromDays(365)); + urls_to_add.push_back(row1); + + URLRow row2(GURL("http://foo.bar/")); + row2.set_title(UTF8ToUTF16("nonarchived title")); + row2.set_last_visit(Time::Now()); + urls_to_add.push_back(row2); + + history_->AddPagesWithDetails(urls_to_add); + + QueryOptions options; + QueryResults results; + + // Query all time. The title we get should be the one in the full text + // database and not the most current title (since otherwise highlighting in + // the title might be wrong). + QueryHistory("archived", options, &results); + ASSERT_EQ(1U, results.size()); + EXPECT_TRUE(row1.url() == results[0].url()); + EXPECT_TRUE(row1.title() == results[0].title()); +} + +/* TODO(brettw) re-enable this. It is commented out because the current history + code prohibits adding more than one indexed page with the same URL. When we + have tiered history, there could be a dupe in the archived history which + won't get picked up by the deletor and it can happen again. When this is the + case, we should fix this test to duplicate that situation. + +// Tests duplicate collapsing and not in Full Text Search situations. 
+TEST_F(HistoryQueryTest, FTSDupes) { + ASSERT_TRUE(history_.get()); + + QueryOptions options; + QueryResults results; + + QueryHistory("Other", options, &results); + EXPECT_EQ(1, results.urls().size()); + EXPECT_TRUE(NthResultIs(results, 0, 4)); +} +*/ + +} // namespace history diff --git a/chrome/browser/history/history_types.cc b/chrome/browser/history/history_types.cc new file mode 100644 index 0000000..50395aa --- /dev/null +++ b/chrome/browser/history/history_types.cc @@ -0,0 +1,240 @@ +// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/history/history_types.h" + +#include <limits> + +#include "base/logging.h" +#include "base/stl_util-inl.h" + +using base::Time; + +namespace history { + +// URLRow ---------------------------------------------------------------------- + +void URLRow::Swap(URLRow* other) { + std::swap(id_, other->id_); + url_.Swap(&other->url_); + title_.swap(other->title_); + std::swap(visit_count_, other->visit_count_); + std::swap(typed_count_, other->typed_count_); + std::swap(last_visit_, other->last_visit_); + std::swap(hidden_, other->hidden_); + std::swap(favicon_id_, other->favicon_id_); +} + +void URLRow::Initialize() { + id_ = 0; + visit_count_ = false; + typed_count_ = false; + last_visit_ = Time(); + hidden_ = false; + favicon_id_ = 0; +} + +// VisitRow -------------------------------------------------------------------- + +VisitRow::VisitRow() + : visit_id(0), + url_id(0), + referring_visit(0), + transition(PageTransition::LINK), + segment_id(0), + is_indexed(false) { +} + +VisitRow::VisitRow(URLID arg_url_id, + Time arg_visit_time, + VisitID arg_referring_visit, + PageTransition::Type arg_transition, + SegmentID arg_segment_id) + : visit_id(0), + url_id(arg_url_id), + visit_time(arg_visit_time), + referring_visit(arg_referring_visit), + transition(arg_transition), + 
segment_id(arg_segment_id), + is_indexed(false) { +} + +// StarredEntry ---------------------------------------------------------------- + +StarredEntry::StarredEntry() + : id(0), + parent_group_id(0), + group_id(0), + visual_order(0), + type(URL), + url_id(0) { +} + +void StarredEntry::Swap(StarredEntry* other) { + std::swap(id, other->id); + title.swap(other->title); + std::swap(date_added, other->date_added); + std::swap(parent_group_id, other->parent_group_id); + std::swap(group_id, other->group_id); + std::swap(visual_order, other->visual_order); + std::swap(type, other->type); + url.Swap(&other->url); + std::swap(url_id, other->url_id); + std::swap(date_group_modified, other->date_group_modified); +} + +// URLResult ------------------------------------------------------------------- + +void URLResult::Swap(URLResult* other) { + URLRow::Swap(other); + std::swap(visit_time_, other->visit_time_); + snippet_.Swap(&other->snippet_); + title_match_positions_.swap(other->title_match_positions_); +} + +// QueryResults ---------------------------------------------------------------- + +QueryResults::QueryResults() : reached_beginning_(false) { +} + +QueryResults::~QueryResults() { + // Free all the URL objects. + STLDeleteContainerPointers(results_.begin(), results_.end()); +} + +const size_t* QueryResults::MatchesForURL(const GURL& url, + size_t* num_matches) const { + URLToResultIndices::const_iterator found = url_to_results_.find(url); + if (found == url_to_results_.end()) { + if (num_matches) + *num_matches = 0; + return NULL; + } + + // All entries in the map should have at least one index, otherwise it + // shouldn't be in the map. 
+ DCHECK(found->second->size() > 0); + if (num_matches) + *num_matches = found->second->size(); + return &found->second->front(); +} + +void QueryResults::Swap(QueryResults* other) { + std::swap(first_time_searched_, other->first_time_searched_); + std::swap(reached_beginning_, other->reached_beginning_); + results_.swap(other->results_); + url_to_results_.swap(other->url_to_results_); +} + +void QueryResults::AppendURLBySwapping(URLResult* result) { + URLResult* new_result = new URLResult; + new_result->Swap(result); + + results_.push_back(new_result); + AddURLUsageAtIndex(new_result->url(), results_.size() - 1); +} + +void QueryResults::AppendResultsBySwapping(QueryResults* other, + bool remove_dupes) { + if (remove_dupes) { + // Delete all entries in the other array that are already in this one. + for (size_t i = 0; i < results_.size(); i++) + other->DeleteURL(results_[i]->url()); + } + + if (first_time_searched_ > other->first_time_searched_) + std::swap(first_time_searched_, other->first_time_searched_); + + if (reached_beginning_ != other->reached_beginning_) + std::swap(reached_beginning_, other->reached_beginning_); + + for (size_t i = 0; i < other->results_.size(); i++) { + // Just transfer pointer ownership. + results_.push_back(other->results_[i]); + AddURLUsageAtIndex(results_.back()->url(), results_.size() - 1); + } + + // We just took ownership of all the results in the input vector. + other->results_.clear(); + other->url_to_results_.clear(); +} + +void QueryResults::DeleteURL(const GURL& url) { + // Delete all instances of this URL. We re-query each time since each + // mutation will cause the indices to change. + while (const size_t* match_indices = MatchesForURL(url, NULL)) + DeleteRange(*match_indices, *match_indices); +} + +void QueryResults::DeleteRange(size_t begin, size_t end) { + DCHECK(begin <= end && begin < size() && end < size()); + + // First delete the pointers in the given range and store all the URLs that + // were modified. 
We will delete references to these later. + std::set<GURL> urls_modified; + for (size_t i = begin; i <= end; i++) { + urls_modified.insert(results_[i]->url()); + delete results_[i]; + results_[i] = NULL; + } + + // Now just delete that range in the vector en masse (the STL ending is + // exclusive, while ours is inclusive, hence the +1). + results_.erase(results_.begin() + begin, results_.begin() + end + 1); + + // Delete the indices referencing the deleted entries. + for (std::set<GURL>::const_iterator url = urls_modified.begin(); + url != urls_modified.end(); ++url) { + URLToResultIndices::iterator found = url_to_results_.find(*url); + if (found == url_to_results_.end()) { + NOTREACHED(); + continue; + } + + // Need a signed loop type since we do -- which may take us to -1. + for (int match = 0; match < static_cast<int>(found->second->size()); + match++) { + if (found->second[match] >= begin && found->second[match] <= end) { + // Remove this reference from the list. + found->second->erase(found->second->begin() + match); + match--; + } + } + + // Clear out any empty lists if we just made one. + if (found->second->empty()) + url_to_results_.erase(found); + } + + // Shift all other indices over to account for the removed ones. + AdjustResultMap(end + 1, std::numeric_limits<size_t>::max(), + -static_cast<ptrdiff_t>(end - begin + 1)); +} + +void QueryResults::AddURLUsageAtIndex(const GURL& url, size_t index) { + URLToResultIndices::iterator found = url_to_results_.find(url); + if (found != url_to_results_.end()) { + // The URL is already in the list, so we can just append the new index. + found->second->push_back(index); + return; + } + + // Need to add a new entry for this URL. 
+ StackVector<size_t, 4> new_list; + new_list->push_back(index); + url_to_results_[url] = new_list; +} + +void QueryResults::AdjustResultMap(size_t begin, size_t end, ptrdiff_t delta) { + for (URLToResultIndices::iterator i = url_to_results_.begin(); + i != url_to_results_.end(); ++i) { + for (size_t match = 0; match < i->second->size(); match++) { + size_t match_index = i->second[match]; + if (match_index >= begin && match_index <= end) + i->second[match] += delta; + } + } +} + +} // namespace history diff --git a/chrome/browser/history/history_types.h b/chrome/browser/history/history_types.h new file mode 100644 index 0000000..f7bc7fb --- /dev/null +++ b/chrome/browser/history/history_types.h @@ -0,0 +1,532 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CHROME_BROWSER_HISTORY_HISTORY_TYPES_H_ +#define CHROME_BROWSER_HISTORY_HISTORY_TYPES_H_ + +#include <map> +#include <set> +#include <string> +#include <vector> + +#include "base/basictypes.h" +#include "base/stack_container.h" +#include "base/string16.h" +#include "base/time.h" +#include "chrome/browser/history/snippet.h" +#include "chrome/common/page_transition_types.h" +#include "chrome/common/ref_counted_util.h" +#include "googleurl/src/gurl.h" + +namespace history { + +// Forward declaration for friend statements. +class HistoryBackend; +class URLDatabase; + +// Structure to hold redirect lists for URLs. For a redirect chain +// A -> B -> C, and entry in the map would look like "A => {B -> C}". +typedef std::map<GURL, scoped_refptr<RefCountedVector<GURL> > > RedirectMap; + +// Container for a list of URLs. +typedef std::vector<GURL> RedirectList; + +typedef int64 StarID; // Unique identifier for star entries. +typedef int64 UIStarID; // Identifier for star entries that come from the UI. +typedef int64 DownloadID; // Identifier for a download. 
+typedef int64 FavIconID; // For FavIcons. +typedef int64 SegmentID; // URL segments for the most visited view. + +// URLRow --------------------------------------------------------------------- + +typedef int64 URLID; + +// Holds all information globally associated with one URL (one row in the +// URL table). +// +// This keeps track of dirty bits, which are currently unused: +// +// TODO(brettw) the dirty bits are broken in a number of respects. First, the +// database will want to update them on a const object, so they need to be +// mutable. +// +// Second, there is a problem copying. If you make a copy of this structure +// (as we allow since we put this into vectors in various places) then the +// dirty bits will not be in sync for these copies. +class URLRow { + public: + URLRow() { + Initialize(); + } + + explicit URLRow(const GURL& url) : url_(url) { + // Initialize will not set the URL, so our initialization above will stay. + Initialize(); + } + + // We need to be able to set the id of a URLRow that's being passed through + // an IPC message. This constructor should probably not be used otherwise. + URLRow(const GURL& url, URLID id) : url_(url) { + // Initialize will not set the URL, so our initialization above will stay. + Initialize(); + // Initialize will zero the id_, so set it here. + id_ = id; + } + + virtual ~URLRow() {} + + URLID id() const { return id_; } + const GURL& url() const { return url_; } + + const string16& title() const { + return title_; + } + void set_title(const string16& title) { + // The title is frequently set to the same thing, so we don't bother + // updating unless the string has changed. + if (title != title_) { + title_ = title; + } + } + + int visit_count() const { + return visit_count_; + } + void set_visit_count(int visit_count) { + visit_count_ = visit_count; + } + + // Number of times the URL was typed in the Omnibox. 
+ int typed_count() const { + return typed_count_; + } + void set_typed_count(int typed_count) { + typed_count_ = typed_count; + } + + base::Time last_visit() const { + return last_visit_; + } + void set_last_visit(base::Time last_visit) { + last_visit_ = last_visit; + } + + // If this is set, we won't autocomplete this URL. + bool hidden() const { + return hidden_; + } + void set_hidden(bool hidden) { + hidden_ = hidden; + } + + // ID of the favicon. A value of 0 means the favicon isn't known yet. + FavIconID favicon_id() const { return favicon_id_; } + void set_favicon_id(FavIconID favicon_id) { + favicon_id_ = favicon_id; + } + + // Swaps the contents of this URLRow with another, which allows it to be + // destructively copied without memory allocations. + // (Virtual because it's overridden by URLResult.) + virtual void Swap(URLRow* other); + + private: + // This class writes directly into this structure and clears our dirty bits + // when reading out of the DB. + friend class URLDatabase; + friend class HistoryBackend; + + // Initializes all values that need initialization to their defaults. + // This excludes objects which autoinitialize such as strings. + void Initialize(); + + // The row ID of this URL. Immutable except for the database which sets it + // when it pulls them out. + URLID id_; + + // The URL of this row. Immutable except for the database which sets it + // when it pulls them out. If clients want to change it, they must use + // the constructor to make a new one. + GURL url_; + + string16 title_; + + // Total number of times this URL has been visited. + int visit_count_; + + // Number of times this URL has been manually entered in the URL bar. + int typed_count_; + + // The date of the last visit of this URL, which saves us from having to + // loop up in the visit table for things like autocomplete and expiration. 
+ base::Time last_visit_; + + // Indicates this entry should now be shown in typical UI or queries, this + // is usually for subframes. + bool hidden_; + + // The ID of the favicon for this url. + FavIconID favicon_id_; + + // We support the implicit copy constuctor and operator=. +}; + +// VisitRow ------------------------------------------------------------------- + +typedef int64 VisitID; + +// Holds all information associated with a specific visit. A visit holds time +// and referrer information for one time a URL is visited. +class VisitRow { + public: + VisitRow(); + VisitRow(URLID arg_url_id, + base::Time arg_visit_time, + VisitID arg_referring_visit, + PageTransition::Type arg_transition, + SegmentID arg_segment_id); + + // ID of this row (visit ID, used a a referrer for other visits). + VisitID visit_id; + + // Row ID into the URL table of the URL that this page is. + URLID url_id; + + base::Time visit_time; + + // Indicates another visit that was the referring page for this one. + // 0 indicates no referrer. + VisitID referring_visit; + + // A combination of bits from PageTransition. + PageTransition::Type transition; + + // The segment id (see visitsegment_database.*). + // If 0, the segment id is null in the table. + SegmentID segment_id; + + // True when this visit has indexed data for it. We try to keep this in sync + // with the full text index: when we add or remove things from there, we will + // update the visit table as well. However, that file could get deleted, or + // out of sync in various ways, so this flag should be false when things + // change. + bool is_indexed; + + // Compares two visits based on dates, for sorting. + bool operator<(const VisitRow& other) { + return visit_time < other.visit_time; + } + + // We allow the implicit copy constuctor and operator=. 
+}; + +// We pass around vectors of visits a lot +typedef std::vector<VisitRow> VisitVector; + +// Favicons ------------------------------------------------------------------- + +// Used by the importer to set favicons for imported bookmarks. +struct ImportedFavIconUsage { + // The URL of the favicon. + GURL favicon_url; + + // The raw png-encoded data. + std::vector<unsigned char> png_data; + + // The list of URLs using this favicon. + std::set<GURL> urls; +}; + +// PageVisit ------------------------------------------------------------------ + +// Represents a simplified version of a visit for external users. Normally, +// views are only interested in the time, and not the other information +// associated with a VisitRow. +struct PageVisit { + URLID page_id; + base::Time visit_time; +}; + +// StarredEntry --------------------------------------------------------------- + +// StarredEntry represents either a starred page, or a star grouping (where +// a star grouping consists of child starred entries). Use the type to +// determine the type of a particular entry. +// +// The database internally uses the id field to uniquely identify a starred +// entry. On the other hand, the UI, which is anything routed through +// HistoryService and HistoryBackend (including BookmarkBarView), uses the +// url field to uniquely identify starred entries of type URL and the group_id +// field to uniquely identify starred entries of type USER_GROUP. For example, +// HistoryService::UpdateStarredEntry identifies the entry by url (if the +// type is URL) or group_id (if the type is not URL). +struct StarredEntry { + enum Type { + // Type represents a starred URL (StarredEntry). + URL, + + // The bookmark bar grouping. + BOOKMARK_BAR, + + // User created group. + USER_GROUP, + + // The "other bookmarks" folder that holds uncategorized bookmarks. + OTHER + }; + + StarredEntry(); + + void Swap(StarredEntry* other); + + // Unique identifier of this entry. + StarID id; + + // Title. 
+ string16 title; + + // When this was added. + base::Time date_added; + + // Group ID of the star group this entry is in. If 0, this entry is not + // in a star group. + UIStarID parent_group_id; + + // Unique identifier for groups. This is assigned by the UI. + // + // WARNING: this is NOT the same as id, id is assigned by the database, + // this is assigned by the UI. See note about StarredEntry for more info. + UIStarID group_id; + + // Visual order within the parent. Only valid if group_id is not 0. + int visual_order; + + // Type of this entry (see enum). + Type type; + + // If type == URL, this is the URL of the page that was starred. + GURL url; + + // If type == URL, this is the ID of the URL of the primary page that was + // starred. + history::URLID url_id; + + // Time the entry was last modified. This is only used for groups and + // indicates the last time a URL was added as a child to the group. + base::Time date_group_modified; +}; + +// URLResult ------------------------------------------------------------------- + +class URLResult : public URLRow { + public: + URLResult() {} + URLResult(const GURL& url, base::Time visit_time) + : URLRow(url), + visit_time_(visit_time) { + } + // Constructor that create a URLResult from the specified URL and title match + // positions from title_matches. + URLResult(const GURL& url, const Snippet::MatchPositions& title_matches) + : URLRow(url) { + title_match_positions_ = title_matches; + } + + base::Time visit_time() const { return visit_time_; } + void set_visit_time(base::Time visit_time) { visit_time_ = visit_time; } + + const Snippet& snippet() const { return snippet_; } + + // If this is a title match, title_match_positions contains an entry for + // every word in the title that matched one of the query parameters. Each + // entry contains the start and end of the match. 
+ const Snippet::MatchPositions& title_match_positions() const { + return title_match_positions_; + } + + virtual void Swap(URLResult* other); + + private: + friend class HistoryBackend; + + // The time that this result corresponds to. + base::Time visit_time_; + + // These values are typically set by HistoryBackend. + Snippet snippet_; + Snippet::MatchPositions title_match_positions_; + + // We support the implicit copy constructor and operator=. +}; + +// QueryResults ---------------------------------------------------------------- + +// Encapsulates the results of a history query. It supports an ordered list of +// URLResult objects, plus an efficient way of looking up the index of each time +// a given URL appears in those results. +class QueryResults { + public: + typedef std::vector<URLResult*> URLResultVector; + + QueryResults(); + ~QueryResults(); + + // Indicates the first time that the query includes results for (queries are + // clipped at the beginning, so it will always include to the end of the time + // queried). + // + // If the number of results was clipped as a result of the max count, this + // will be the time of the first query returned. If there were fewer results + // than we were allowed to return, this represents the first date considered + // in the query (this will be before the first result if there was time + // queried with no results). + // + // TODO(brettw): bug 1203054: This field is not currently set properly! Do + // not use until the bug is fixed. + base::Time first_time_searched() const { return first_time_searched_; } + void set_first_time_searched(base::Time t) { first_time_searched_ = t; } + // Note: If you need end_time_searched, it can be added. 
+ + void set_reached_beginning(bool reached) { reached_beginning_ = reached; } + bool reached_beginning() { return reached_beginning_; } + + size_t size() const { return results_.size(); } + bool empty() const { return results_.empty(); } + + URLResult& operator[](size_t i) { return *results_[i]; } + const URLResult& operator[](size_t i) const { return *results_[i]; } + + URLResultVector::const_iterator begin() const { return results_.begin(); } + URLResultVector::const_iterator end() const { return results_.end(); } + URLResultVector::const_reverse_iterator rbegin() const { + return results_.rbegin(); + } + URLResultVector::const_reverse_iterator rend() const { + return results_.rend(); + } + + // Returns a pointer to the beginning of an array of all matching indices + // for entries with the given URL. The array will be |*num_matches| long. + // |num_matches| can be NULL if the caller is not interested in the number of + // results (commonly it will only be interested in the first one and can test + // the pointer for NULL). + // + // When there is no match, it will return NULL and |*num_matches| will be 0. + const size_t* MatchesForURL(const GURL& url, size_t* num_matches) const; + + // Swaps the current result with another. This allows ownership to be + // efficiently transferred without copying. + void Swap(QueryResults* other); + + // Adds the given result to the map, using swap() on the members to avoid + // copying (there are a lot of strings and vectors). This means the parameter + // object will be cleared after this call. + void AppendURLBySwapping(URLResult* result); + + // Appends a new result set to the other. The |other| results will be + // destroyed because the pointer ownership will just be transferred. When + // |remove_dupes| is set, each URL that appears in this array will be removed + // from the |other| array before appending. 
+ void AppendResultsBySwapping(QueryResults* other, bool remove_dupes); + + // Removes all instances of the given URL from the result set. + void DeleteURL(const GURL& url); + + // Deletes the given range of items in the result set. + void DeleteRange(size_t begin, size_t end); + + private: + // Maps the given URL to a list of indices into results_ which identify each + // time an entry with that URL appears. Normally, each URL will have one or + // very few indices after it, so we optimize this to use statically allocated + // memory when possible. + typedef std::map<GURL, StackVector<size_t, 4> > URLToResultIndices; + + // Inserts an entry into the |url_to_results_| map saying that the given URL + // is at the given index in the results_. + void AddURLUsageAtIndex(const GURL& url, size_t index); + + // Adds |delta| to each index in url_to_results_ in the range [begin,end] + // (this is inclusive). This is used when inserting or deleting. + void AdjustResultMap(size_t begin, size_t end, ptrdiff_t delta); + + base::Time first_time_searched_; + + // Whether the query reaches the beginning of the database. + bool reached_beginning_; + + // The ordered list of results. The pointers inside this are owned by this + // QueryResults object. + URLResultVector results_; + + // Maps URLs to entries in results_. + URLToResultIndices url_to_results_; + + DISALLOW_COPY_AND_ASSIGN(QueryResults); +}; + +// QueryOptions ---------------------------------------------------------------- + +struct QueryOptions { + QueryOptions() : max_count(0) {} + + // The time range to search for matches in. + // + // This will match only the one recent visit of a URL. For text search + // queries, if the URL was visited in the given time period, but has also been + // visited more recently than that, it will not be returned. When the text + // query is empty, this will return the most recent visit within the time + // range. 
+ // + // As a special case, if both times are is_null(), then the entire database + // will be searched. However, if you set one, you must set the other. + // + // The beginning is inclusive and the ending is exclusive. + base::Time begin_time; + base::Time end_time; + + // Sets the query time to the last |days_ago| days to the present time. + void SetRecentDayRange(int days_ago) { + end_time = base::Time::Now(); + begin_time = end_time - base::TimeDelta::FromDays(days_ago); + } + + // The maximum number of results to return. The results will be sorted with + // the most recent first, so older results may not be returned if there is not + // enough room. When 0, this will return everything (the default). + int max_count; +}; + +// KeywordSearchTermVisit ----------------------------------------------------- + +// KeywordSearchTermVisit is returned from GetMostRecentKeywordSearchTerms. It +// gives the time and search term of the keyword visit. +struct KeywordSearchTermVisit { + // The time of the visit. + base::Time time; + + // The search term that was used. + string16 term; +}; + +// MostVisitedURL -------------------------------------------------------------- + +// Holds the per-URL information of the most visited query. +struct MostVisitedURL { + GURL url; + GURL favicon_url; + string16 title; + + RedirectList redirects; + + bool operator==(const MostVisitedURL& other) { + return url == other.url; + } +}; + +typedef std::vector<MostVisitedURL> MostVisitedURLList; + +} // history + +#endif // CHROME_BROWSER_HISTORY_HISTORY_TYPES_H_ diff --git a/chrome/browser/history/history_types_unittest.cc b/chrome/browser/history/history_types_unittest.cc new file mode 100644 index 0000000..5e14de5 --- /dev/null +++ b/chrome/browser/history/history_types_unittest.cc @@ -0,0 +1,171 @@ +// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "chrome/browser/history/history_types.h" +#include "testing/gtest/include/gtest/gtest.h" + +using base::Time; + +namespace history { + +namespace { + +// Validates the consistency of the given history result. We just make sure +// that the URL rows match the indices structure. The unit tests themselves +// test the index structure to verify things are in the right order, so we +// don't need to. +void CheckHistoryResultConsistency(const QueryResults& result) { + for (size_t i = 0; i < result.size(); i++) { + size_t match_count; + const size_t* matches = result.MatchesForURL(result[i].url(), &match_count); + + bool found = false; + for (size_t match = 0; match < match_count; match++) { + if (matches[match] == i) { + found = true; + break; + } + } + + EXPECT_TRUE(found) << "The URL had no index referring to it."; + } +} + +static const char kURL1[] = "http://www.google.com/"; +static const char kURL2[] = "http://news.google.com/"; +static const char kURL3[] = "http://images.google.com/"; + +// Adds kURL1 twice and kURL2 once. +void AddSimpleData(QueryResults* results) { + GURL url1(kURL1); + GURL url2(kURL2); + URLResult result1(url1, Time::Now()); + URLResult result2(url1, Time::Now()); + URLResult result3(url2, Time::Now()); + + // The URLResults are invalid after being inserted. + results->AppendURLBySwapping(&result1); + results->AppendURLBySwapping(&result2); + results->AppendURLBySwapping(&result3); + CheckHistoryResultConsistency(*results); +} + +// Adds kURL2 once and kURL3 once. +void AddAlternateData(QueryResults* results) { + GURL url2(kURL2); + GURL url3(kURL3); + URLResult result1(url2, Time::Now()); + URLResult result2(url3, Time::Now()); + + // The URLResults are invalid after being inserted. + results->AppendURLBySwapping(&result1); + results->AppendURLBySwapping(&result2); + CheckHistoryResultConsistency(*results); +} + +} // namespace + +// Tests insertion and deletion by range. 
+TEST(HistoryQueryResult, DeleteRange) { + GURL url1(kURL1); + GURL url2(kURL2); + QueryResults results; + AddSimpleData(&results); + + // Make sure the first URL is in there twice. The indices can be in either + // order. + size_t match_count; + const size_t* matches = results.MatchesForURL(url1, &match_count); + ASSERT_EQ(2U, match_count); + EXPECT_TRUE((matches[0] == 0 && matches[1] == 1) || + (matches[0] == 1 && matches[1] == 0)); + + // Check the second one. + matches = results.MatchesForURL(url2, &match_count); + ASSERT_EQ(1U, match_count); + EXPECT_TRUE(matches[0] == 2); + + // Delete the first instance of the first URL. + results.DeleteRange(0, 0); + CheckHistoryResultConsistency(results); + + // Check the two URLs. + matches = results.MatchesForURL(url1, &match_count); + ASSERT_EQ(1U, match_count); + EXPECT_TRUE(matches[0] == 0); + matches = results.MatchesForURL(url2, &match_count); + ASSERT_EQ(1U, match_count); + EXPECT_TRUE(matches[0] == 1); + + // Now delete everything and make sure it's deleted. + results.DeleteRange(0, 1); + EXPECT_EQ(0U, results.size()); + EXPECT_FALSE(results.MatchesForURL(url1, NULL)); + EXPECT_FALSE(results.MatchesForURL(url2, NULL)); +} + +// Tests insertion and deletion by URL. +TEST(HistoryQueryResult, ResultDeleteURL) { + GURL url1(kURL1); + GURL url2(kURL2); + QueryResults results; + AddSimpleData(&results); + + // Delete the first URL. + results.DeleteURL(url1); + CheckHistoryResultConsistency(results); + EXPECT_EQ(1U, results.size()); + + // The first one should be gone, and the second one should be at [0]. + size_t match_count; + EXPECT_FALSE(results.MatchesForURL(url1, NULL)); + const size_t* matches = results.MatchesForURL(url2, &match_count); + ASSERT_EQ(1U, match_count); + EXPECT_TRUE(matches[0] == 0); + + // Delete the second URL, there should be nothing left. 
+ results.DeleteURL(url2); + EXPECT_EQ(0U, results.size()); + EXPECT_FALSE(results.MatchesForURL(url2, NULL)); +} + +TEST(HistoryQueryResult, AppendResults) { + GURL url1(kURL1); + GURL url2(kURL2); + GURL url3(kURL3); + + // This is the base. + QueryResults results; + AddSimpleData(&results); + + // Now create the appendee. + QueryResults appendee; + AddAlternateData(&appendee); + + results.AppendResultsBySwapping(&appendee, true); + CheckHistoryResultConsistency(results); + + // There should be 3 results, the second one of the appendee should be + // deleted because it was already in the first one and we said remove dupes. + ASSERT_EQ(4U, results.size()); + + // The first URL should be unchanged in the first two spots. + size_t match_count; + const size_t* matches = results.MatchesForURL(url1, &match_count); + ASSERT_EQ(2U, match_count); + EXPECT_TRUE((matches[0] == 0 && matches[1] == 1) || + (matches[0] == 1 && matches[1] == 0)); + + // The second URL should be there once after that + matches = results.MatchesForURL(url2, &match_count); + ASSERT_EQ(1U, match_count); + EXPECT_TRUE(matches[0] == 2); + + // The third one should be after that. + matches = results.MatchesForURL(url3, &match_count); + ASSERT_EQ(1U, match_count); + EXPECT_TRUE(matches[0] == 3); +} + +} // namespace diff --git a/chrome/browser/history/history_unittest.cc b/chrome/browser/history/history_unittest.cc new file mode 100644 index 0000000..c8db05a --- /dev/null +++ b/chrome/browser/history/history_unittest.cc @@ -0,0 +1,959 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// History unit tests come in two flavors: +// +// 1. The more complicated style is that the unit test creates a full history +// service. This spawns a background thread for the history backend, and +// all communication is asynchronous. 
This is useful for testing more +// complicated things or end-to-end behavior. +// +// 2. The simpler style is to create a history backend on this thread and +// access it directly without a HistoryService object. This is much simpler +// because communication is synchronous. Generally, sets should go through +// the history backend (since there is a lot of logic) but gets can come +// directly from the HistoryDatabase. This is because the backend generally +// has no logic in the getter except threading stuff, which we don't want +// to run. + +#include <time.h> +#include <algorithm> + +#include "app/sql/connection.h" +#include "app/sql/statement.h" +#include "base/basictypes.h" +#include "base/callback.h" +#include "base/file_path.h" +#include "base/file_util.h" +#include "base/message_loop.h" +#include "base/path_service.h" +#include "base/scoped_vector.h" +#include "base/string_util.h" +#include "base/task.h" +#include "chrome/browser/browser_process.h" +#include "chrome/browser/download/download_item.h" +#include "chrome/browser/history/history.h" +#include "chrome/browser/history/history_backend.h" +#include "chrome/browser/history/history_database.h" +#include "chrome/browser/history/history_notifications.h" +#include "chrome/browser/history/in_memory_database.h" +#include "chrome/browser/history/in_memory_history_backend.h" +#include "chrome/browser/history/page_usage_data.h" +#include "chrome/common/chrome_paths.h" +#include "chrome/common/notification_service.h" +#include "chrome/common/thumbnail_score.h" +#include "chrome/tools/profiles/thumbnail-inl.h" +#include "gfx/codec/jpeg_codec.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "third_party/skia/include/core/SkBitmap.h" + +using base::Time; +using base::TimeDelta; + +namespace history { +class HistoryTest; +} + +// Specialize RunnableMethodTraits for HistoryTest so we can create callbacks. 
+// None of these callbacks can outlast the test, so there is not need to retain +// the HistoryTest object. +DISABLE_RUNNABLE_METHOD_REFCOUNT(history::HistoryTest); + +namespace history { + +namespace { + +// Compares the two data values. Used for comparing thumbnail data. +bool DataEqual(const unsigned char* reference, size_t reference_len, + const std::vector<unsigned char>& data) { + if (reference_len != data.size()) + return false; + for (size_t i = 0; i < reference_len; i++) { + if (data[i] != reference[i]) + return false; + } + return true; +} + +// The tracker uses RenderProcessHost pointers for scoping but never +// dereferences them. We use ints because it's easier. This function converts +// between the two. +static void* MakeFakeHost(int id) { + void* host = 0; + memcpy(&host, &id, sizeof(id)); + return host; +} + +} // namespace + +// Delegate class for when we create a backend without a HistoryService. +class BackendDelegate : public HistoryBackend::Delegate { + public: + explicit BackendDelegate(HistoryTest* history_test) + : history_test_(history_test) { + } + + virtual void NotifyProfileError(int message_id); + virtual void SetInMemoryBackend(InMemoryHistoryBackend* backend); + virtual void BroadcastNotifications(NotificationType type, + HistoryDetails* details); + virtual void DBLoaded() {} + virtual void StartTopSitesMigration() {} + private: + HistoryTest* history_test_; +}; + +// This must be outside the anonymous namespace for the friend statement in +// HistoryBackend to work. 
+class HistoryTest : public testing::Test { + public: + HistoryTest() + : history_service_(NULL), + got_thumbnail_callback_(false), + redirect_query_success_(false), + query_url_success_(false), + db_(NULL) { + } + ~HistoryTest() { + } + + // Thumbnail callback: we save the data and exit the message loop so the + // unit test can read the data + void OnThumbnailDataAvailable( + HistoryService::Handle request_handle, + scoped_refptr<RefCountedBytes> jpeg_data) { + got_thumbnail_callback_ = true; + if (jpeg_data.get()) { + std::copy(jpeg_data->data.begin(), jpeg_data->data.end(), + std::back_inserter(thumbnail_data_)); + } + MessageLoop::current()->Quit(); + } + + // Creates the HistoryBackend and HistoryDatabase on the current thread, + // assigning the values to backend_ and db_. + void CreateBackendAndDatabase() { + backend_ = + new HistoryBackend(history_dir_, new BackendDelegate(this), NULL); + backend_->Init(false); + db_ = backend_->db_.get(); + DCHECK(in_mem_backend_.get()) << "Mem backend should have been set by " + "HistoryBackend::Init"; + } + + void OnSegmentUsageAvailable(CancelableRequestProvider::Handle handle, + std::vector<PageUsageData*>* data) { + page_usage_data_->swap(*data); + MessageLoop::current()->Quit(); + } + + void OnDeleteURLsDone(CancelableRequestProvider::Handle handle) { + MessageLoop::current()->Quit(); + } + + void OnMostVisitedURLsAvailable(CancelableRequestProvider::Handle handle, + MostVisitedURLList url_list) { + most_visited_urls_.swap(url_list); + MessageLoop::current()->Quit(); + } + + protected: + friend class BackendDelegate; + + // testing::Test + virtual void SetUp() { + FilePath temp_dir; + PathService::Get(base::DIR_TEMP, &temp_dir); + history_dir_ = temp_dir.AppendASCII("HistoryTest"); + file_util::Delete(history_dir_, true); + file_util::CreateDirectory(history_dir_); + } + + void DeleteBackend() { + if (backend_) { + backend_->Closing(); + backend_ = NULL; + } + } + + virtual void TearDown() { + DeleteBackend(); + + 
if (history_service_) + CleanupHistoryService(); + + // Try to clean up the database file. + file_util::Delete(history_dir_, true); + + // Make sure we don't have any event pending that could disrupt the next + // test. + MessageLoop::current()->PostTask(FROM_HERE, new MessageLoop::QuitTask); + MessageLoop::current()->Run(); + } + + void CleanupHistoryService() { + DCHECK(history_service_.get()); + + history_service_->NotifyRenderProcessHostDestruction(0); + history_service_->SetOnBackendDestroyTask(new MessageLoop::QuitTask); + history_service_->Cleanup(); + history_service_ = NULL; + + // Wait for the backend class to terminate before deleting the files and + // moving to the next test. Note: if this never terminates, somebody is + // probably leaking a reference to the history backend, so it never calls + // our destroy task. + MessageLoop::current()->Run(); + } + + int64 AddDownload(int32 state, const Time& time) { + DownloadCreateInfo download(FilePath(FILE_PATH_LITERAL("foo-path")), + GURL("foo-url"), time, 0, 512, state, 0); + return db_->CreateDownload(download); + } + + // Fills the query_url_row_ and query_url_visits_ structures with the + // information about the given URL and returns true. If the URL was not + // found, this will return false and those structures will not be changed. + bool QueryURL(HistoryService* history, const GURL& url) { + history->QueryURL(url, true, &consumer_, + NewCallback(this, &HistoryTest::SaveURLAndQuit)); + MessageLoop::current()->Run(); // Will be exited in SaveURLAndQuit. + return query_url_success_; + } + + // Callback for HistoryService::QueryURL. 
+ void SaveURLAndQuit(HistoryService::Handle handle, + bool success, + const URLRow* url_row, + VisitVector* visit_vector) { + query_url_success_ = success; + if (query_url_success_) { + query_url_row_ = *url_row; + query_url_visits_.swap(*visit_vector); + } else { + query_url_row_ = URLRow(); + query_url_visits_.clear(); + } + MessageLoop::current()->Quit(); + } + + // Fills in saved_redirects_ with the redirect information for the given URL, + // returning true on success. False means the URL was not found. + bool QueryRedirectsFrom(HistoryService* history, const GURL& url) { + history->QueryRedirectsFrom(url, &consumer_, + NewCallback(this, &HistoryTest::OnRedirectQueryComplete)); + MessageLoop::current()->Run(); // Will be exited in *QueryComplete. + return redirect_query_success_; + } + + // Callback for QueryRedirects. + void OnRedirectQueryComplete(HistoryService::Handle handle, + GURL url, + bool success, + history::RedirectList* redirects) { + redirect_query_success_ = success; + if (redirect_query_success_) + saved_redirects_.swap(*redirects); + else + saved_redirects_.clear(); + MessageLoop::current()->Quit(); + } + + MessageLoopForUI message_loop_; + + // PageUsageData vector to test segments. + ScopedVector<PageUsageData> page_usage_data_; + + MostVisitedURLList most_visited_urls_; + + // When non-NULL, this will be deleted on tear down and we will block until + // the backend thread has completed. This allows tests for the history + // service to use this feature, but other tests to ignore this. + scoped_refptr<HistoryService> history_service_; + + // names of the database files + FilePath history_dir_; + + // Set by the thumbnail callback when we get data, you should be sure to + // clear this before issuing a thumbnail request. + bool got_thumbnail_callback_; + std::vector<unsigned char> thumbnail_data_; + + // Set by the redirect callback when we get data. You should be sure to + // clear this before issuing a redirect request. 
+ history::RedirectList saved_redirects_; + bool redirect_query_success_; + + // For history requests. + CancelableRequestConsumer consumer_; + + // For saving URL info after a call to QueryURL + bool query_url_success_; + URLRow query_url_row_; + VisitVector query_url_visits_; + + // Created via CreateBackendAndDatabase. + scoped_refptr<HistoryBackend> backend_; + scoped_ptr<InMemoryHistoryBackend> in_mem_backend_; + HistoryDatabase* db_; // Cached reference to the backend's database. +}; + +void BackendDelegate::NotifyProfileError(int message_id) { +} + +void BackendDelegate::SetInMemoryBackend(InMemoryHistoryBackend* backend) { + // Save the in-memory backend to the history test object, this happens + // synchronously, so we don't have to do anything fancy. + history_test_->in_mem_backend_.reset(backend); +} + +void BackendDelegate::BroadcastNotifications(NotificationType type, + HistoryDetails* details) { + // Currently, just send the notifications directly to the in-memory database. + // We may want do do something more fancy in the future. + Details<HistoryDetails> det(details); + history_test_->in_mem_backend_->Observe(type, + Source<HistoryTest>(NULL), det); + + // The backend passes ownership of the details pointer to us. + delete details; +} + +TEST_F(HistoryTest, ClearBrowsingData_Downloads) { + CreateBackendAndDatabase(); + + Time now = Time::Now(); + TimeDelta one_day = TimeDelta::FromDays(1); + Time month_ago = now - TimeDelta::FromDays(30); + + // Initially there should be nothing in the downloads database. + std::vector<DownloadCreateInfo> downloads; + db_->QueryDownloads(&downloads); + EXPECT_EQ(0U, downloads.size()); + + // Keep track of these as we need to update them later during the test. + DownloadID in_progress, removing; + + // Create one with a 0 time. + EXPECT_NE(0, AddDownload(DownloadItem::COMPLETE, Time())); + // Create one for now and +/- 1 day. 
+ EXPECT_NE(0, AddDownload(DownloadItem::COMPLETE, now - one_day)); + EXPECT_NE(0, AddDownload(DownloadItem::COMPLETE, now)); + EXPECT_NE(0, AddDownload(DownloadItem::COMPLETE, now + one_day)); + // Try the other three states. + EXPECT_NE(0, AddDownload(DownloadItem::COMPLETE, month_ago)); + EXPECT_NE(0, in_progress = AddDownload(DownloadItem::IN_PROGRESS, month_ago)); + EXPECT_NE(0, AddDownload(DownloadItem::CANCELLED, month_ago)); + EXPECT_NE(0, removing = AddDownload(DownloadItem::REMOVING, month_ago)); + + // Test to see if inserts worked. + db_->QueryDownloads(&downloads); + EXPECT_EQ(8U, downloads.size()); + + // Try removing from current timestamp. This should delete the one in the + // future and one very recent one. + db_->RemoveDownloadsBetween(now, Time()); + db_->QueryDownloads(&downloads); + EXPECT_EQ(6U, downloads.size()); + + // Try removing from two months ago. This should not delete items that are + // 'in progress' or in 'removing' state. + db_->RemoveDownloadsBetween(now - TimeDelta::FromDays(60), Time()); + db_->QueryDownloads(&downloads); + EXPECT_EQ(3U, downloads.size()); + + // Download manager converts to TimeT, which is lossy, so we do the same + // for comparison. + Time month_ago_lossy = Time::FromTimeT(month_ago.ToTimeT()); + + // Make sure the right values remain. + EXPECT_EQ(DownloadItem::COMPLETE, downloads[0].state); + EXPECT_EQ(0, downloads[0].start_time.ToInternalValue()); + EXPECT_EQ(DownloadItem::IN_PROGRESS, downloads[1].state); + EXPECT_EQ(month_ago_lossy.ToInternalValue(), + downloads[1].start_time.ToInternalValue()); + EXPECT_EQ(DownloadItem::REMOVING, downloads[2].state); + EXPECT_EQ(month_ago_lossy.ToInternalValue(), + downloads[2].start_time.ToInternalValue()); + + // Change state so we can delete the downloads. + EXPECT_TRUE(db_->UpdateDownload(512, DownloadItem::COMPLETE, in_progress)); + EXPECT_TRUE(db_->UpdateDownload(512, DownloadItem::CANCELLED, removing)); + + // Try removing from Time=0. This should delete all. 
+ db_->RemoveDownloadsBetween(Time(), Time()); + db_->QueryDownloads(&downloads); + EXPECT_EQ(0U, downloads.size()); + + // Check removal of downloads stuck in IN_PROGRESS state. + EXPECT_NE(0, AddDownload(DownloadItem::COMPLETE, month_ago)); + EXPECT_NE(0, AddDownload(DownloadItem::IN_PROGRESS, month_ago)); + db_->QueryDownloads(&downloads); + EXPECT_EQ(2U, downloads.size()); + db_->RemoveDownloadsBetween(Time(), Time()); + db_->QueryDownloads(&downloads); + // IN_PROGRESS download should remain. It it indicated as "Canceled" + EXPECT_EQ(1U, downloads.size()); + db_->CleanUpInProgressEntries(); + db_->QueryDownloads(&downloads); + EXPECT_EQ(1U, downloads.size()); + db_->RemoveDownloadsBetween(Time(), Time()); + db_->QueryDownloads(&downloads); + EXPECT_EQ(0U, downloads.size()); +} + +TEST_F(HistoryTest, AddPage) { + scoped_refptr<HistoryService> history(new HistoryService); + history_service_ = history; + ASSERT_TRUE(history->Init(history_dir_, NULL)); + + // Add the page once from a child frame. + const GURL test_url("http://www.google.com/"); + history->AddPage(test_url, NULL, 0, GURL(), + PageTransition::MANUAL_SUBFRAME, + history::RedirectList(), false); + EXPECT_TRUE(QueryURL(history, test_url)); + EXPECT_EQ(1, query_url_row_.visit_count()); + EXPECT_EQ(0, query_url_row_.typed_count()); + EXPECT_TRUE(query_url_row_.hidden()); // Hidden because of child frame. + + // Add the page once from the main frame (should unhide it). + history->AddPage(test_url, NULL, 0, GURL(), PageTransition::LINK, + history::RedirectList(), false); + EXPECT_TRUE(QueryURL(history, test_url)); + EXPECT_EQ(2, query_url_row_.visit_count()); // Added twice. + EXPECT_EQ(0, query_url_row_.typed_count()); // Never typed. + EXPECT_FALSE(query_url_row_.hidden()); // Because loaded in main frame. 
+} + +TEST_F(HistoryTest, AddPageSameTimes) { + scoped_refptr<HistoryService> history(new HistoryService); + history_service_ = history; + ASSERT_TRUE(history->Init(history_dir_, NULL)); + + Time now = Time::Now(); + const GURL test_urls[] = { + GURL("http://timer.first.page/"), + GURL("http://timer.second.page/"), + GURL("http://timer.third.page/"), + }; + + // Make sure that two pages added at the same time with no intervening + // additions have different timestamps. + history->AddPage(test_urls[0], now, NULL, 0, GURL(), + PageTransition::LINK, + history::RedirectList(), false); + EXPECT_TRUE(QueryURL(history, test_urls[0])); + EXPECT_EQ(1, query_url_row_.visit_count()); + EXPECT_TRUE(now == query_url_row_.last_visit()); // gtest doesn't like Time + + history->AddPage(test_urls[1], now, NULL, 0, GURL(), + PageTransition::LINK, + history::RedirectList(), false); + EXPECT_TRUE(QueryURL(history, test_urls[1])); + EXPECT_EQ(1, query_url_row_.visit_count()); + EXPECT_TRUE(now + TimeDelta::FromMicroseconds(1) == + query_url_row_.last_visit()); + + // Make sure the next page, at a different time, is also correct. + history->AddPage(test_urls[2], now + TimeDelta::FromMinutes(1), + NULL, 0, GURL(), + PageTransition::LINK, + history::RedirectList(), false); + EXPECT_TRUE(QueryURL(history, test_urls[2])); + EXPECT_EQ(1, query_url_row_.visit_count()); + EXPECT_TRUE(now + TimeDelta::FromMinutes(1) == + query_url_row_.last_visit()); +} + +TEST_F(HistoryTest, AddRedirect) { + scoped_refptr<HistoryService> history(new HistoryService); + history_service_ = history; + ASSERT_TRUE(history->Init(history_dir_, NULL)); + + const char* first_sequence[] = { + "http://first.page/", + "http://second.page/"}; + int first_count = arraysize(first_sequence); + history::RedirectList first_redirects; + for (int i = 0; i < first_count; i++) + first_redirects.push_back(GURL(first_sequence[i])); + + // Add the sequence of pages as a server with no referrer. 
Note that we need + // to have a non-NULL page ID scope. + history->AddPage(first_redirects.back(), MakeFakeHost(1), 0, GURL(), + PageTransition::LINK, first_redirects, true); + + // The first page should be added once with a link visit type (because we set + // LINK when we added the original URL, and a referrer of nowhere (0). + EXPECT_TRUE(QueryURL(history, first_redirects[0])); + EXPECT_EQ(1, query_url_row_.visit_count()); + ASSERT_EQ(1U, query_url_visits_.size()); + int64 first_visit = query_url_visits_[0].visit_id; + EXPECT_EQ(PageTransition::LINK | + PageTransition::CHAIN_START, query_url_visits_[0].transition); + EXPECT_EQ(0, query_url_visits_[0].referring_visit); // No referrer. + + // The second page should be a server redirect type with a referrer of the + // first page. + EXPECT_TRUE(QueryURL(history, first_redirects[1])); + EXPECT_EQ(1, query_url_row_.visit_count()); + ASSERT_EQ(1U, query_url_visits_.size()); + int64 second_visit = query_url_visits_[0].visit_id; + EXPECT_EQ(PageTransition::SERVER_REDIRECT | + PageTransition::CHAIN_END, query_url_visits_[0].transition); + EXPECT_EQ(first_visit, query_url_visits_[0].referring_visit); + + // Check that the redirect finding function successfully reports it. + saved_redirects_.clear(); + QueryRedirectsFrom(history, first_redirects[0]); + ASSERT_EQ(1U, saved_redirects_.size()); + EXPECT_EQ(first_redirects[1], saved_redirects_[0]); + + // Now add a client redirect from that second visit to a third, client + // redirects are tracked by the RenderView prior to updating history, + // so we pass in a CLIENT_REDIRECT qualifier to mock that behavior. 
+ history::RedirectList second_redirects; + second_redirects.push_back(first_redirects[1]); + second_redirects.push_back(GURL("http://last.page/")); + history->AddPage(second_redirects[1], MakeFakeHost(1), 1, + second_redirects[0], + static_cast<PageTransition::Type>(PageTransition::LINK | + PageTransition::CLIENT_REDIRECT), + second_redirects, true); + + // The last page (source of the client redirect) should NOT have an + // additional visit added, because it was a client redirect (normally it + // would). We should only have 1 left over from the first sequence. + EXPECT_TRUE(QueryURL(history, second_redirects[0])); + EXPECT_EQ(1, query_url_row_.visit_count()); + + // The final page should be set as a client redirect from the previous visit. + EXPECT_TRUE(QueryURL(history, second_redirects[1])); + EXPECT_EQ(1, query_url_row_.visit_count()); + ASSERT_EQ(1U, query_url_visits_.size()); + EXPECT_EQ(PageTransition::CLIENT_REDIRECT | + PageTransition::CHAIN_END, query_url_visits_[0].transition); + EXPECT_EQ(second_visit, query_url_visits_[0].referring_visit); +} + +TEST_F(HistoryTest, Typed) { + scoped_refptr<HistoryService> history(new HistoryService); + history_service_ = history; + ASSERT_TRUE(history->Init(history_dir_, NULL)); + + // Add the page once as typed. + const GURL test_url("http://www.google.com/"); + history->AddPage(test_url, NULL, 0, GURL(), PageTransition::TYPED, + history::RedirectList(), false); + EXPECT_TRUE(QueryURL(history, test_url)); + + // We should have the same typed & visit count. + EXPECT_EQ(1, query_url_row_.visit_count()); + EXPECT_EQ(1, query_url_row_.typed_count()); + + // Add the page again not typed. + history->AddPage(test_url, NULL, 0, GURL(), PageTransition::LINK, + history::RedirectList(), false); + EXPECT_TRUE(QueryURL(history, test_url)); + + // The second time should not have updated the typed count. 
+ EXPECT_EQ(2, query_url_row_.visit_count()); + EXPECT_EQ(1, query_url_row_.typed_count()); + + // Add the page again as a generated URL. + history->AddPage(test_url, NULL, 0, GURL(), + PageTransition::GENERATED, history::RedirectList(), + false); + EXPECT_TRUE(QueryURL(history, test_url)); + + // This should have worked like a link click. + EXPECT_EQ(3, query_url_row_.visit_count()); + EXPECT_EQ(1, query_url_row_.typed_count()); + + // Add the page again as a reload. + history->AddPage(test_url, NULL, 0, GURL(), + PageTransition::RELOAD, history::RedirectList(), + false); + EXPECT_TRUE(QueryURL(history, test_url)); + + // This should not have incremented any visit counts. + EXPECT_EQ(3, query_url_row_.visit_count()); + EXPECT_EQ(1, query_url_row_.typed_count()); +} + +TEST_F(HistoryTest, SetTitle) { + scoped_refptr<HistoryService> history(new HistoryService); + history_service_ = history; + ASSERT_TRUE(history->Init(history_dir_, NULL)); + + // Add a URL. + const GURL existing_url("http://www.google.com/"); + history->AddPage(existing_url); + + // Set some title. + const string16 existing_title = UTF8ToUTF16("Google"); + history->SetPageTitle(existing_url, existing_title); + + // Make sure the title got set. + EXPECT_TRUE(QueryURL(history, existing_url)); + EXPECT_EQ(existing_title, query_url_row_.title()); + + // set a title on a nonexistent page + const GURL nonexistent_url("http://news.google.com/"); + const string16 nonexistent_title = UTF8ToUTF16("Google News"); + history->SetPageTitle(nonexistent_url, nonexistent_title); + + // Make sure nothing got written. + EXPECT_FALSE(QueryURL(history, nonexistent_url)); + EXPECT_EQ(string16(), query_url_row_.title()); + + // TODO(brettw) this should also test redirects, which get the title of the + // destination page. 
+} + +TEST_F(HistoryTest, Segments) { + scoped_refptr<HistoryService> history(new HistoryService); + history_service_ = history; + + ASSERT_TRUE(history->Init(history_dir_, NULL)); + + static const void* scope = static_cast<void*>(this); + + // Add a URL. + const GURL existing_url("http://www.google.com/"); + history->AddPage(existing_url, scope, 0, GURL(), + PageTransition::TYPED, history::RedirectList(), + false); + + // Make sure a segment was created. + history->QuerySegmentUsageSince( + &consumer_, Time::Now() - TimeDelta::FromDays(1), 10, + NewCallback(static_cast<HistoryTest*>(this), + &HistoryTest::OnSegmentUsageAvailable)); + + // Wait for processing. + MessageLoop::current()->Run(); + + ASSERT_EQ(1U, page_usage_data_->size()); + EXPECT_TRUE(page_usage_data_[0]->GetURL() == existing_url); + EXPECT_DOUBLE_EQ(3.0, page_usage_data_[0]->GetScore()); + + // Add a URL which doesn't create a segment. + const GURL link_url("http://yahoo.com/"); + history->AddPage(link_url, scope, 0, GURL(), + PageTransition::LINK, history::RedirectList(), + false); + + // Query again + history->QuerySegmentUsageSince( + &consumer_, Time::Now() - TimeDelta::FromDays(1), 10, + NewCallback(static_cast<HistoryTest*>(this), + &HistoryTest::OnSegmentUsageAvailable)); + + // Wait for processing. + MessageLoop::current()->Run(); + + // Make sure we still have one segment. + ASSERT_EQ(1U, page_usage_data_->size()); + EXPECT_TRUE(page_usage_data_[0]->GetURL() == existing_url); + + // Add a page linked from existing_url. + history->AddPage(GURL("http://www.google.com/foo"), scope, 3, existing_url, + PageTransition::LINK, history::RedirectList(), + false); + + // Query again + history->QuerySegmentUsageSince( + &consumer_, Time::Now() - TimeDelta::FromDays(1), 10, + NewCallback(static_cast<HistoryTest*>(this), + &HistoryTest::OnSegmentUsageAvailable)); + + // Wait for processing. + MessageLoop::current()->Run(); + + // Make sure we still have one segment. 
+ ASSERT_EQ(1U, page_usage_data_->size()); + EXPECT_TRUE(page_usage_data_[0]->GetURL() == existing_url); + + // However, the score should have increased. + EXPECT_GT(page_usage_data_[0]->GetScore(), 5.0); +} + +// This just tests history system -> thumbnail database integration, the actual +// thumbnail tests are in its own file. +TEST_F(HistoryTest, Thumbnails) { + scoped_refptr<HistoryService> history(new HistoryService); + history_service_ = history; + ASSERT_TRUE(history->Init(history_dir_, NULL)); + + scoped_ptr<SkBitmap> thumbnail( + gfx::JPEGCodec::Decode(kGoogleThumbnail, sizeof(kGoogleThumbnail))); + static const double boringness = 0.25; + + const GURL url("http://www.google.com/thumbnail_test/"); + history->AddPage(url); // Must be visited before adding a thumbnail. + history->SetPageThumbnail(url, *thumbnail, + ThumbnailScore(boringness, true, true)); + + // Make sure we get the correct thumbnail data. + EXPECT_TRUE(history->GetPageThumbnail(url, &consumer_, + NewCallback(static_cast<HistoryTest*>(this), + &HistoryTest::OnThumbnailDataAvailable))); + thumbnail_data_.clear(); + MessageLoop::current()->Run(); + // Make sure we got a valid JPEG back. This isn't equivalent to + // being correct, but when we're roundtripping through JPEG + // compression and we don't have a similarity measure. + EXPECT_TRUE(thumbnail_data_.size()); + scoped_ptr<SkBitmap> decoded_thumbnail( + gfx::JPEGCodec::Decode(&thumbnail_data_[0], thumbnail_data_.size())); + EXPECT_TRUE(decoded_thumbnail.get()); + + // Request a nonexistent thumbnail and make sure we get + // a callback and no data. + EXPECT_TRUE(history->GetPageThumbnail(GURL("http://asdfasdf.com/"), + &consumer_, + NewCallback(static_cast<HistoryTest*>(this), + &HistoryTest::OnThumbnailDataAvailable))); + thumbnail_data_.clear(); + MessageLoop::current()->Run(); + EXPECT_EQ(0U, thumbnail_data_.size()); + + // Request the thumbnail and cancel the request.. 
+ got_thumbnail_callback_ = false; + thumbnail_data_.clear(); + HistoryService::Handle handle = history->GetPageThumbnail(url, &consumer_, + NewCallback(static_cast<HistoryTest*>(this), + &HistoryTest::OnThumbnailDataAvailable)); + EXPECT_TRUE(handle); + + history->CancelRequest(handle); + + // We create a task with a timeout so we can make sure we don't get and + // data in that time. + class QuitMessageLoop : public Task { + public: + virtual void Run() { + MessageLoop::current()->Quit(); + } + }; + MessageLoop::current()->PostDelayedTask(FROM_HERE, new QuitMessageLoop, 2000); + MessageLoop::current()->Run(); + EXPECT_FALSE(got_thumbnail_callback_); +} + +TEST_F(HistoryTest, MostVisitedURLs) { + scoped_refptr<HistoryService> history(new HistoryService); + history_service_ = history; + ASSERT_TRUE(history->Init(history_dir_, NULL)); + + const GURL url0("http://www.google.com/url0/"); + const GURL url1("http://www.google.com/url1/"); + const GURL url2("http://www.google.com/url2/"); + const GURL url3("http://www.google.com/url3/"); + const GURL url4("http://www.google.com/url4/"); + + static const void* scope = static_cast<void*>(this); + + // Add two pages. + history->AddPage(url0, scope, 0, GURL(), + PageTransition::TYPED, history::RedirectList(), + false); + history->AddPage(url1, scope, 0, GURL(), + PageTransition::TYPED, history::RedirectList(), + false); + history->QueryMostVisitedURLs(20, 90, &consumer_, + NewCallback(static_cast<HistoryTest*>(this), + &HistoryTest::OnMostVisitedURLsAvailable)); + MessageLoop::current()->Run(); + + EXPECT_EQ(2U, most_visited_urls_.size()); + EXPECT_EQ(url0, most_visited_urls_[0].url); + EXPECT_EQ(url1, most_visited_urls_[1].url); + + // Add another page. 
+ history->AddPage(url2, scope, 0, GURL(), + PageTransition::TYPED, history::RedirectList(), + false); + history->QueryMostVisitedURLs(20, 90, &consumer_, + NewCallback(static_cast<HistoryTest*>(this), + &HistoryTest::OnMostVisitedURLsAvailable)); + MessageLoop::current()->Run(); + + EXPECT_EQ(3U, most_visited_urls_.size()); + EXPECT_EQ(url0, most_visited_urls_[0].url); + EXPECT_EQ(url1, most_visited_urls_[1].url); + EXPECT_EQ(url2, most_visited_urls_[2].url); + + // Revisit url2, making it the top URL. + history->AddPage(url2, scope, 0, GURL(), + PageTransition::TYPED, history::RedirectList(), + false); + history->QueryMostVisitedURLs(20, 90, &consumer_, + NewCallback(static_cast<HistoryTest*>(this), + &HistoryTest::OnMostVisitedURLsAvailable)); + MessageLoop::current()->Run(); + + EXPECT_EQ(3U, most_visited_urls_.size()); + EXPECT_EQ(url2, most_visited_urls_[0].url); + EXPECT_EQ(url0, most_visited_urls_[1].url); + EXPECT_EQ(url1, most_visited_urls_[2].url); + + // Revisit url1, making it the top URL. + history->AddPage(url1, scope, 0, GURL(), + PageTransition::TYPED, history::RedirectList(), + false); + history->QueryMostVisitedURLs(20, 90, &consumer_, + NewCallback(static_cast<HistoryTest*>(this), + &HistoryTest::OnMostVisitedURLsAvailable)); + MessageLoop::current()->Run(); + + EXPECT_EQ(3U, most_visited_urls_.size()); + EXPECT_EQ(url1, most_visited_urls_[0].url); + EXPECT_EQ(url2, most_visited_urls_[1].url); + EXPECT_EQ(url0, most_visited_urls_[2].url); + + // Redirects + history::RedirectList redirects; + redirects.push_back(url3); + redirects.push_back(url4); + + // Visit url4 using redirects. 
+ history->AddPage(url4, scope, 0, GURL(), + PageTransition::TYPED, redirects, + false); + history->QueryMostVisitedURLs(20, 90, &consumer_, + NewCallback(static_cast<HistoryTest*>(this), + &HistoryTest::OnMostVisitedURLsAvailable)); + MessageLoop::current()->Run(); + + EXPECT_EQ(4U, most_visited_urls_.size()); + EXPECT_EQ(url1, most_visited_urls_[0].url); + EXPECT_EQ(url2, most_visited_urls_[1].url); + EXPECT_EQ(url0, most_visited_urls_[2].url); + EXPECT_EQ(url3, most_visited_urls_[3].url); + EXPECT_EQ(2U, most_visited_urls_[3].redirects.size()); +} + +// The version of the history database should be current in the "typical +// history" example file or it will be imported on startup, throwing off timing +// measurements. +// +// See test/data/profiles/typical_history/README.txt for instructions on +// how to up the version. +TEST(HistoryProfileTest, TypicalProfileVersion) { + FilePath file; + ASSERT_TRUE(PathService::Get(chrome::DIR_TEST_DATA, &file)); + file = file.AppendASCII("profiles"); + file = file.AppendASCII("typical_history"); + file = file.AppendASCII("Default"); + file = file.AppendASCII("History"); + + int cur_version = HistoryDatabase::GetCurrentVersion(); + + sql::Connection db; + ASSERT_TRUE(db.Open(file)); + + { + sql::Statement s(db.GetUniqueStatement( + "SELECT value FROM meta WHERE key = 'version'")); + EXPECT_TRUE(s.Step()); + int file_version = s.ColumnInt(0); + EXPECT_EQ(cur_version, file_version); + } +} + +namespace { + +// Use this dummy value to scope the page IDs we give history. +static const void* kAddArgsScope = reinterpret_cast<void*>(0x12345678); + +// Creates a new HistoryAddPageArgs object for sending to the history database +// with reasonable defaults and the given NULL-terminated URL string. The +// returned object will NOT be add-ref'ed, which is the responsibility of the +// caller. 
+HistoryAddPageArgs* MakeAddArgs(const GURL& url) { + return new HistoryAddPageArgs(url, + Time::Now(), + kAddArgsScope, + 0, + GURL(), + history::RedirectList(), + PageTransition::TYPED, false); +} + +// Convenience version of the above to convert a char string. +HistoryAddPageArgs* MakeAddArgs(const char* url) { + return MakeAddArgs(GURL(url)); +} + +// A HistoryDBTask implementation. Each time RunOnDBThread is invoked +// invoke_count is increment. When invoked kWantInvokeCount times, true is +// returned from RunOnDBThread which should stop RunOnDBThread from being +// invoked again. When DoneRunOnMainThread is invoked, done_invoked is set to +// true. +class HistoryDBTaskImpl : public HistoryDBTask { + public: + static const int kWantInvokeCount; + + HistoryDBTaskImpl() : invoke_count(0), done_invoked(false) {} + + virtual bool RunOnDBThread(HistoryBackend* backend, HistoryDatabase* db) { + return (++invoke_count == kWantInvokeCount); + } + + virtual void DoneRunOnMainThread() { + done_invoked = true; + MessageLoop::current()->Quit(); + } + + int invoke_count; + bool done_invoked; + + private: + virtual ~HistoryDBTaskImpl() {} + + DISALLOW_COPY_AND_ASSIGN(HistoryDBTaskImpl); +}; + +// static +const int HistoryDBTaskImpl::kWantInvokeCount = 2; + +} // namespace + +TEST_F(HistoryTest, HistoryDBTask) { + CancelableRequestConsumerT<int, 0> request_consumer; + HistoryService* history = new HistoryService(); + ASSERT_TRUE(history->Init(history_dir_, NULL)); + scoped_refptr<HistoryDBTaskImpl> task(new HistoryDBTaskImpl()); + history_service_ = history; + history->ScheduleDBTask(task.get(), &request_consumer); + // Run the message loop. When HistoryDBTaskImpl::DoneRunOnMainThread runs, + // it will stop the message loop. If the test hangs here, it means + // DoneRunOnMainThread isn't being invoked correctly. + MessageLoop::current()->Run(); + CleanupHistoryService(); + // WARNING: history has now been deleted. 
+ history = NULL; + ASSERT_EQ(HistoryDBTaskImpl::kWantInvokeCount, task->invoke_count); + ASSERT_TRUE(task->done_invoked); +} + +TEST_F(HistoryTest, HistoryDBTaskCanceled) { + CancelableRequestConsumerT<int, 0> request_consumer; + HistoryService* history = new HistoryService(); + ASSERT_TRUE(history->Init(history_dir_, NULL)); + scoped_refptr<HistoryDBTaskImpl> task(new HistoryDBTaskImpl()); + history_service_ = history; + history->ScheduleDBTask(task.get(), &request_consumer); + request_consumer.CancelAllRequests(); + CleanupHistoryService(); + // WARNING: history has now been deleted. + history = NULL; + ASSERT_FALSE(task->done_invoked); +} + +} // namespace history diff --git a/chrome/browser/history/in_memory_database.cc b/chrome/browser/history/in_memory_database.cc new file mode 100644 index 0000000..a6a9a6f --- /dev/null +++ b/chrome/browser/history/in_memory_database.cc @@ -0,0 +1,107 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/history/in_memory_database.h" + +#include "base/file_path.h" +#include "base/histogram.h" +#include "base/logging.h" +#include "base/time.h" +#include "base/utf_string_conversions.h" +#include "build/build_config.h" + +namespace history { + +InMemoryDatabase::InMemoryDatabase() : URLDatabase() { +} + +InMemoryDatabase::~InMemoryDatabase() { +} + +bool InMemoryDatabase::InitDB() { + // Set the database page size to 4K for better performance. + db_.set_page_size(4096); + + if (!db_.OpenInMemory()) { + NOTREACHED() << "Cannot open databse " << GetDB().GetErrorMessage(); + return false; + } + + // No reason to leave data behind in memory when rows are removed. + db_.Execute("PRAGMA auto_vacuum=1"); + + // Ensure this is really an in-memory-only cache. + db_.Execute("PRAGMA temp_store=MEMORY"); + + // Create the URL table, but leave it empty for now. 
+ if (!CreateURLTable(false)) { + NOTREACHED() << "Unable to create table"; + db_.Close(); + return false; + } + + return true; +} + +bool InMemoryDatabase::InitFromScratch() { + if (!InitDB()) + return false; + + // InitDB doesn't create the index so in the disk-loading case, it can be + // added afterwards. + CreateMainURLIndex(); + return true; +} + +bool InMemoryDatabase::InitFromDisk(const FilePath& history_name) { + if (!InitDB()) + return false; + + // Attach to the history database on disk. (We can't ATTACH in the middle of + // a transaction.) + sql::Statement attach(GetDB().GetUniqueStatement("ATTACH ? AS history")); + if (!attach) { + NOTREACHED() << "Unable to attach to history database."; + return false; + } +#if defined(OS_POSIX) + attach.BindString(0, history_name.value()); +#else + attach.BindString(0, WideToUTF8(history_name.value())); +#endif + if (!attach.Run()) { + NOTREACHED() << GetDB().GetErrorMessage(); + return false; + } + + // Copy URL data to memory. + base::TimeTicks begin_load = base::TimeTicks::Now(); + if (!db_.Execute( + "INSERT INTO urls SELECT * FROM history.urls WHERE typed_count > 0")) { + // Unable to get data from the history database. This is OK, the file may + // just not exist yet. + } + base::TimeTicks end_load = base::TimeTicks::Now(); + UMA_HISTOGRAM_MEDIUM_TIMES("History.InMemoryDBPopulate", + end_load - begin_load); + UMA_HISTOGRAM_COUNTS("History.InMemoryDBItemCount", db_.GetLastChangeCount()); + + // Detach from the history database on disk. + if (!db_.Execute("DETACH history")) { + NOTREACHED() << "Unable to detach from history database."; + return false; + } + + // Index the table, this is faster than creating the index first and then + // inserting into it. 
+ CreateMainURLIndex(); + + return true; +} + +sql::Connection& InMemoryDatabase::GetDB() { + return db_; +} + +} // namespace history diff --git a/chrome/browser/history/in_memory_database.h b/chrome/browser/history/in_memory_database.h new file mode 100644 index 0000000..62460dd --- /dev/null +++ b/chrome/browser/history/in_memory_database.h @@ -0,0 +1,51 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_DATABASE_H_ +#define CHROME_BROWSER_HISTORY_IN_MEMORY_DATABASE_H_ + +#include <string> + +#include "app/sql/connection.h" +#include "base/basictypes.h" +#include "chrome/browser/history/url_database.h" + +class FilePath; + +namespace history { + +// Class used for a fast in-memory cache of typed URLs. Used for inline +// autocomplete since it is fast enough to be called synchronously as the user +// is typing. +class InMemoryDatabase : public URLDatabase { + public: + InMemoryDatabase(); + virtual ~InMemoryDatabase(); + + // Creates an empty in-memory database. + bool InitFromScratch(); + + // Initializes the database by directly slurping the data from the given + // file. Conceptually, the InMemoryHistoryBackend should do the populating + // after this object does some common initialization, but that would be + // much slower. + bool InitFromDisk(const FilePath& history_name); + + protected: + // Implemented for URLDatabase. + virtual sql::Connection& GetDB(); + + private: + // Initializes the database connection, this is the shared code between + // InitFromScratch() and InitFromDisk() above. Returns true on success. 
+ bool InitDB(); + + sql::Connection db_; + + DISALLOW_COPY_AND_ASSIGN(InMemoryDatabase); +}; + +} // namespace history + +#endif // CHROME_BROWSER_HISTORY_IN_MEMORY_DATABASE_H_ diff --git a/chrome/browser/history/in_memory_history_backend.cc b/chrome/browser/history/in_memory_history_backend.cc new file mode 100644 index 0000000..9f3e7be --- /dev/null +++ b/chrome/browser/history/in_memory_history_backend.cc @@ -0,0 +1,135 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/history/in_memory_history_backend.h" + +#include "base/command_line.h" +#include "chrome/browser/browser_process.h" +#include "chrome/browser/history/history_notifications.h" +#include "chrome/browser/history/in_memory_database.h" +#include "chrome/browser/history/in_memory_url_index.h" +#include "chrome/browser/profile.h" +#include "chrome/common/chrome_switches.h" +#include "chrome/common/notification_service.h" + +namespace history { + +// If a page becomes starred we use this id in place of the real starred id. +// See note in OnURLsStarred. +static const StarID kBogusStarredID = 0x0FFFFFFF; + +InMemoryHistoryBackend::InMemoryHistoryBackend() + : profile_(NULL) { +} + +InMemoryHistoryBackend::~InMemoryHistoryBackend() { +} + +bool InMemoryHistoryBackend::Init(const FilePath& history_filename) { + db_.reset(new InMemoryDatabase); + bool success = db_->InitFromDisk(history_filename); + + if (CommandLine::ForCurrentProcess()->HasSwitch( + switches::kEnableInMemoryURLIndex)) { + index_.reset(new InMemoryURLIndex); + // TODO(rohitrao): Load index. + } + + return success; +} + +void InMemoryHistoryBackend::AttachToHistoryService(Profile* profile) { + if (!db_.get()) { + NOTREACHED(); + return; + } + + profile_ = profile; + + // TODO(evanm): this is currently necessitated by generate_profile, which + // runs without a browser process. 
generate_profile should really create + // a browser process, at which point this check can then be nuked. + if (!g_browser_process) + return; + + // Register for the notifications we care about. + // We only want notifications for the associated profile. + Source<Profile> source(profile_); + registrar_.Add(this, NotificationType::HISTORY_URL_VISITED, source); + registrar_.Add(this, NotificationType::HISTORY_TYPED_URLS_MODIFIED, source); + registrar_.Add(this, NotificationType::HISTORY_URLS_DELETED, source); +} + +void InMemoryHistoryBackend::Observe(NotificationType type, + const NotificationSource& source, + const NotificationDetails& details) { + switch (type.value) { + case NotificationType::HISTORY_URL_VISITED: { + Details<history::URLVisitedDetails> visited_details(details); + if (visited_details->row.typed_count() > 0) { + URLsModifiedDetails modified_details; + modified_details.changed_urls.push_back(visited_details->row); + OnTypedURLsModified(modified_details); + } + break; + } + case NotificationType::HISTORY_TYPED_URLS_MODIFIED: + OnTypedURLsModified( + *Details<history::URLsModifiedDetails>(details).ptr()); + break; + case NotificationType::HISTORY_URLS_DELETED: + OnURLsDeleted(*Details<history::URLsDeletedDetails>(details).ptr()); + break; + default: + // For simplicity, the unit tests send us all notifications, even when + // we haven't registered for them, so don't assert here. + break; + } +} + +void InMemoryHistoryBackend::OnTypedURLsModified( + const URLsModifiedDetails& details) { + DCHECK(db_.get()); + + // Add or update the URLs. + // + // TODO(brettw) currently the rows in the in-memory database don't match the + // IDs in the main database. This sucks. Instead of Add and Remove, we should + // have Sync(), which would take the ID if it's given and add it. 
+ std::vector<history::URLRow>::const_iterator i; + for (i = details.changed_urls.begin(); + i != details.changed_urls.end(); i++) { + URLID id = db_->GetRowForURL(i->url(), NULL); + if (id) + db_->UpdateURLRow(id, *i); + else + db_->AddURL(*i); + } +} + +void InMemoryHistoryBackend::OnURLsDeleted(const URLsDeletedDetails& details) { + DCHECK(db_.get()); + + if (details.all_history) { + // When all history is deleted, the individual URLs won't be listed. Just + // create a new database to quickly clear everything out. + db_.reset(new InMemoryDatabase); + if (!db_->InitFromScratch()) + db_.reset(); + return; + } + + // Delete all matching URLs in our database. + for (std::set<GURL>::const_iterator i = details.urls.begin(); + i != details.urls.end(); ++i) { + URLID id = db_->GetRowForURL(*i, NULL); + if (id) { + // We typically won't have most of them since we only have a subset of + // history, so ignore errors. + db_->DeleteURLRow(id); + } + } +} + +} // namespace history diff --git a/chrome/browser/history/in_memory_history_backend.h b/chrome/browser/history/in_memory_history_backend.h new file mode 100644 index 0000000..30026b5 --- /dev/null +++ b/chrome/browser/history/in_memory_history_backend.h @@ -0,0 +1,89 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Contains the history backend wrapper around the in-memory URL database. This +// object maintains an in-memory cache of the subset of history required to do +// in-line autocomplete. +// +// It is created on the history thread and passed to the main thread where +// operations can be completed synchronously. It listenes for notifications +// from the "regular" history backend and keeps itself in sync. 
+ +#ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_HISTORY_BACKEND_H_ +#define CHROME_BROWSER_HISTORY_IN_MEMORY_HISTORY_BACKEND_H_ + +#include <string> + +#include "base/basictypes.h" +#include "base/gtest_prod_util.h" +#include "base/scoped_ptr.h" +#include "chrome/common/notification_registrar.h" + +class FilePath; +class HistoryDatabase; +class Profile; + +namespace history { + +class InMemoryDatabase; +class InMemoryURLIndex; +struct URLsDeletedDetails; +struct URLsModifiedDetails; + +class InMemoryHistoryBackend : public NotificationObserver { + public: + InMemoryHistoryBackend(); + ~InMemoryHistoryBackend(); + + // Initializes with data from the given history database. + bool Init(const FilePath& history_filename); + + // Does initialization work when this object is attached to the history + // system on the main thread. The argument is the profile with which the + // attached history service is under. + void AttachToHistoryService(Profile* profile); + + // Returns the underlying database associated with this backend. The current + // autocomplete code was written fro this, but it should probably be removed + // so that it can deal directly with this object, rather than the DB. + InMemoryDatabase* db() const { + return db_.get(); + } + + // Returns the in memory index owned by this backend. This index is only + // loaded when the --enable-in-memory-url-index flag is used. + InMemoryURLIndex* index() const { + return index_.get(); + } + + // Notification callback. + virtual void Observe(NotificationType type, + const NotificationSource& source, + const NotificationDetails& details); + + private: + FRIEND_TEST_ALL_PREFIXES(HistoryBackendTest, DeleteAll); + + // Handler for NOTIFY_HISTORY_TYPED_URLS_MODIFIED. + void OnTypedURLsModified(const URLsModifiedDetails& details); + + // Handler for NOTIFY_HISTORY_URLS_DELETED. 
+ void OnURLsDeleted(const URLsDeletedDetails& details); + + NotificationRegistrar registrar_; + + scoped_ptr<InMemoryDatabase> db_; + + scoped_ptr<InMemoryURLIndex> index_; + + // The profile that this object is attached. May be NULL before + // initialization. + Profile* profile_; + + DISALLOW_COPY_AND_ASSIGN(InMemoryHistoryBackend); +}; + +} // namespace history + +#endif // CHROME_BROWSER_HISTORY_IN_MEMORY_HISTORY_BACKEND_H_ diff --git a/chrome/browser/history/in_memory_url_index.cc b/chrome/browser/history/in_memory_url_index.cc new file mode 100644 index 0000000..83c401f --- /dev/null +++ b/chrome/browser/history/in_memory_url_index.cc @@ -0,0 +1,13 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/history/in_memory_url_index.h" + +namespace history { + +InMemoryURLIndex::InMemoryURLIndex() {} + +InMemoryURLIndex::~InMemoryURLIndex() {} + +} // namespace history diff --git a/chrome/browser/history/in_memory_url_index.h b/chrome/browser/history/in_memory_url_index.h new file mode 100644 index 0000000..7b57a4a --- /dev/null +++ b/chrome/browser/history/in_memory_url_index.h @@ -0,0 +1,23 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ +#define CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ + +namespace history { + +// The URL history source. +// Holds portions of the URL database in memory in an indexed form. Used to +// quickly look up matching URLs for a given query string. Used by +// the HistoryURLProvider for inline autocomplete and to provide URL +// matches to the omnibox. 
+class InMemoryURLIndex { + public: + InMemoryURLIndex(); + ~InMemoryURLIndex(); +}; + +} // namespace history + +#endif // CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ diff --git a/chrome/browser/history/in_memory_url_index_unittest.cc b/chrome/browser/history/in_memory_url_index_unittest.cc new file mode 100644 index 0000000..f5932ef --- /dev/null +++ b/chrome/browser/history/in_memory_url_index_unittest.cc @@ -0,0 +1,22 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/history/in_memory_url_index.h" + +#include "base/scoped_ptr.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace history { + +class InMemoryURLIndexTest : public testing::Test { + protected: + scoped_ptr<InMemoryURLIndex> url_index_; +}; + +TEST_F(InMemoryURLIndexTest, Construction) { + url_index_.reset(new InMemoryURLIndex); + EXPECT_TRUE(url_index_.get()); +} + +} // namespace history diff --git a/chrome/browser/history/multipart_uitest.cc b/chrome/browser/history/multipart_uitest.cc new file mode 100644 index 0000000..a8fcf4c --- /dev/null +++ b/chrome/browser/history/multipart_uitest.cc @@ -0,0 +1,61 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "chrome/test/ui/ui_test.h" + +#include "app/sql/connection.h" +#include "app/sql/statement.h" +#include "chrome/test/automation/tab_proxy.h" +#include "chrome/test/automation/browser_proxy.h" +#include "net/url_request/url_request_unittest.h" + +namespace { + +class MultipartResponseUITest : public UITest { +}; + +#if defined(NDEBUG) +// http://code.google.com/p/chromium/issues/detail?id=37746 +// Running this test only for release builds as it fails in debug test +// runs +TEST_F(MultipartResponseUITest, SingleVisit) { + // Make sure that visiting a multipart/x-mixed-replace site only + // creates one entry in the visits table. + const wchar_t kDocRoot[] = L"chrome/test/data"; + scoped_refptr<HTTPTestServer> server = + HTTPTestServer::CreateServer(kDocRoot, NULL); + ASSERT_TRUE(NULL != server.get()); + + scoped_refptr<BrowserProxy> browser_proxy(automation()->GetBrowserWindow(0)); + ASSERT_TRUE(browser_proxy.get()); + scoped_refptr<TabProxy> tab_proxy(browser_proxy->GetActiveTab()); + ASSERT_TRUE(tab_proxy.get()); + NavigateToURL(server->TestServerPage("multipart")); + std::wstring title; + EXPECT_TRUE(tab_proxy->GetTabTitle(&title)); + EXPECT_EQ(L"page 9", title); + CloseBrowserAndServer(); + + // The browser has shutdown now. Check the contents of the history + // table. We should have only one visit for the URL even though it + // had 10 parts. 
+ sql::Connection db; + FilePath history = + user_data_dir().AppendASCII("Default").AppendASCII("History"); + ASSERT_TRUE(file_util::PathExists(history)); + ASSERT_TRUE(db.Open(history)); + std::string query( + "SELECT COUNT(1) FROM visits, urls WHERE visits.url = urls.id" + " AND urls.url LIKE 'http://localhost:%/multipart'"); + { + sql::Statement statement(db.GetUniqueStatement(query.c_str())); + EXPECT_TRUE(statement); + EXPECT_TRUE(statement.Step()); + EXPECT_EQ(1, statement.ColumnInt(0)); + } + db.Close(); +} +#endif + +} // namespace diff --git a/chrome/browser/history/page_usage_data.cc b/chrome/browser/history/page_usage_data.cc new file mode 100644 index 0000000..f202538 --- /dev/null +++ b/chrome/browser/history/page_usage_data.cc @@ -0,0 +1,35 @@ +// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/history/page_usage_data.h" + +#include <algorithm> + +#include "third_party/skia/include/core/SkBitmap.h" + +PageUsageData::~PageUsageData() { + delete thumbnail_; + delete favicon_; +} + +void PageUsageData::SetThumbnail(SkBitmap* img) { + if (thumbnail_ && thumbnail_ != img) + delete thumbnail_; + + thumbnail_ = img; + thumbnail_set_ = true; +} + +void PageUsageData::SetFavIcon(SkBitmap* img) { + if (favicon_ && favicon_ != img) + delete favicon_; + favicon_ = img; + favicon_set_ = true; +} + +// static +bool PageUsageData::Predicate(const PageUsageData* lhs, + const PageUsageData* rhs) { + return lhs->GetScore() > rhs->GetScore(); +} diff --git a/chrome/browser/history/page_usage_data.h b/chrome/browser/history/page_usage_data.h new file mode 100644 index 0000000..66a63e2 --- /dev/null +++ b/chrome/browser/history/page_usage_data.h @@ -0,0 +1,134 @@ +// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CHROME_BROWSER_HISTORY_PAGE_USAGE_DATA_H__ +#define CHROME_BROWSER_HISTORY_PAGE_USAGE_DATA_H__ + +#include "base/string16.h" +#include "chrome/browser/history/history_types.h" +#include "googleurl/src/gurl.h" + +class SkBitmap; + +///////////////////////////////////////////////////////////////////////////// +// +// PageUsageData +// +// A per domain usage data structure to compute and manage most visited +// pages. +// +// See History::QueryPageUsageSince() +// +///////////////////////////////////////////////////////////////////////////// +class PageUsageData { + public: + explicit PageUsageData(history::URLID id) + : id_(id), + thumbnail_(NULL), + thumbnail_set_(false), + thumbnail_pending_(false), + favicon_(NULL), + favicon_set_(false), + favicon_pending_(false), + score_(0.0) { + } + + virtual ~PageUsageData(); + + // Return the url ID + history::URLID GetID() const { + return id_; + } + + void SetURL(const GURL& url) { + url_ = url; + } + + const GURL& GetURL() const { + return url_; + } + + void SetTitle(const string16& s) { + title_ = s; + } + + const string16& GetTitle() const { + return title_; + } + + void SetScore(double v) { + score_ = v; + } + + double GetScore() const { + return score_; + } + + void SetThumbnailMissing() { + thumbnail_set_ = true; + } + + void SetThumbnail(SkBitmap* img); + + bool HasThumbnail() const { + return thumbnail_set_; + } + + const SkBitmap* GetThumbnail() const { + return thumbnail_; + } + + bool thumbnail_pending() const { + return thumbnail_pending_; + } + + void set_thumbnail_pending(bool pending) { + thumbnail_pending_ = pending; + } + + void SetFavIconMissing() { + favicon_set_ = true; + } + + void SetFavIcon(SkBitmap* img); + + bool HasFavIcon() const { + return favicon_set_; + } + + bool favicon_pending() const { + return favicon_pending_; + } + + void set_favicon_pending(bool pending) { + 
favicon_pending_ = pending; + } + + const SkBitmap* GetFavIcon() const { + return favicon_; + } + + // Sort predicate to sort instances by score (high to low) + static bool Predicate(const PageUsageData* dud1, + const PageUsageData* dud2); + + private: + history::URLID id_; + GURL url_; + string16 title_; + + SkBitmap* thumbnail_; + bool thumbnail_set_; + // Whether we have an outstanding request for the thumbnail. + bool thumbnail_pending_; + + SkBitmap* favicon_; + bool favicon_set_; + // Whether we have an outstanding request for the favicon. + bool favicon_pending_; + + double score_; +}; + +#endif // CHROME_BROWSER_HISTORY_PAGE_USAGE_DATA_H__ diff --git a/chrome/browser/history/query_parser.cc b/chrome/browser/history/query_parser.cc new file mode 100644 index 0000000..e1afb86 --- /dev/null +++ b/chrome/browser/history/query_parser.cc @@ -0,0 +1,386 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/history/query_parser.h" + +#include <algorithm> + +#include "app/l10n_util.h" +#include "base/i18n/word_iterator.h" +#include "base/logging.h" +#include "base/scoped_vector.h" +#include "base/string_util.h" +#include "base/utf_string_conversions.h" +#include "unicode/uscript.h" + +namespace { + +// Returns true if |mp1.first| is less than |mp2.first|. This is used to +// sort match positions. +int CompareMatchPosition(const Snippet::MatchPosition& mp1, + const Snippet::MatchPosition& mp2) { + return mp1.first < mp2.first; +} + +// Returns true if |mp2| intersects |mp1|. This is intended for use by +// CoalesceMatchesFrom and isn't meant as a general intersectpion comparison +// function. 
+bool SnippetIntersects(const Snippet::MatchPosition& mp1, + const Snippet::MatchPosition& mp2) { + return mp2.first >= mp1.first && mp2.first <= mp1.second; +} + +// Coalesces match positions in |matches| after index that intersect the match +// position at |index|. +void CoalesceMatchesFrom(size_t index, + Snippet::MatchPositions* matches) { + Snippet::MatchPosition& mp = (*matches)[index]; + for (Snippet::MatchPositions::iterator i = matches->begin() + index + 1; + i != matches->end(); ) { + if (SnippetIntersects(mp, *i)) { + mp.second = i->second; + i = matches->erase(i); + } else { + return; + } + } +} + +// Sorts the match positions in |matches| by their first index, then coalesces +// any match positions that intersect each other. +void CoalseAndSortMatchPositions(Snippet::MatchPositions* matches) { + std::sort(matches->begin(), matches->end(), &CompareMatchPosition); + // WARNING: we don't use iterator here as CoalesceMatchesFrom may remove + // from matches. + for (size_t i = 0; i < matches->size(); ++i) + CoalesceMatchesFrom(i, matches); +} + +} // namespace + +// Inheritance structure: +// Queries are represented as trees of QueryNodes. +// QueryNodes are either a collection of subnodes (a QueryNodeList) +// or a single word (a QueryNodeWord). + +// A QueryNodeWord is a single word in the query. 
+class QueryNodeWord : public QueryNode { + public: + explicit QueryNodeWord(const string16& word) + : word_(word), literal_(false) {} + virtual ~QueryNodeWord() {} + virtual int AppendToSQLiteQuery(string16* query) const; + virtual bool IsWord() const { return true; } + + const string16& word() const { return word_; } + void set_literal(bool literal) { literal_ = literal; } + + virtual bool HasMatchIn(const std::vector<QueryWord>& words, + Snippet::MatchPositions* match_positions) const; + + virtual bool Matches(const string16& word, bool exact) const; + virtual void AppendWords(std::vector<string16>* words) const; + + private: + string16 word_; + bool literal_; +}; + +bool QueryNodeWord::HasMatchIn(const std::vector<QueryWord>& words, + Snippet::MatchPositions* match_positions) const { + for (size_t i = 0; i < words.size(); ++i) { + if (Matches(words[i].word, false)) { + size_t match_start = words[i].position; + match_positions->push_back( + Snippet::MatchPosition(match_start, + match_start + static_cast<int>(word_.size()))); + return true; + } + } + return false; +} + +bool QueryNodeWord::Matches(const string16& word, bool exact) const { + if (exact || !QueryParser::IsWordLongEnoughForPrefixSearch(word_)) + return word == word_; + return word.size() >= word_.size() && + (word_.compare(0, word_.size(), word, 0, word_.size()) == 0); +} + +void QueryNodeWord::AppendWords(std::vector<string16>* words) const { + words->push_back(word_); +} + +int QueryNodeWord::AppendToSQLiteQuery(string16* query) const { + query->append(word_); + + // Use prefix search if we're not literal and long enough. + if (!literal_ && QueryParser::IsWordLongEnoughForPrefixSearch(word_)) + *query += L'*'; + return 1; +} + +// A QueryNodeList has a collection of child QueryNodes +// which it cleans up after. 
+class QueryNodeList : public QueryNode { + public: + virtual ~QueryNodeList(); + + virtual int AppendToSQLiteQuery(string16* query) const { + return AppendChildrenToString(query); + } + virtual bool IsWord() const { return false; } + + void AddChild(QueryNode* node) { children_.push_back(node); } + + typedef std::vector<QueryNode*> QueryNodeVector; + QueryNodeVector* children() { return &children_; } + + // Remove empty subnodes left over from other parsing. + void RemoveEmptySubnodes(); + + // QueryNodeList is never used with Matches or HasMatchIn. + virtual bool Matches(const string16& word, bool exact) const { + NOTREACHED(); + return false; + } + virtual bool HasMatchIn(const std::vector<QueryWord>& words, + Snippet::MatchPositions* match_positions) const { + NOTREACHED(); + return false; + } + virtual void AppendWords(std::vector<string16>* words) const; + + protected: + int AppendChildrenToString(string16* query) const; + + QueryNodeVector children_; +}; + +QueryNodeList::~QueryNodeList() { + for (QueryNodeVector::iterator node = children_.begin(); + node != children_.end(); ++node) + delete *node; +} + +void QueryNodeList::RemoveEmptySubnodes() { + for (size_t i = 0; i < children_.size(); ++i) { + if (children_[i]->IsWord()) + continue; + + QueryNodeList* list_node = static_cast<QueryNodeList*>(children_[i]); + list_node->RemoveEmptySubnodes(); + if (list_node->children()->empty()) { + children_.erase(children_.begin() + i); + --i; + delete list_node; + } + } +} + +void QueryNodeList::AppendWords(std::vector<string16>* words) const { + for (size_t i = 0; i < children_.size(); ++i) + children_[i]->AppendWords(words); +} + +int QueryNodeList::AppendChildrenToString(string16* query) const { + int num_words = 0; + for (QueryNodeVector::const_iterator node = children_.begin(); + node != children_.end(); ++node) { + if (node != children_.begin()) + query->push_back(L' '); + num_words += (*node)->AppendToSQLiteQuery(query); + } + return num_words; +} + +// A 
QueryNodePhrase is a phrase query ("quoted"). +class QueryNodePhrase : public QueryNodeList { + public: + virtual int AppendToSQLiteQuery(string16* query) const { + query->push_back(L'"'); + int num_words = AppendChildrenToString(query); + query->push_back(L'"'); + return num_words; + } + + virtual bool Matches(const string16& word, bool exact) const; + virtual bool HasMatchIn(const std::vector<QueryWord>& words, + Snippet::MatchPositions* match_positions) const; +}; + +bool QueryNodePhrase::Matches(const string16& word, bool exact) const { + NOTREACHED(); + return false; +} + +bool QueryNodePhrase::HasMatchIn( + const std::vector<QueryWord>& words, + Snippet::MatchPositions* match_positions) const { + if (words.size() < children_.size()) + return false; + + for (size_t i = 0, max = words.size() - children_.size() + 1; i < max; ++i) { + bool matched_all = true; + for (size_t j = 0; j < children_.size(); ++j) { + if (!children_[j]->Matches(words[i + j].word, true)) { + matched_all = false; + break; + } + } + if (matched_all) { + const QueryWord& last_word = words[i + children_.size() - 1]; + match_positions->push_back( + Snippet::MatchPosition(words[i].position, + last_word.position + last_word.word.length())); + return true; + } + } + return false; +} + +QueryParser::QueryParser() { +} + +// static +bool QueryParser::IsWordLongEnoughForPrefixSearch(const string16& word) { + DCHECK(word.size() > 0); + size_t minimum_length = 3; + // We intentionally exclude Hangul Jamos (both Conjoining and compatibility) + // because they 'behave like' Latin letters. Moreover, we should + // normalize the former before reaching here. + if (0xAC00 <= word[0] && word[0] <= 0xD7A3) + minimum_length = 2; + return word.size() >= minimum_length; +} + +// Returns true if the character is considered a quote. 
+static bool IsQueryQuote(wchar_t ch) { + return ch == '"' || + ch == 0xab || // left pointing double angle bracket + ch == 0xbb || // right pointing double angle bracket + ch == 0x201c || // left double quotation mark + ch == 0x201d || // right double quotation mark + ch == 0x201e; // double low-9 quotation mark +} + +int QueryParser::ParseQuery(const string16& query, + string16* sqlite_query) { + QueryNodeList root; + if (!ParseQueryImpl(query, &root)) + return 0; + return root.AppendToSQLiteQuery(sqlite_query); +} + +void QueryParser::ParseQuery(const string16& query, + std::vector<QueryNode*>* nodes) { + QueryNodeList root; + if (ParseQueryImpl(l10n_util::ToLower(query), &root)) + nodes->swap(*root.children()); +} + + +void QueryParser::ExtractQueryWords(const string16& query, + std::vector<string16>* words) { + QueryNodeList root; + if (!ParseQueryImpl(query, &root)) + return; + root.AppendWords(words); +} + +bool QueryParser::DoesQueryMatch(const string16& text, + const std::vector<QueryNode*>& query_nodes, + Snippet::MatchPositions* match_positions) { + if (query_nodes.empty()) + return false; + + std::vector<QueryWord> query_words; + string16 lower_text = l10n_util::ToLower(text); + ExtractQueryWords(lower_text, &query_words); + + if (query_words.empty()) + return false; + + Snippet::MatchPositions matches; + for (size_t i = 0; i < query_nodes.size(); ++i) { + if (!query_nodes[i]->HasMatchIn(query_words, &matches)) + return false; + } + if (lower_text.length() != text.length()) { + // The lower case string differs from the original string. The matches are + // meaningless. + // TODO(sky): we need a better way to align the positions so that we don't + // completely punt here. 
+ match_positions->clear(); + } else { + CoalseAndSortMatchPositions(&matches); + match_positions->swap(matches); + } + return true; +} + +bool QueryParser::ParseQueryImpl(const string16& query, + QueryNodeList* root) { + WordIterator iter(&query, WordIterator::BREAK_WORD); + // TODO(evanm): support a locale here + if (!iter.Init()) + return false; + + // To handle nesting, we maintain a stack of QueryNodeLists. + // The last element (back) of the stack contains the current, deepest node. + std::vector<QueryNodeList*> query_stack; + query_stack.push_back(root); + + bool in_quotes = false; // whether we're currently in a quoted phrase + while (iter.Advance()) { + // Just found a span between 'prev' (inclusive) and 'pos' (exclusive). It + // is not necessarily a word, but could also be a sequence of punctuation + // or whitespace. + if (iter.IsWord()) { + string16 word = iter.GetWord(); + + QueryNodeWord* word_node = new QueryNodeWord(word); + if (in_quotes) + word_node->set_literal(true); + query_stack.back()->AddChild(word_node); + } else { // Punctuation. + if (IsQueryQuote(query[iter.prev()])) { + if (!in_quotes) { + QueryNodeList* quotes_node = new QueryNodePhrase; + query_stack.back()->AddChild(quotes_node); + query_stack.push_back(quotes_node); + in_quotes = true; + } else { + query_stack.pop_back(); // Stop adding to the quoted phrase. + in_quotes = false; + } + } + } + } + + root->RemoveEmptySubnodes(); + return true; +} + +void QueryParser::ExtractQueryWords(const string16& text, + std::vector<QueryWord>* words) { + WordIterator iter(&text, WordIterator::BREAK_WORD); + // TODO(evanm): support a locale here + if (!iter.Init()) + return; + + while (iter.Advance()) { + // Just found a span between 'prev' (inclusive) and 'pos' (exclusive). It + // is not necessarily a word, but could also be a sequence of punctuation + // or whitespace. 
+ if (iter.IsWord()) { + string16 word = iter.GetWord(); + if (!word.empty()) { + words->push_back(QueryWord()); + words->back().word = word; + words->back().position = iter.prev(); + } + } + } +} diff --git a/chrome/browser/history/query_parser.h b/chrome/browser/history/query_parser.h new file mode 100644 index 0000000..8399abf --- /dev/null +++ b/chrome/browser/history/query_parser.h @@ -0,0 +1,107 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// The query parser is used to parse queries entered into the history +// search into more normalized queries can be passed to the SQLite backend. + +#ifndef CHROME_BROWSER_HISTORY_QUERY_PARSER_H_ +#define CHROME_BROWSER_HISTORY_QUERY_PARSER_H_ + +#include <vector> + +#include "base/string16.h" +#include "chrome/browser/history/snippet.h" + +class QueryNodeList; + +// Used by HasMatchIn. +struct QueryWord { + // The work to match against. + string16 word; + + // The starting position of the word in the original text. + size_t position; +}; + +// QueryNode is used by QueryNodeParser to represent the elements that +// constitute a query. While QueryNode is exposed by way of ParseQuery, it +// really isn't meant for external usage. +class QueryNode { + public: + virtual ~QueryNode() {} + + // Serialize ourselves out to a string that can be passed to SQLite. Returns + // the number of words in this node. + virtual int AppendToSQLiteQuery(string16* query) const = 0; + + // Return true if this is a word node, false if it's a QueryNodeList. + virtual bool IsWord() const = 0; + + // Returns true if this node matches the specified text. If exact is true, + // the string must exactly match. Otherwise, this uses a starts with + // comparison. + virtual bool Matches(const string16& word, bool exact) const = 0; + + // Returns true if this node matches at least one of the words in words. 
If + // the node matches at least one word, an entry is added to match_positions + // giving the matching region. + virtual bool HasMatchIn(const std::vector<QueryWord>& words, + Snippet::MatchPositions* match_positions) const = 0; + + // Appends the words that make up this node in |words|. + virtual void AppendWords(std::vector<string16>* words) const = 0; +}; + + +class QueryParser { + public: + QueryParser(); + + // For CJK ideographs and Korean Hangul, even a single character + // can be useful in prefix matching, but that may give us too many + // false positives. Moreover, the current ICU word breaker gives us + // back every single Chinese character as a word so that there's no + // point doing anything for them and we only adjust the minimum length + // to 2 for Korean Hangul while using 3 for others. This is a temporary + // hack until we have a segmentation support. + static bool IsWordLongEnoughForPrefixSearch(const string16& word); + + // Parse a query into a SQLite query. The resulting query is placed in + // sqlite_query and the number of words is returned. + int ParseQuery(const string16& query, + string16* sqlite_query); + + // Parses the query words in query, returning the nodes that constitute the + // valid words in the query. This is intended for later usage with + // DoesQueryMatch. + // Ownership of the nodes passes to the caller. + void ParseQuery(const string16& query, + std::vector<QueryNode*>* nodes); + + // Parses a query returning the words that make up the query. Any words in + // quotes are put in |words| without the quotes. For example, the query text + // "foo bar" results in two entries being added to words, one for foo and one + // for bar. + void ExtractQueryWords(const string16& query, + std::vector<string16>* words); + + // Returns true if the string text matches the query nodes created by a call + // to ParseQuery. If the query does match each of the matching positions in + // the text is added to |match_positions|. 
+ bool DoesQueryMatch(const string16& text, + const std::vector<QueryNode*>& nodes, + Snippet::MatchPositions* match_positions); + + private: + // Does the work of parsing a query; creates nodes in QueryNodeList as + // appropriate. This is invoked from both of the ParseQuery methods. + bool ParseQueryImpl(const string16& query, + QueryNodeList* root); + + // Extracts the words from text, placing each word into words. + void ExtractQueryWords(const string16& text, + std::vector<QueryWord>* words); +}; + +#endif // CHROME_BROWSER_HISTORY_QUERY_PARSER_H_ diff --git a/chrome/browser/history/query_parser_unittest.cc b/chrome/browser/history/query_parser_unittest.cc new file mode 100644 index 0000000..f8b41d9 --- /dev/null +++ b/chrome/browser/history/query_parser_unittest.cc @@ -0,0 +1,163 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/basictypes.h" +#include "base/scoped_vector.h" +#include "base/utf_string_conversions.h" +#include "chrome/browser/history/query_parser.h" +#include "testing/gtest/include/gtest/gtest.h" + +class QueryParserTest : public testing::Test { + public: + struct TestData { + const char* input; + const int expected_word_count; + }; + + std::string QueryToString(const std::string& query); + + protected: + QueryParser query_parser_; +}; + +// Test helper: Convert a user query string in 8-bit (for hardcoding +// convenience) to a SQLite query string. +std::string QueryParserTest::QueryToString(const std::string& query) { + string16 sqlite_query; + query_parser_.ParseQuery(UTF8ToUTF16(query), &sqlite_query); + return UTF16ToUTF8(sqlite_query); +} + +// Basic multi-word queries, including prefix matching. 
+TEST_F(QueryParserTest, SimpleQueries) { + EXPECT_EQ("", QueryToString(" ")); + EXPECT_EQ("singleword*", QueryToString("singleword")); + EXPECT_EQ("spacedout*", QueryToString(" spacedout ")); + EXPECT_EQ("foo* bar*", QueryToString("foo bar")); + // Short words aren't prefix matches. For Korean Hangul + // the minimum is 2 while for other scripts, it's 3. + EXPECT_EQ("f b", QueryToString(" f b")); + // KA JANG + EXPECT_EQ(WideToUTF8(L"\xAC00 \xC7A5"), + QueryToString(WideToUTF8(L" \xAC00 \xC7A5"))); + EXPECT_EQ("foo* bar*", QueryToString(" foo bar ")); + // KA-JANG BICH-GO + EXPECT_EQ(WideToUTF8(L"\xAC00\xC7A5* \xBE5B\xACE0*"), + QueryToString(WideToUTF8(L"\xAC00\xC7A5 \xBE5B\xACE0"))); +} + +// Quoted substring parsing. +TEST_F(QueryParserTest, Quoted) { + // ASCII quotes + EXPECT_EQ("\"Quoted\"", QueryToString("\"Quoted\"")); + // Missing end quotes + EXPECT_EQ("\"miss end\"", QueryToString("\"miss end")); + // Missing begin quotes + EXPECT_EQ("miss* beg*", QueryToString("miss beg\"")); + // Weird formatting + EXPECT_EQ("\"Many\" \"quotes\"", QueryToString("\"Many \"\"quotes")); +} + +// Apostrophes within words should be preserved, but otherwise stripped. +TEST_F(QueryParserTest, Apostrophes) { + EXPECT_EQ("foo* bar's*", QueryToString("foo bar's")); + EXPECT_EQ("l'foo*", QueryToString("l'foo")); + EXPECT_EQ("foo*", QueryToString("'foo")); +} + +// Special characters. 
+TEST_F(QueryParserTest, SpecialChars) { + EXPECT_EQ("foo* the* bar*", QueryToString("!#:/*foo#$*;'* the!#:/*bar")); +} + +TEST_F(QueryParserTest, NumWords) { + TestData data[] = { + { "blah", 1 }, + { "foo \"bar baz\"", 3 }, + { "foo \"baz\"", 2 }, + { "foo \"bar baz\" blah", 4 }, + }; + + for (size_t i = 0; i < arraysize(data); ++i) { + string16 query_string; + EXPECT_EQ(data[i].expected_word_count, + query_parser_.ParseQuery(UTF8ToUTF16(data[i].input), + &query_string)); + } +} + +TEST_F(QueryParserTest, ParseQueryNodesAndMatch) { + struct TestData2 { + const std::string query; + const std::string text; + const bool matches; + const size_t m1_start; + const size_t m1_end; + const size_t m2_start; + const size_t m2_end; + } data[] = { + { "foo foo", "foo", true, 0, 3, 0, 0 }, + { "foo fooey", "fooey", true, 0, 5, 0, 0 }, + { "foo fooey bar", "bar fooey", true, 0, 3, 4, 9 }, + { "blah", "blah", true, 0, 4, 0, 0 }, + { "blah", "foo", false, 0, 0, 0, 0 }, + { "blah", "blahblah", true, 0, 4, 0, 0 }, + { "blah", "foo blah", true, 4, 8, 0, 0 }, + { "foo blah", "blah", false, 0, 0, 0, 0 }, + { "foo blah", "blahx foobar", true, 0, 4, 6, 9 }, + { "\"foo blah\"", "foo blah", true, 0, 8, 0, 0 }, + { "\"foo blah\"", "foox blahx", false, 0, 0, 0, 0 }, + { "\"foo blah\"", "foo blah", true, 0, 8, 0, 0 }, + { "\"foo blah\"", "\"foo blah\"", true, 1, 9, 0, 0 }, + { "foo blah", "\"foo bar blah\"", true, 1, 4, 9, 13 }, + }; + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) { + QueryParser parser; + ScopedVector<QueryNode> query_nodes; + parser.ParseQuery(UTF8ToUTF16(data[i].query), &query_nodes.get()); + Snippet::MatchPositions match_positions; + ASSERT_EQ(data[i].matches, + parser.DoesQueryMatch(UTF8ToUTF16(data[i].text), + query_nodes.get(), + &match_positions)); + size_t offset = 0; + if (data[i].m1_start != 0 || data[i].m1_end != 0) { + ASSERT_TRUE(match_positions.size() >= 1); + EXPECT_EQ(data[i].m1_start, match_positions[0].first); + EXPECT_EQ(data[i].m1_end, 
match_positions[0].second); + offset++; + } + if (data[i].m2_start != 0 || data[i].m2_end != 0) { + ASSERT_TRUE(match_positions.size() == 1 + offset); + EXPECT_EQ(data[i].m2_start, match_positions[offset].first); + EXPECT_EQ(data[i].m2_end, match_positions[offset].second); + } + } +} + +TEST_F(QueryParserTest, ExtractQueryWords) { + struct TestData2 { + const std::string text; + const std::string w1; + const std::string w2; + const std::string w3; + const size_t word_count; + } data[] = { + { "foo", "foo", "", "", 1 }, + { "foo bar", "foo", "bar", "", 2 }, + { "\"foo bar\"", "foo", "bar", "", 2 }, + { "\"foo bar\" a", "foo", "bar", "a", 3 }, + }; + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) { + std::vector<string16> results; + QueryParser parser; + parser.ExtractQueryWords(UTF8ToUTF16(data[i].text), &results); + ASSERT_EQ(data[i].word_count, results.size()); + EXPECT_EQ(data[i].w1, UTF16ToUTF8(results[0])); + if (results.size() == 2) + EXPECT_EQ(data[i].w2, UTF16ToUTF8(results[1])); + if (results.size() == 3) + EXPECT_EQ(data[i].w3, UTF16ToUTF8(results[2])); + } +} diff --git a/chrome/browser/history/redirect_uitest.cc b/chrome/browser/history/redirect_uitest.cc new file mode 100644 index 0000000..f7a1669 --- /dev/null +++ b/chrome/browser/history/redirect_uitest.cc @@ -0,0 +1,303 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Navigates the browser to server and client redirect pages and makes sure +// that the correct redirects are reflected in the history database. Errors +// here might indicate that WebKit changed the calls our glue layer gets in +// the case of redirects. It may also mean problems with the history system. 
+ +#include "base/file_util.h" +#include "base/platform_thread.h" +#include "base/scoped_ptr.h" +#include "base/string_util.h" +#include "base/string16.h" +#include "chrome/test/automation/tab_proxy.h" +#include "chrome/test/ui/ui_test.h" +#include "net/base/net_util.h" +#include "net/url_request/url_request_unittest.h" + +namespace { + +const wchar_t kDocRoot[] = L"chrome/test/data"; + +typedef UITest RedirectTest; + +// Tests a single server redirect +TEST_F(RedirectTest, Server) { + scoped_refptr<HTTPTestServer> server = + HTTPTestServer::CreateServer(kDocRoot, NULL); + ASSERT_TRUE(NULL != server.get()); + + GURL final_url = server->TestServerPage(std::string()); + GURL first_url = server->TestServerPage( + "server-redirect?" + final_url.spec()); + + NavigateToURL(first_url); + + scoped_refptr<TabProxy> tab_proxy(GetActiveTab()); + ASSERT_TRUE(tab_proxy.get()); + + std::vector<GURL> redirects; + ASSERT_TRUE(tab_proxy->GetRedirectsFrom(first_url, &redirects)); + + ASSERT_EQ(1U, redirects.size()); + EXPECT_EQ(final_url.spec(), redirects[0].spec()); +} + +// Tests a single client redirect. +TEST_F(RedirectTest, Client) { + scoped_refptr<HTTPTestServer> server = + HTTPTestServer::CreateServer(kDocRoot, NULL); + ASSERT_TRUE(NULL != server.get()); + + GURL final_url = server->TestServerPage(std::string()); + GURL first_url = server->TestServerPage( + "client-redirect?" + final_url.spec()); + + // The client redirect appears as two page visits in the browser. + NavigateToURLBlockUntilNavigationsComplete(first_url, 2); + + scoped_refptr<TabProxy> tab_proxy(GetActiveTab()); + ASSERT_TRUE(tab_proxy.get()); + + std::vector<GURL> redirects; + ASSERT_TRUE(tab_proxy->GetRedirectsFrom(first_url, &redirects)); + + ASSERT_EQ(1U, redirects.size()); + EXPECT_EQ(final_url.spec(), redirects[0].spec()); + + // The address bar should display the final URL. 
+ GURL tab_url; + EXPECT_TRUE(tab_proxy->GetCurrentURL(&tab_url)); + EXPECT_TRUE(final_url == tab_url); + + // Navigate one more time. + NavigateToURLBlockUntilNavigationsComplete(first_url, 2); + + // The address bar should still display the final URL. + EXPECT_TRUE(tab_proxy->GetCurrentURL(&tab_url)); + EXPECT_TRUE(final_url == tab_url); +} + +TEST_F(RedirectTest, ClientEmptyReferer) { + scoped_refptr<HTTPTestServer> server = + HTTPTestServer::CreateServer(kDocRoot, NULL); + ASSERT_TRUE(NULL != server.get()); + + GURL final_url = server->TestServerPage(std::string()); + FilePath test_file(test_data_directory_); + test_file = test_file.AppendASCII("file_client_redirect.html"); + GURL first_url = net::FilePathToFileURL(test_file); + + // The client redirect appears as two page visits in the browser. + NavigateToURLBlockUntilNavigationsComplete(first_url, 2); + + std::vector<GURL> redirects; + scoped_refptr<TabProxy> tab_proxy(GetActiveTab()); + ASSERT_TRUE(tab_proxy.get()); + ASSERT_TRUE(tab_proxy->GetRedirectsFrom(first_url, &redirects)); + ASSERT_EQ(1U, redirects.size()); + EXPECT_EQ(final_url.spec(), redirects[0].spec()); +} + +// Tests to make sure a location change when a pending redirect exists isn't +// flagged as a redirect. +TEST_F(RedirectTest, ClientCancelled) { + FilePath first_path(test_data_directory_); + first_path = first_path.AppendASCII("cancelled_redirect_test.html"); + ASSERT_TRUE(file_util::AbsolutePath(&first_path)); + GURL first_url = net::FilePathToFileURL(first_path); + + NavigateToURLBlockUntilNavigationsComplete(first_url, 1); + + NavigateToURL(GURL("javascript:click()")); // User initiated location change. 
+ + scoped_refptr<TabProxy> tab_proxy(GetActiveTab()); + ASSERT_TRUE(tab_proxy.get()); + + std::vector<GURL> redirects; + ASSERT_TRUE(tab_proxy->GetRedirectsFrom(first_url, &redirects)); + + // There should be no redirects from first_url, because the anchor location + // change that occurs should not be flagged as a redirect and the meta-refresh + // won't have fired yet. + ASSERT_EQ(0U, redirects.size()); + GURL current_url; + ASSERT_TRUE(tab_proxy->GetCurrentURL(¤t_url)); + + // Need to test final path and ref separately since constructing a file url + // containing an anchor using FilePathToFileURL will escape the anchor as + // %23, but in current_url the anchor will be '#'. + std::string final_ref = "myanchor"; + FilePath current_path; + ASSERT_TRUE(net::FileURLToFilePath(current_url, ¤t_path)); + ASSERT_TRUE(file_util::AbsolutePath(¤t_path)); + // Path should remain unchanged. + EXPECT_EQ(StringToLowerASCII(first_path.value()), + StringToLowerASCII(current_path.value())); + EXPECT_EQ(final_ref, current_url.ref()); +} + +// Tests a client->server->server redirect +TEST_F(RedirectTest, ClientServerServer) { + scoped_refptr<HTTPTestServer> server = + HTTPTestServer::CreateServer(kDocRoot, NULL); + ASSERT_TRUE(NULL != server.get()); + + GURL final_url = server->TestServerPage(std::string()); + GURL next_to_last = server->TestServerPage( + "server-redirect?" + final_url.spec()); + GURL second_url = server->TestServerPage( + "server-redirect?" + next_to_last.spec()); + GURL first_url = server->TestServerPage( + "client-redirect?" + second_url.spec()); + std::vector<GURL> redirects; + + // We need the sleep for the client redirects, because it appears as two + // page visits in the browser. 
+ NavigateToURL(first_url); + + for (int i = 0; i < 10; ++i) { + PlatformThread::Sleep(sleep_timeout_ms()); + scoped_refptr<TabProxy> tab_proxy(GetActiveTab()); + ASSERT_TRUE(tab_proxy.get()); + ASSERT_TRUE(tab_proxy->GetRedirectsFrom(first_url, &redirects)); + if (!redirects.empty()) + break; + } + + ASSERT_EQ(3U, redirects.size()); + EXPECT_EQ(second_url.spec(), redirects[0].spec()); + EXPECT_EQ(next_to_last.spec(), redirects[1].spec()); + EXPECT_EQ(final_url.spec(), redirects[2].spec()); +} + +// Tests that the "#reference" gets preserved across server redirects. +TEST_F(RedirectTest, ServerReference) { + scoped_refptr<HTTPTestServer> server = + HTTPTestServer::CreateServer(kDocRoot, NULL); + ASSERT_TRUE(NULL != server.get()); + + const std::string ref("reference"); + + GURL final_url = server->TestServerPage(std::string()); + GURL initial_url = server->TestServerPage( + "server-redirect?" + final_url.spec() + "#" + ref); + + NavigateToURL(initial_url); + + GURL url = GetActiveTabURL(); + EXPECT_EQ(ref, url.ref()); +} + +// Test that redirect from http:// to file:// : +// A) does not crash the browser or confuse the redirect chain, see bug 1080873 +// B) does not take place. +TEST_F(RedirectTest, NoHttpToFile) { + scoped_refptr<HTTPTestServer> server = + HTTPTestServer::CreateServer(kDocRoot, NULL); + ASSERT_TRUE(NULL != server.get()); + FilePath test_file(test_data_directory_); + test_file = test_file.AppendASCII("http_to_file.html"); + GURL file_url = net::FilePathToFileURL(test_file); + + GURL initial_url = server->TestServerPage( + "client-redirect?" + file_url.spec()); + + NavigateToURL(initial_url); + // UITest will check for crashes. We make sure the title doesn't match the + // title from the file, because the nav should not have taken place. 
+ scoped_refptr<TabProxy> tab_proxy(GetActiveTab()); + ASSERT_TRUE(tab_proxy.get()); + std::wstring actual_title; + ASSERT_TRUE(tab_proxy->GetTabTitle(&actual_title)); + EXPECT_NE("File!", WideToUTF8(actual_title)); +} + +// Ensures that non-user initiated location changes (within page) are +// flagged as client redirects. See bug 1139823. +TEST_F(RedirectTest, ClientFragments) { + scoped_refptr<HTTPTestServer> server = + HTTPTestServer::CreateServer(kDocRoot, NULL); + ASSERT_TRUE(NULL != server.get()); + + FilePath test_file(test_data_directory_); + test_file = test_file.AppendASCII("ref_redirect.html"); + GURL first_url = net::FilePathToFileURL(test_file); + std::vector<GURL> redirects; + + NavigateToURL(first_url); + + scoped_refptr<TabProxy> tab_proxy(GetActiveTab()); + ASSERT_TRUE(tab_proxy.get()); + ASSERT_TRUE(tab_proxy->GetRedirectsFrom(first_url, &redirects)); + EXPECT_EQ(1U, redirects.size()); + EXPECT_EQ(first_url.spec() + "#myanchor", redirects[0].spec()); +} + +// TODO(timsteele): This is disabled because our current testserver can't +// handle multiple requests in parallel, making it hang on the first request +// to /slow?60. It's unable to serve our second request for files/title2.html +// until /slow? completes, which doesn't give the desired behavior. We could +// alternatively load the second page from disk, but we would need to start +// the browser for this testcase with --process-per-tab, and I don't think +// we can do this at test-case-level granularity at the moment. +// http://crbug.com/45056 +TEST_F(RedirectTest, + DISABLED_ClientCancelledByNewNavigationAfterProvisionalLoad) { + // We want to initiate a second navigation after the provisional load for + // the client redirect destination has started, but before this load is + // committed. 
  // To achieve this, we tell the browser to load a slow page,
  // which causes it to start a provisional load, and while it is waiting
  // for the response (which means it hasn't committed the load for the client
  // redirect destination page yet), we issue a new navigation request.
  scoped_refptr<HTTPTestServer> server =
      HTTPTestServer::CreateServer(kDocRoot, NULL);
  ASSERT_TRUE(NULL != server.get());

  GURL final_url = server->TestServerPage("files/title2.html");
  GURL slow = server->TestServerPage("slow?60");
  GURL first_url = server->TestServerPage(
      "client-redirect?" + slow.spec());
  std::vector<GURL> redirects;

  NavigateToURL(first_url);
  // We don't sleep here - the first navigation won't have been committed yet
  // because we told the server to wait a minute. This means the browser has
  // started its provisional load for the client redirect destination page but
  // hasn't completed. Our time is now!
  NavigateToURL(final_url);

  std::wstring tab_title;
  std::wstring final_url_title = UTF8ToWide("Title Of Awesomeness");
  // Wait till the final page has been loaded. We poll the tab title (at most
  // ten times, sleeping between attempts) rather than blocking on navigation,
  // since two navigations are in flight.
  for (int i = 0; i < 10; ++i) {
    PlatformThread::Sleep(sleep_timeout_ms());
    scoped_refptr<TabProxy> tab_proxy(GetActiveTab());
    ASSERT_TRUE(tab_proxy.get());
    ASSERT_TRUE(tab_proxy->GetTabTitle(&tab_title));
    if (tab_title == final_url_title) {
      ASSERT_TRUE(tab_proxy->GetRedirectsFrom(first_url, &redirects));
      break;
    }
  }

  // Check to make sure the navigation did in fact take place and we are
  // at the expected page.
  EXPECT_EQ(final_url_title, tab_title);

  bool final_navigation_not_redirect = true;
  // Check to make sure our request for files/title2.html doesn't get flagged
  // as a client redirect from the first (/client-redirect?) page.
+ for (std::vector<GURL>::iterator it = redirects.begin(); + it != redirects.end(); ++it) { + if (final_url.spec() == it->spec()) { + final_navigation_not_redirect = false; + break; + } + } + EXPECT_TRUE(final_navigation_not_redirect); +} + +} // namespace diff --git a/chrome/browser/history/snippet.cc b/chrome/browser/history/snippet.cc new file mode 100644 index 0000000..cb96e16 --- /dev/null +++ b/chrome/browser/history/snippet.cc @@ -0,0 +1,285 @@ +// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/history/snippet.h" + +#include <algorithm> + +#include "base/logging.h" +#include "base/scoped_ptr.h" +#include "base/string_util.h" +#include "base/utf_string_conversions.h" +#include "unicode/brkiter.h" +#include "unicode/utext.h" +#include "unicode/utf8.h" + +namespace { + +bool PairFirstLessThan(const Snippet::MatchPosition& a, + const Snippet::MatchPosition& b) { + return a.first < b.first; +} + +// Combines all pairs after offset in match_positions that are contained +// or touch the pair at offset. +void CoalescePositionsFrom(size_t offset, + Snippet::MatchPositions* match_positions) { + DCHECK(offset < match_positions->size()); + Snippet::MatchPosition& pair((*match_positions)[offset]); + ++offset; + while (offset < match_positions->size() && + pair.second >= (*match_positions)[offset].first) { + pair.second = std::max(pair.second, (*match_positions)[offset].second); + match_positions->erase(match_positions->begin() + offset); + } +} + +// Makes sure there is a pair in match_positions that contains the specified +// range. This keeps the pairs ordered in match_positions by first, and makes +// sure none of the pairs in match_positions touch each other. 
// |start| and |end| are the [begin, end) byte offsets of the new match;
// start must be strictly less than end. On return, |match_positions| remains
// sorted by .first and contains no overlapping or touching pairs.
void AddMatch(size_t start,
              size_t end,
              Snippet::MatchPositions* match_positions) {
  DCHECK(start < end);
  DCHECK(match_positions);
  Snippet::MatchPosition pair(start, end);
  if (match_positions->empty()) {
    match_positions->push_back(pair);
    return;
  }
  // There's at least one match. Find the position of the new match,
  // potentially extending pairs around it.
  Snippet::MatchPositions::iterator i =
      std::lower_bound(match_positions->begin(), match_positions->end(),
                       pair, &PairFirstLessThan);
  if (i != match_positions->end() && i->first == start) {
    // Match not at the end and there is already a pair with the same
    // start.
    if (end > i->second) {
      // New pair extends beyond existing pair. Extend existing pair and
      // coalesce matches after it.
      i->second = end;
      CoalescePositionsFrom(i - match_positions->begin(), match_positions);
    }  // else case, new pair completely contained in existing pair, nothing
       // to do.
  } else if (i == match_positions->begin()) {
    // Match at the beginning and the first pair doesn't have the same
    // start. Insert new pair and coalesce matches after it.
    match_positions->insert(i, pair);
    CoalescePositionsFrom(0, match_positions);
  } else {
    // Not at the beginning (but may be at the end).
    --i;
    if (start <= i->second && end > i->second) {
      // Previous element contains match. Extend it and coalesce.
      i->second = end;
      CoalescePositionsFrom(i - match_positions->begin(), match_positions);
    } else if (end > i->second) {
      // Region doesn't touch previous element. See if region touches current
      // element.
      ++i;
      if (i == match_positions->end() || end < i->first) {
        match_positions->insert(i, pair);
      } else {
        i->first = start;
        i->second = end;
        CoalescePositionsFrom(i - match_positions->begin(), match_positions);
      }
    }
  }
}

// Converts an index in a utf8 string into the index in the corresponding utf16
// string and returns the utf16 index.
// This is intended to be called in a loop
// iterating through a utf8 string.
//
// utf8_string: the utf8 string.
// utf8_length: length of the utf8 string.
// offset: the utf8 offset to convert.
// utf8_pos: current offset in the utf8 string. This is modified and on return
//           matches offset.
// utf16_pos: current index in the utf16 string. This is the same as the
//            return value.
size_t AdvanceAndReturnUTF16Pos(const char* utf8_string,
                                int32_t utf8_length,
                                int32_t offset,
                                int32_t* utf8_pos,
                                size_t* utf16_pos) {
  // Offsets must be fed in non-decreasing order: the shared cursor only
  // moves forward.
  DCHECK(offset >= *utf8_pos && offset <= utf8_length);

  UChar32 wide_char;
  while (*utf8_pos < offset) {
    U8_NEXT(utf8_string, *utf8_pos, utf8_length, wide_char);
    // Code points outside the BMP (> U+FFFF) take two UTF-16 code units
    // (a surrogate pair); everything else takes one.
    *utf16_pos += (wide_char <= 0xFFFF) ? 1 : 2;
  }
  return *utf16_pos;
}

// Given a character break iterator over a UTF-8 string, set the iterator
// position to |*utf8_pos| and move by |count| characters. |count| can
// be either positive or negative.
void MoveByNGraphemes(icu::BreakIterator* bi, int count, size_t* utf8_pos) {
  // Ignore the return value. A side effect of the current position
  // being set at or following |*utf8_pos| is exploited here.
  // It's simpler than calling following(n) and then previous().
  // isBoundary() is not very fast, but should be good enough for the
  // snippet generation. If not, revisit the way we scan in ComputeSnippet.
  bi->isBoundary(*utf8_pos);
  bi->next(count);
  *utf8_pos = static_cast<size_t>(bi->current());
}

// The amount of context to include for a given hit. Note that it's counted
// in terms of graphemes rather than bytes.
const int kSnippetContext = 50;

// Returns true if next match falls within a snippet window
// from the previous match. The window size is counted in terms
// of graphemes rather than bytes in UTF-8.
+bool IsNextMatchWithinSnippetWindow(icu::BreakIterator* bi, + size_t previous_match_end, + size_t next_match_start) { + // If it's within a window in terms of bytes, it's certain + // that it's within a window in terms of graphemes as well. + if (next_match_start < previous_match_end + kSnippetContext) + return true; + bi->isBoundary(previous_match_end); + // An alternative to this is to call |bi->next()| at most + // kSnippetContext times, compare |bi->current()| with |next_match_start| + // after each call and return early if possible. There are other + // heuristics to speed things up if necessary, but it's not likely that + // we need to bother. + bi->next(kSnippetContext); + int64 current = bi->current(); + return (next_match_start < static_cast<uint64>(current) || + current == icu::BreakIterator::DONE); +} + +} // namespace + +// static +void Snippet::ExtractMatchPositions(const std::string& offsets_str, + const std::string& column_num, + MatchPositions* match_positions) { + DCHECK(match_positions); + if (offsets_str.empty()) + return; + std::vector<std::string> offsets; + SplitString(offsets_str, ' ', &offsets); + // SQLite offsets are sets of four integers: + // column, query term, match offset, match length + // Matches within a string are marked by (start, end) pairs. + for (size_t i = 0; i < offsets.size() - 3; i += 4) { + if (offsets[i] != column_num) + continue; + const size_t start = atoi(offsets[i + 2].c_str()); + const size_t end = start + atoi(offsets[i + 3].c_str()); + // Switch to DCHECK after debugging http://crbug.com/15261. 
    CHECK(end >= start);
    AddMatch(start, end, match_positions);
  }
}

// static
void Snippet::ConvertMatchPositionsToWide(
    const std::string& utf8_string,
    Snippet::MatchPositions* match_positions) {
  DCHECK(match_positions);
  // Single pass over |utf8_string|: AdvanceAndReturnUTF16Pos advances a
  // shared cursor (utf8_pos/utf16_pos) and DCHECKs that offsets never move
  // backwards, so this relies on |match_positions| being sorted by offset
  // (which AddMatch maintains for ExtractMatchPositions' output).
  int32_t utf8_pos = 0;
  size_t utf16_pos = 0;
  const char* utf8_cstring = utf8_string.c_str();
  const int32_t utf8_length = static_cast<int32_t>(utf8_string.size());
  for (Snippet::MatchPositions::iterator i = match_positions->begin();
       i != match_positions->end(); ++i) {
    i->first = AdvanceAndReturnUTF16Pos(utf8_cstring, utf8_length,
                                        i->first, &utf8_pos, &utf16_pos);
    i->second = AdvanceAndReturnUTF16Pos(utf8_cstring, utf8_length,
                                         i->second, &utf8_pos, &utf16_pos);
  }
}

void Snippet::ComputeSnippet(const MatchPositions& match_positions,
                             const std::string& document) {
  // The length of snippets we try to produce.
  // We can generate longer snippets but stop once we cross kSnippetMaxLength.
  const size_t kSnippetMaxLength = 200;
  const string16 kEllipsis = ASCIIToUTF16(" ... ");

  UText* document_utext = NULL;
  UErrorCode status = U_ZERO_ERROR;
  document_utext = utext_openUTF8(document_utext, document.data(),
                                  document.size(), &status);
  // Locale does not matter because there's no per-locale customization
  // for character iterator.
  scoped_ptr<icu::BreakIterator> bi(icu::BreakIterator::createCharacterInstance(
      icu::Locale::getDefault(), status));
  bi->setText(document_utext, status);
  DCHECK(U_SUCCESS(status));

  // We build the snippet by iterating through the matches and then grabbing
  // context around each match. If matches are near enough each other (within
  // kSnippetContext), we skip the "..." between them.
  string16 snippet;
  size_t start = 0;
  for (size_t i = 0; i < match_positions.size(); ++i) {
    // Some shorter names for the current match.
+ const size_t match_start = match_positions[i].first; + const size_t match_end = match_positions[i].second; + + // Switch to DCHECK after debugging http://crbug.com/15261. + CHECK(match_end > match_start); + CHECK(match_end <= document.size()); + + // Add the context, if any, to show before the match. + size_t context_start = match_start; + MoveByNGraphemes(bi.get(), -kSnippetContext, &context_start); + start = std::max(start, context_start); + if (start < match_start) { + if (start > 0) + snippet += kEllipsis; + // Switch to DCHECK after debugging http://crbug.com/15261. + CHECK(start < document.size()); + snippet += UTF8ToUTF16(document.substr(start, match_start - start)); + } + + // Add the match. + const size_t first = snippet.size(); + snippet += UTF8ToUTF16(document.substr(match_start, + match_end - match_start)); + matches_.push_back(std::make_pair(first, snippet.size())); + + // Compute the context, if any, to show after the match. + size_t end; + // Check if the next match falls within our snippet window. + if (i + 1 < match_positions.size() && + IsNextMatchWithinSnippetWindow(bi.get(), match_end, + match_positions[i + 1].first)) { + // Yes, it's within the window. Make the end context extend just up + // to the next match. + end = match_positions[i + 1].first; + // Switch to DCHECK after debugging http://crbug.com/15261. + CHECK(end >= match_end); + CHECK(end <= document.size()); + snippet += UTF8ToUTF16(document.substr(match_end, end - match_end)); + } else { + // No, there's either no next match or the next match is too far away. + end = match_end; + MoveByNGraphemes(bi.get(), kSnippetContext, &end); + // Switch to DCHECK after debugging http://crbug.com/15261. + CHECK(end >= match_end); + CHECK(end <= document.size()); + snippet += UTF8ToUTF16(document.substr(match_end, end - match_end)); + if (end < document.size()) + snippet += kEllipsis; + } + start = end; + + // Stop here if we have enough snippet computed. 
+ if (snippet.size() >= kSnippetMaxLength) + break; + } + + utext_close(document_utext); + swap(text_, snippet); +} diff --git a/chrome/browser/history/snippet.h b/chrome/browser/history/snippet.h new file mode 100644 index 0000000..9e92893 --- /dev/null +++ b/chrome/browser/history/snippet.h @@ -0,0 +1,69 @@ +// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// This module computes snippets of queries based on hits in the documents +// for display in history search results. + +#ifndef CHROME_BROWSER_HISTORY_SNIPPET_H__ +#define CHROME_BROWSER_HISTORY_SNIPPET_H__ + +#include <vector> + +#include "base/string16.h" + +class Snippet { + public: + // Each MatchPosition is the [begin, end) positions of a match within a + // string. + typedef std::pair<size_t, size_t> MatchPosition; + typedef std::vector<MatchPosition> MatchPositions; + + // Parses an offsets string as returned from a sqlite full text index. An + // offsets string encodes information about why a row matched a text query. + // The information is encoded in the string as a set of matches, where each + // match consists of the column, term-number, location, and length of the + // match. Each element of the match is separated by a space, as is each match + // from other matches. + // + // This method adds the start and end of each match whose column is + // column_num to match_positions. The pairs are ordered based on first, + // with no overlapping elements. + // + // NOTE: the positions returned are in terms of UTF8 encoding. To convert the + // offsets to wide, use ConvertMatchPositionsToWide. + static void ExtractMatchPositions(const std::string& offsets_str, + const std::string& column_num, + MatchPositions* match_positions); + + // Converts match positions as returned from ExtractMatchPositions to be in + // terms of a wide string. 
+ static void ConvertMatchPositionsToWide( + const std::string& utf8_string, + Snippet::MatchPositions* match_positions); + + // Given |matches|, the match positions within |document|, compute the snippet + // for the document. + // Note that |document| is UTF-8 and the offsets in |matches| are byte + // offsets. + void ComputeSnippet(const MatchPositions& matches, + const std::string& document); + + const string16& text() const { return text_; } + const MatchPositions& matches() const { return matches_; } + + // Efficiently swaps the contents of this snippet with the other. + void Swap(Snippet* other) { + text_.swap(other->text_); + matches_.swap(other->matches_); + } + + private: + // The text of the snippet. + string16 text_; + + // The matches within text_. + MatchPositions matches_; +}; + +#endif // CHROME_BROWSER_HISTORY_SNIPPET_H__ diff --git a/chrome/browser/history/snippet_unittest.cc b/chrome/browser/history/snippet_unittest.cc new file mode 100644 index 0000000..5bc8a3b --- /dev/null +++ b/chrome/browser/history/snippet_unittest.cc @@ -0,0 +1,254 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/history/snippet.h" + +#include <algorithm> + +#include "base/string_util.h" +#include "base/utf_string_conversions.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace { + +// A sample document to compute snippets of. +// The \x bits after the first "Google" are UTF-8 of U+2122 TRADE MARK SIGN, +// and are useful for verifying we don't screw up in UTF-8/UTF-16 conversion. +const char* kSampleDocument = "Google\xe2\x84\xa2 Terms of Service " +"Welcome to Google! " +"1. 
Your relationship with Google " +"1.1 Your use of Google's products, software, services and web sites " +"(referred to collectively as the \"Services\" in this document and excluding " +"any services provided to you by Google under a separate written agreement) " +"is subject to the terms of a legal agreement between you and Google. " +"\"Google\" means Google Inc., whose principal place of business is at 1600 " +"Amphitheatre Parkway, Mountain View, CA 94043, United States. This document " +"explains how the agreement is made up, and sets out some of the terms of " +"that agreement."; +}; + +// Thai sample taken from http://www.google.co.th/intl/th/privacy.html +// TODO(jungshik) : Add more samples (e.g. Hindi) after porting +// ICU 4.0's character iterator changes to our copy of ICU 3.8 to get +// grapheme clusters in Indic scripts handled more reasonably. +const char* kThaiSample = "Google \xE0\xB9\x80\xE0\xB8\x81\xE0\xB9\x87" +"\xE0\xB8\x9A\xE0\xB8\xA3\xE0\xB8\xA7\xE0\xB8\x9A\xE0\xB8\xA3\xE0\xB8\xA7" +"\xE0\xB8\xA1 \xE0\xB8\x82\xE0\xB9\x89\xE0\xB8\xAD\xE0\xB8\xA1\xE0\xB8\xB9" +"\xE0\xB8\xA5\xE0\xB8\xAA\xE0\xB9\x88\xE0\xB8\xA7\xE0\xB8\x99\xE0\xB8\x9A" +"\xE0\xB8\xB8\xE0\xB8\x84\xE0\xB8\x84\xE0\xB8\xA5 \xE0\xB9\x80\xE0\xB8\xA1" +"\xE0\xB8\xB7\xE0\xB9\x88\xE0\xB8\xAD\xE0\xB8\x84\xE0\xB8\xB8\xE0\xB8\x93" +"\xE0\xB8\xA5\xE0\xB8\x87\xE0\xB8\x97\xE0\xB8\xB0\xE0\xB9\x80\xE0\xB8\x9A" +"\xE0\xB8\xB5\xE0\xB8\xA2\xE0\xB8\x99\xE0\xB9\x80\xE0\xB8\x9E\xE0\xB8\xB7" +"\xE0\xB9\x88\xE0\xB8\xAD\xE0\xB9\x83\xE0\xB8\x8A\xE0\xB9\x89\xE0\xB8\x9A" +"\xE0\xB8\xA3\xE0\xB8\xB4\xE0\xB8\x81\xE0\xB8\xB2\xE0\xB8\xA3\xE0\xB8\x82" +"\xE0\xB8\xAD\xE0\xB8\x87 Google \xE0\xB8\xAB\xE0\xB8\xA3\xE0\xB8\xB7" +"\xE0\xB8\xAD\xE0\xB9\x83\xE0\xB8\xAB\xE0\xB9\x89\xE0\xB8\x82\xE0\xB9\x89" +"\xE0\xB8\xAD\xE0\xB8\xA1\xE0\xB8\xB9\xE0\xB8\xA5\xE0\xB8\x94\xE0\xB8\xB1" +"\xE0\xB8\x87\xE0\xB8\x81\xE0\xB8\xA5\xE0\xB9\x88\xE0\xB8\xB2\xE0\xB8\xA7" 
+"\xE0\xB9\x82\xE0\xB8\x94\xE0\xB8\xA2\xE0\xB8\xAA\xE0\xB8\xA1\xE0\xB8\xB1" +"\xE0\xB8\x84\xE0\xB8\xA3\xE0\xB9\x83\xE0\xB8\x88 \xE0\xB9\x80\xE0\xB8\xA3" +"\xE0\xB8\xB2\xE0\xB8\xAD\xE0\xB8\xB2\xE0\xB8\x88\xE0\xB8\xA3\xE0\xB8\xA7" +"\xE0\xB8\xA1\xE0\xB8\x82\xE0\xB9\x89\xE0\xB8\xAD\xE0\xB8\xA1\xE0\xB8\xB9" +"\xE0\xB8\xA5\xE0\xB8\xAA\xE0\xB9\x88\xE0\xB8\xA7\xE0\xB8\x99\xE0\xB8\x9A" +"\xE0\xB8\xB8\xE0\xB8\x84\xE0\xB8\x84\xE0\xB8\xA5\xE0\xB8\x97\xE0\xB8\xB5" +"\xE0\xB9\x88\xE0\xB9\x80\xE0\xB8\x81\xE0\xB9\x87\xE0\xB8\x9A\xE0\xB8\xA3" +"\xE0\xB8\xA7\xE0\xB8\x9A\xE0\xB8\xA3\xE0\xB8\xA7\xE0\xB8\xA1\xE0\xB8\x88" +"\xE0\xB8\xB2\xE0\xB8\x81\xE0\xB8\x84\xE0\xB8\xB8\xE0\xB8\x93\xE0\xB9\x80" +"\xE0\xB8\x82\xE0\xB9\x89\xE0\xB8\xB2\xE0\xB8\x81\xE0\xB8\xB1\xE0\xB8\x9A" +"\xE0\xB8\x82\xE0\xB9\x89\xE0\xB8\xAD\xE0\xB8\xA1\xE0\xB8\xB9\xE0\xB8\xA5" +"\xE0\xB8\x88\xE0\xB8\xB2\xE0\xB8\x81\xE0\xB8\x9A\xE0\xB8\xA3\xE0\xB8\xB4" +"\xE0\xB8\x81\xE0\xB8\xB2\xE0\xB8\xA3\xE0\xB8\xAD\xE0\xB8\xB7\xE0\xB9\x88" +"\xE0\xB8\x99\xE0\xB8\x82\xE0\xB8\xAD\xE0\xB8\x87 Google \xE0\xB8\xAB" +"\xE0\xB8\xA3\xE0\xB8\xB7\xE0\xB8\xAD\xE0\xB8\x9A\xE0\xB8\xB8\xE0\xB8\x84" +"\xE0\xB8\x84\xE0\xB8\xA5\xE0\xB8\x97\xE0\xB8\xB5\xE0\xB9\x88\xE0\xB8\xAA" +"\xE0\xB8\xB2\xE0\xB8\xA1 \xE0\xB9\x80\xE0\xB8\x9E\xE0\xB8\xB7\xE0\xB9\x88" +"\xE0\xB8\xAD\xE0\xB9\x83\xE0\xB8\xAB\xE0\xB9\x89\xE0\xB8\x9C\xE0\xB8\xB9" +"\xE0\xB9\x89\xE0\xB9\x83\xE0\xB8\x8A\xE0\xB9\x89\xE0\xB9\x84\xE0\xB8\x94" +"\xE0\xB9\x89\xE0\xB8\xA3\xE0\xB8\xB1\xE0\xB8\x9A\xE0\xB8\x9B\xE0\xB8\xA3" +"\xE0\xB8\xB0\xE0\xB8\xAA\xE0\xB8\x9A\xE0\xB8\x81\xE0\xB8\xB2\xE0\xB8\xA3" +"\xE0\xB8\x93\xE0\xB9\x8C\xE0\xB8\x97\xE0\xB8\xB5\xE0\xB9\x88\xE0\xB8\x94" +"\xE0\xB8\xB5\xE0\xB8\x82\xE0\xB8\xB6\xE0\xB9\x89\xE0\xB8\x99 \xE0\xB8\xA3" +"\xE0\xB8\xA7\xE0\xB8\xA1\xE0\xB8\x97\xE0\xB8\xB1\xE0\xB9\x89\xE0\xB8\x87" +"\xE0\xB8\x9B\xE0\xB8\xA3\xE0\xB8\xB1\xE0\xB8\x9A\xE0\xB9\x81\xE0\xB8\x95" +"\xE0\xB9\x88\xE0\xB8\x87\xE0\xB9\x80\xE0\xB8\x99\xE0\xB8\xB7\xE0\xB9\x89" 
+"\xE0\xB8\xAD\xE0\xB8\xAB\xE0\xB8\xB2\xE0\xB9\x83\xE0\xB8\xAB\xE0\xB9\x89" +"\xE0\xB9\x80\xE0\xB8\xAB\xE0\xB8\xA1\xE0\xB8\xB2\xE0\xB8\xB0\xE0\xB8\xAA" +"\xE0\xB8\xB3\xE0\xB8\xAB\xE0\xB8\xA3\xE0\xB8\xB1\xE0\xB8\x9A\xE0\xB8\x84" +"\xE0\xB8\xB8\xE0\xB8\x93"; + +// Comparator for sorting by the first element in a pair. +bool ComparePair1st(const Snippet::MatchPosition& a, + const Snippet::MatchPosition& b) { + return a.first < b.first; +} + +// For testing, we'll compute the match positions manually instead of using +// sqlite's FTS matching. BuildSnippet returns the snippet for matching +// |query| against |document|. Matches are surrounded by "**". +string16 BuildSnippet(const std::string& document, + const std::string& query) { + // This function assumes that |document| does not contain + // any character for which lowercasing changes its length. Further, + // it's assumed that lowercasing only the ASCII-portion works for + // |document|. We need to add more test cases and change this function + // to be more generic depending on how we deal with 'folding for match' + // in history. + const std::string document_folded = StringToLowerASCII(std::string(document)); + + std::vector<std::string> query_words; + SplitString(query, ' ', &query_words); + + // Manually construct match_positions of the document. + Snippet::MatchPositions match_positions; + match_positions.clear(); + for (std::vector<std::string>::iterator qw = query_words.begin(); + qw != query_words.end(); ++qw) { + // Insert all instances of this word into match_pairs. + size_t ofs = 0; + while ((ofs = document_folded.find(*qw, ofs)) != std::string::npos) { + match_positions.push_back(std::make_pair(ofs, ofs + qw->size())); + ofs += qw->size(); + } + } + // Sort match_positions in order of increasing offset. + std::sort(match_positions.begin(), match_positions.end(), ComparePair1st); + + // Compute the snippet. 
+ Snippet snippet; + snippet.ComputeSnippet(match_positions, document); + + // Now "highlight" all matches in the snippet with **. + string16 star_snippet; + Snippet::MatchPositions::const_iterator match; + size_t pos = 0; + for (match = snippet.matches().begin(); + match != snippet.matches().end(); ++match) { + star_snippet += snippet.text().substr(pos, match->first - pos); + star_snippet += UTF8ToUTF16("**"); + star_snippet += snippet.text().substr(match->first, + match->second - match->first); + star_snippet += UTF8ToUTF16("**"); + pos = match->second; + } + star_snippet += snippet.text().substr(pos); + + return star_snippet; +} + +TEST(Snippets, SimpleQuery) { + ASSERT_EQ(" ... eferred to collectively as the \"Services\" in this " + "**document** and excluding any services provided to you by " + "Goo ... ... way, Mountain View, CA 94043, United States. This " + "**document** explains how the agreement is made up, and sets " + "o ... ", + UTF16ToUTF8(BuildSnippet(kSampleDocument, "document"))); +} + +// Test that two words that are near each other don't produce two elided bits. +TEST(Snippets, NearbyWords) { + ASSERT_EQ(" ... lace of business is at 1600 Amphitheatre Parkway, " + "**Mountain** **View**, CA 94043, United States. This " + "document explains ... ", + UTF16ToUTF8(BuildSnippet(kSampleDocument, "mountain view"))); +} + +// The above tests already test that we get byte offsets correct, but here's +// one that gets the "TM" in its snippet. +TEST(Snippets, UTF8) { + ASSERT_EQ(" ... ogle\xe2\x84\xa2 Terms of Service Welcome to Google! " + "1. Your **relationship** with Google 1.1 Your use of Google's " + "products, so ... ", + UTF16ToUTF8(BuildSnippet(kSampleDocument, "relationship"))); +} + +// Bug: 1274923 +// TODO(jungshik): Move this bug report to crbugs.com +// Fails consistently. From the report, "Broken by latest ICU. Need new expected +// results." 
+TEST(Snippets, FAILS_ThaiUTF8) { + // There are 3 instances of '\u0E43\u0E2B\u0E49' + // (\xE0\xB9\x83\xE0\xB8\xAB\xE0\xB9\x89) in kThaiSample. + // The 1st is more than |kSniipetContext| graphemes away from the + // 2nd while the 2nd and 3rd are within that window. However, with + // the 2nd match added, the snippet goes over the size limit so that + // the snippet ends right before the 3rd match. + ASSERT_EQ(" ... " + " \xE0\xB8\x82\xE0\xB9\x89\xE0\xB8\xAD\xE0\xB8\xA1\xE0\xB8\xB9" + "\xE0\xB8\xA5\xE0\xB8\xAA\xE0\xB9\x88\xE0\xB8\xA7\xE0\xB8\x99" + "\xE0\xB8\x9A\xE0\xB8\xB8\xE0\xB8\x84\xE0\xB8\x84\xE0\xB8\xA5 " + "\xE0\xB9\x80\xE0\xB8\xA1\xE0\xB8\xB7\xE0\xB9\x88\xE0\xB8\xAD" + "\xE0\xB8\x84\xE0\xB8\xB8\xE0\xB8\x93\xE0\xB8\xA5\xE0\xB8\x87" + "\xE0\xB8\x97\xE0\xB8\xB0\xE0\xB9\x80\xE0\xB8\x9A\xE0\xB8\xB5" + "\xE0\xB8\xA2\xE0\xB8\x99\xE0\xB9\x80\xE0\xB8\x9E\xE0\xB8\xB7" + "\xE0\xB9\x88\xE0\xB8\xAD\xE0\xB9\x83\xE0\xB8\x8A\xE0\xB9\x89" + "\xE0\xB8\x9A\xE0\xB8\xA3\xE0\xB8\xB4\xE0\xB8\x81\xE0\xB8\xB2" + "\xE0\xB8\xA3\xE0\xB8\x82\xE0\xB8\xAD\xE0\xB8\x87 Google " + "\xE0\xB8\xAB\xE0\xB8\xA3\xE0\xB8\xB7\xE0\xB8\xAD**\xE0\xB9\x83" + "\xE0\xB8\xAB\xE0\xB9\x89**\xE0\xB8\x82\xE0\xB9\x89\xE0\xB8\xAD" + "\xE0\xB8\xA1\xE0\xB8\xB9\xE0\xB8\xA5\xE0\xB8\x94\xE0\xB8\xB1" + "\xE0\xB8\x87\xE0\xB8\x81\xE0\xB8\xA5\xE0\xB9\x88\xE0\xB8\xB2" + "\xE0\xB8\xA7\xE0\xB9\x82\xE0\xB8\x94\xE0\xB8\xA2\xE0\xB8\xAA" + "\xE0\xB8\xA1\xE0\xB8\xB1\xE0\xB8\x84\xE0\xB8\xA3\xE0\xB9\x83" + "\xE0\xB8\x88 \xE0\xB9\x80\xE0\xB8\xA3\xE0\xB8\xB2\xE0\xB8\xAD" + "\xE0\xB8\xB2\xE0\xB8\x88\xE0\xB8\xA3\xE0\xB8\xA7\xE0\xB8\xA1" + "\xE0\xB8\x82\xE0\xB9\x89\xE0\xB8\xAD\xE0\xB8\xA1\xE0\xB8\xB9" + "\xE0\xB8\xA5\xE0\xB8\xAA\xE0\xB9\x88\xE0\xB8\xA7\xE0\xB8\x99" + "\xE0\xB8\x9A\xE0\xB8\xB8\xE0\xB8\x84\xE0\xB8\x84\xE0\xB8\xA5" + "\xE0\xB8\x97\xE0\xB8\xB5\xE0\xB9\x88\xE0\xB9\x80\xE0\xB8\x81" + "\xE0\xB9\x87\xE0\xB8\x9A\xE0\xB8\xA3\xE0\xB8\xA7\xE0\xB8\x9A" + "\xE0\xB8\xA3\xE0\xB8\xA7\xE0\xB8\xA1 ... ... 
" + "\xE0\xB8\x88\xE0\xB8\xB2\xE0\xB8\x81\xE0\xB8\x84\xE0\xB8\xB8" + "\xE0\xB8\x93\xE0\xB9\x80\xE0\xB8\x82\xE0\xB9\x89\xE0\xB8\xB2" + "\xE0\xB8\x81\xE0\xB8\xB1\xE0\xB8\x9A\xE0\xB8\x82\xE0\xB9\x89" + "\xE0\xB8\xAD\xE0\xB8\xA1\xE0\xB8\xB9\xE0\xB8\xA5\xE0\xB8\x88" + "\xE0\xB8\xB2\xE0\xB8\x81\xE0\xB8\x9A\xE0\xB8\xA3\xE0\xB8\xB4" + "\xE0\xB8\x81\xE0\xB8\xB2\xE0\xB8\xA3\xE0\xB8\xAD\xE0\xB8\xB7" + "\xE0\xB9\x88\xE0\xB8\x99\xE0\xB8\x82\xE0\xB8\xAD\xE0\xB8\x87 " + "Google \xE0\xB8\xAB\xE0\xB8\xA3\xE0\xB8\xB7\xE0\xB8\xAD" + "\xE0\xB8\x9A\xE0\xB8\xB8\xE0\xB8\x84\xE0\xB8\x84\xE0\xB8\xA5" + "\xE0\xB8\x97\xE0\xB8\xB5\xE0\xB9\x88\xE0\xB8\xAA\xE0\xB8\xB2" + "\xE0\xB8\xA1 \xE0\xB9\x80\xE0\xB8\x9E\xE0\xB8\xB7\xE0\xB9\x88" + "\xE0\xB8\xAD**\xE0\xB9\x83\xE0\xB8\xAB\xE0\xB9\x89**\xE0\xB8\x9C" + "\xE0\xB8\xB9\xE0\xB9\x89\xE0\xB9\x83\xE0\xB8\x8A\xE0\xB9\x89" + "\xE0\xB9\x84\xE0\xB8\x94\xE0\xB9\x89\xE0\xB8\xA3\xE0\xB8\xB1" + "\xE0\xB8\x9A\xE0\xB8\x9B\xE0\xB8\xA3\xE0\xB8\xB0\xE0\xB8\xAA" + "\xE0\xB8\x9A\xE0\xB8\x81\xE0\xB8\xB2\xE0\xB8\xA3\xE0\xB8\x93" + "\xE0\xB9\x8C\xE0\xB8\x97\xE0\xB8\xB5\xE0\xB9\x88\xE0\xB8\x94" + "\xE0\xB8\xB5\xE0\xB8\x82\xE0\xB8\xB6\xE0\xB9\x89\xE0\xB8\x99 " + "\xE0\xB8\xA3\xE0\xB8\xA7\xE0\xB8\xA1\xE0\xB8\x97\xE0\xB8\xB1" + "\xE0\xB9\x89\xE0\xB8\x87\xE0\xB8\x9B\xE0\xB8\xA3\xE0\xB8\xB1" + "\xE0\xB8\x9A\xE0\xB9\x81\xE0\xB8\x95\xE0\xB9\x88\xE0\xB8\x87" + "\xE0\xB9\x80\xE0\xB8\x99\xE0\xB8\xB7\xE0\xB9\x89\xE0\xB8\xAD" + "\xE0\xB8\xAB\xE0\xB8\xB2", + UTF16ToUTF8(BuildSnippet(kThaiSample, + "\xE0\xB9\x83\xE0\xB8\xAB\xE0\xB9\x89"))); +} + +TEST(Snippets, ExtractMatchPositions) { + struct TestData { + const std::string offsets_string; + const size_t expected_match_count; + const size_t expected_matches[10]; + } data[] = { + { "0 0 1 2 0 0 4 1 0 0 1 5", 1, { 1, 6 } }, + { "0 0 1 4 0 0 2 1", 1, { 1, 5 } }, + { "0 0 4 1 0 0 2 1", 2, { 2, 3, 4, 5 } }, + { "0 0 0 1", 1, { 0, 1 } }, + { "0 0 0 1 0 0 0 2", 1, { 0, 2 } }, + { "0 0 1 1 0 0 1 2", 1, { 1, 3 } }, + { "0 0 1 2 0 0 4 3 0 0 
3 1", 1, { 1, 7 } }, + { "0 0 1 4 0 0 2 5", 1, { 1, 7 } }, + { "0 0 1 2 0 0 1 1", 1, { 1, 3 } }, + { "0 0 1 1 0 0 5 2 0 0 10 1 0 0 3 10", 2, { 1, 2, 3, 13 } }, + }; + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) { + Snippet::MatchPositions matches; + Snippet::ExtractMatchPositions(data[i].offsets_string, "0", &matches); + EXPECT_EQ(data[i].expected_match_count, matches.size()); + for (size_t j = 0; j < data[i].expected_match_count; ++j) { + EXPECT_EQ(data[i].expected_matches[2 * j], matches[j].first); + EXPECT_EQ(data[i].expected_matches[2 * j + 1], matches[j].second); + } + } +} diff --git a/chrome/browser/history/starred_url_database.cc b/chrome/browser/history/starred_url_database.cc new file mode 100644 index 0000000..cf2a306 --- /dev/null +++ b/chrome/browser/history/starred_url_database.cc @@ -0,0 +1,628 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/history/starred_url_database.h" + +#include "app/sql/connection.h" +#include "app/sql/statement.h" +#include "base/file_util.h" +#include "base/logging.h" +#include "base/json/json_writer.h" +#include "base/scoped_vector.h" +#include "base/stl_util-inl.h" +#include "base/string_util.h" +#include "base/values.h" +#include "chrome/browser/bookmarks/bookmark_codec.h" +#include "chrome/browser/bookmarks/bookmark_model.h" +#include "chrome/browser/history/history.h" +#include "chrome/browser/history/query_parser.h" + +// The following table is used to store star (aka bookmark) information. This +// class derives from URLDatabase, which has its own schema. +// +// starred +// id Unique identifier (primary key) for the entry. +// type Type of entry, if 0 this corresponds to a URL, 1 for +// a system grouping, 2 for a user created group, 3 for +// other. +// url_id ID of the url, only valid if type == 0 +// group_id ID of the group, only valid if type != 0. 
This id comes +// from the UI and is NOT the same as id. +// title User assigned title. +// date_added Creation date. +// visual_order Visual order within parent. +// parent_id Group ID of the parent this entry is contained in, if 0 +// entry is not in a group. +// date_modified Time the group was last modified. See comments in +// StarredEntry::date_group_modified +// NOTE: group_id and parent_id come from the UI, id is assigned by the +// db. + +namespace history { + +namespace { + +// Fields used by FillInStarredEntry. +#define STAR_FIELDS \ + " starred.id, starred.type, starred.title, starred.date_added, " \ + "starred.visual_order, starred.parent_id, urls.url, urls.id, " \ + "starred.group_id, starred.date_modified " +const char kHistoryStarFields[] = STAR_FIELDS; + +void FillInStarredEntry(const sql::Statement& s, StarredEntry* entry) { + DCHECK(entry); + entry->id = s.ColumnInt64(0); + switch (s.ColumnInt(1)) { + case 0: + entry->type = history::StarredEntry::URL; + entry->url = GURL(s.ColumnString(6)); + break; + case 1: + entry->type = history::StarredEntry::BOOKMARK_BAR; + break; + case 2: + entry->type = history::StarredEntry::USER_GROUP; + break; + case 3: + entry->type = history::StarredEntry::OTHER; + break; + default: + NOTREACHED(); + break; + } + entry->title = s.ColumnString16(2); + entry->date_added = base::Time::FromInternalValue(s.ColumnInt64(3)); + entry->visual_order = s.ColumnInt(4); + entry->parent_group_id = s.ColumnInt64(5); + entry->url_id = s.ColumnInt64(7); + entry->group_id = s.ColumnInt64(8); + entry->date_group_modified = base::Time::FromInternalValue(s.ColumnInt64(9)); +} + +} // namespace + +StarredURLDatabase::StarredURLDatabase() { +} + +StarredURLDatabase::~StarredURLDatabase() { +} + +bool StarredURLDatabase::MigrateBookmarksToFile(const FilePath& path) { + if (!GetDB().DoesTableExist("starred")) + return true; + + if (EnsureStarredIntegrity() && !MigrateBookmarksToFileImpl(path)) { + NOTREACHED() << " Bookmarks migration 
failed"; + return false; + } + + if (!GetDB().Execute("DROP TABLE starred")) { + NOTREACHED() << "Unable to drop starred table"; + return false; + } + return true; +} + +bool StarredURLDatabase::GetAllStarredEntries( + std::vector<StarredEntry>* entries) { + DCHECK(entries); + std::string sql = "SELECT "; + sql.append(kHistoryStarFields); + sql.append("FROM starred LEFT JOIN urls ON starred.url_id = urls.id "); + sql += "ORDER BY parent_id, visual_order"; + + sql::Statement s(GetDB().GetUniqueStatement(sql.c_str())); + if (!s) { + NOTREACHED() << "Statement prepare failed"; + return false; + } + + history::StarredEntry entry; + while (s.Step()) { + FillInStarredEntry(s, &entry); + // Reset the url for non-url types. This is needed as we're reusing the + // same entry for the loop. + if (entry.type != history::StarredEntry::URL) + entry.url = GURL(); + entries->push_back(entry); + } + return true; +} + +bool StarredURLDatabase::EnsureStarredIntegrity() { + std::set<StarredNode*> roots; + std::set<StarID> groups_with_duplicate_ids; + std::set<StarredNode*> unparented_urls; + std::set<StarID> empty_url_ids; + + if (!BuildStarNodes(&roots, &groups_with_duplicate_ids, &unparented_urls, + &empty_url_ids)) { + return false; + } + + bool valid = EnsureStarredIntegrityImpl(&roots, groups_with_duplicate_ids, + &unparented_urls, empty_url_ids); + + STLDeleteElements(&roots); + STLDeleteElements(&unparented_urls); + return valid; +} + +bool StarredURLDatabase::UpdateStarredEntryRow(StarID star_id, + const string16& title, + UIStarID parent_group_id, + int visual_order, + base::Time date_modified) { + DCHECK(star_id && visual_order >= 0); + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "UPDATE starred SET title=?, parent_id=?, visual_order=?, " + "date_modified=? 
WHERE id=?")); + if (!statement) + return 0; + + statement.BindString16(0, title); + statement.BindInt64(1, parent_group_id); + statement.BindInt(2, visual_order); + statement.BindInt64(3, date_modified.ToInternalValue()); + statement.BindInt64(4, star_id); + return statement.Run(); +} + +bool StarredURLDatabase::AdjustStarredVisualOrder(UIStarID parent_group_id, + int start_visual_order, + int delta) { + DCHECK(parent_group_id && start_visual_order >= 0); + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "UPDATE starred SET visual_order=visual_order+? " + "WHERE parent_id=? AND visual_order >= ?")); + if (!statement) + return false; + + statement.BindInt(0, delta); + statement.BindInt64(1, parent_group_id); + statement.BindInt(2, start_visual_order); + return statement.Run(); +} + +StarID StarredURLDatabase::CreateStarredEntryRow(URLID url_id, + UIStarID group_id, + UIStarID parent_group_id, + const string16& title, + const base::Time& date_added, + int visual_order, + StarredEntry::Type type) { + DCHECK(visual_order >= 0 && + (type != history::StarredEntry::URL || url_id)); + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "INSERT INTO starred " + "(type, url_id, group_id, title, date_added, visual_order, parent_id, " + "date_modified) VALUES (?,?,?,?,?,?,?,?)")); + if (!statement) + return 0; + + switch (type) { + case history::StarredEntry::URL: + statement.BindInt(0, 0); + break; + case history::StarredEntry::BOOKMARK_BAR: + statement.BindInt(0, 1); + break; + case history::StarredEntry::USER_GROUP: + statement.BindInt(0, 2); + break; + case history::StarredEntry::OTHER: + statement.BindInt(0, 3); + break; + default: + NOTREACHED(); + } + statement.BindInt64(1, url_id); + statement.BindInt64(2, group_id); + statement.BindString16(3, title); + statement.BindInt64(4, date_added.ToInternalValue()); + statement.BindInt(5, visual_order); + statement.BindInt64(6, parent_group_id); + statement.BindInt64(7, 
base::Time().ToInternalValue()); + if (statement.Run()) + return GetDB().GetLastInsertRowId(); + return 0; +} + +bool StarredURLDatabase::DeleteStarredEntryRow(StarID star_id) { + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "DELETE FROM starred WHERE id=?")); + if (!statement) + return false; + + statement.BindInt64(0, star_id); + return statement.Run(); +} + +bool StarredURLDatabase::GetStarredEntry(StarID star_id, StarredEntry* entry) { + DCHECK(entry && star_id); + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT" STAR_FIELDS "FROM starred LEFT JOIN urls ON " + "starred.url_id = urls.id WHERE starred.id=?")); + if (!statement) + return false; + + statement.BindInt64(0, star_id); + + if (statement.Step()) { + FillInStarredEntry(statement, entry); + return true; + } + return false; +} + +StarID StarredURLDatabase::CreateStarredEntry(StarredEntry* entry) { + entry->id = 0; // Ensure 0 for failure case. + + // Adjust the visual order when we are inserting it somewhere. + if (entry->parent_group_id) + AdjustStarredVisualOrder(entry->parent_group_id, entry->visual_order, 1); + + // Insert the new entry. + switch (entry->type) { + case StarredEntry::USER_GROUP: + entry->id = CreateStarredEntryRow(0, entry->group_id, + entry->parent_group_id, entry->title, entry->date_added, + entry->visual_order, entry->type); + break; + + case StarredEntry::URL: { + // Get the row for this URL. + URLRow url_row; + if (!GetRowForURL(entry->url, &url_row)) { + // Create a new URL row for this entry. + url_row = URLRow(entry->url); + url_row.set_title(entry->title); + url_row.set_hidden(false); + entry->url_id = this->AddURL(url_row); + } else { + entry->url_id = url_row.id(); // The caller doesn't have to set this. + } + + // Create the star entry referring to the URL row. 
+ entry->id = CreateStarredEntryRow(entry->url_id, entry->group_id, + entry->parent_group_id, entry->title, entry->date_added, + entry->visual_order, entry->type); + + // Update the URL row to refer to this new starred entry. + UpdateURLRow(entry->url_id, url_row); + break; + } + + default: + NOTREACHED(); + break; + } + return entry->id; +} + +UIStarID StarredURLDatabase::GetMaxGroupID() { + sql::Statement max_group_id_statement(GetDB().GetUniqueStatement( + "SELECT MAX(group_id) FROM starred")); + if (!max_group_id_statement) { + NOTREACHED() << GetDB().GetErrorMessage(); + return 0; + } + if (!max_group_id_statement.Step()) { + NOTREACHED() << GetDB().GetErrorMessage(); + return 0; + } + return max_group_id_statement.ColumnInt64(0); +} + +bool StarredURLDatabase::BuildStarNodes( + std::set<StarredURLDatabase::StarredNode*>* roots, + std::set<StarID>* groups_with_duplicate_ids, + std::set<StarredNode*>* unparented_urls, + std::set<StarID>* empty_url_ids) { + std::vector<StarredEntry> star_entries; + if (!GetAllStarredEntries(&star_entries)) { + NOTREACHED() << "Unable to get bookmarks from database"; + return false; + } + + // Create the group/bookmark-bar/other nodes. + std::map<UIStarID, StarredNode*> group_id_to_node_map; + for (size_t i = 0; i < star_entries.size(); ++i) { + if (star_entries[i].type != StarredEntry::URL) { + if (group_id_to_node_map.find(star_entries[i].group_id) != + group_id_to_node_map.end()) { + // There's already a group with this ID. + groups_with_duplicate_ids->insert(star_entries[i].id); + } else { + // Create the node and update the mapping. + StarredNode* node = new StarredNode(star_entries[i]); + group_id_to_node_map[star_entries[i].group_id] = node; + } + } + } + + // Iterate again, creating nodes for URL bookmarks and parenting all + // bookmarks/folders. In addition populate the empty_url_ids with all entries + // of type URL that have an empty URL. 
+ std::map<StarID, StarredNode*> id_to_node_map; + for (size_t i = 0; i < star_entries.size(); ++i) { + if (star_entries[i].type == StarredEntry::URL) { + if (star_entries[i].url.is_empty()) { + empty_url_ids->insert(star_entries[i].id); + } else if (!star_entries[i].parent_group_id || + group_id_to_node_map.find(star_entries[i].parent_group_id) == + group_id_to_node_map.end()) { + // This entry has no parent, or we couldn't find the parent. + StarredNode* node = new StarredNode(star_entries[i]); + unparented_urls->insert(node); + } else { + // Add the node to its parent. + StarredNode* parent = + group_id_to_node_map[star_entries[i].parent_group_id]; + StarredNode* node = new StarredNode(star_entries[i]); + parent->Add(parent->GetChildCount(), node); + } + } else if (groups_with_duplicate_ids->find(star_entries[i].id) == + groups_with_duplicate_ids->end()) { + // The entry is a group (or bookmark bar/other node) that isn't + // marked as a duplicate. + if (!star_entries[i].parent_group_id || + group_id_to_node_map.find(star_entries[i].parent_group_id) == + group_id_to_node_map.end()) { + // Entry has no parent, or the parent wasn't found. + roots->insert(group_id_to_node_map[star_entries[i].group_id]); + } else { + // Parent the group node. + StarredNode* parent = + group_id_to_node_map[star_entries[i].parent_group_id]; + StarredNode* node = group_id_to_node_map[star_entries[i].group_id]; + if (!node->HasAncestor(parent) && !parent->HasAncestor(node)) { + parent->Add(parent->GetChildCount(), node); + } else { + // The node has a cycle. Add it to the list of roots so the cycle is + // broken. 
+ roots->insert(node); + } + } + } + } + return true; +} + +StarredURLDatabase::StarredNode* StarredURLDatabase::GetNodeByType( + const std::set<StarredURLDatabase::StarredNode*>& nodes, + StarredEntry::Type type) { + for (std::set<StarredNode*>::const_iterator i = nodes.begin(); + i != nodes.end(); ++i) { + if ((*i)->value.type == type) + return *i; + } + return NULL; +} + +bool StarredURLDatabase::EnsureVisualOrder( + StarredURLDatabase::StarredNode* node) { + for (int i = 0; i < node->GetChildCount(); ++i) { + if (node->GetChild(i)->value.visual_order != i) { + StarredEntry& entry = node->GetChild(i)->value; + entry.visual_order = i; + LOG(WARNING) << "Bookmark visual order is wrong"; + if (!UpdateStarredEntryRow(entry.id, entry.title, entry.parent_group_id, + i, entry.date_group_modified)) { + NOTREACHED() << "Unable to update visual order"; + return false; + } + } + if (!EnsureVisualOrder(node->GetChild(i))) + return false; + } + return true; +} + +bool StarredURLDatabase::EnsureStarredIntegrityImpl( + std::set<StarredURLDatabase::StarredNode*>* roots, + const std::set<StarID>& groups_with_duplicate_ids, + std::set<StarredNode*>* unparented_urls, + const std::set<StarID>& empty_url_ids) { + // Make sure the bookmark bar entry exists. + StarredNode* bookmark_node = + GetNodeByType(*roots, StarredEntry::BOOKMARK_BAR); + if (!bookmark_node) { + LOG(WARNING) << "No bookmark bar folder in database"; + // If there is no bookmark bar entry in the db things are really + // screwed. Return false, which won't trigger migration and we'll just + // drop the tables. + return false; + } + + // Make sure the other node exists. 
+ StarredNode* other_node = GetNodeByType(*roots, StarredEntry::OTHER); + if (!other_node) { + LOG(WARNING) << "No bookmark other folder in database"; + StarredEntry entry; + entry.group_id = GetMaxGroupID() + 1; + if (entry.group_id == 1) { + NOTREACHED() << "Unable to get new id for other bookmarks folder"; + return false; + } + entry.id = CreateStarredEntryRow( + 0, entry.group_id, 0, UTF8ToUTF16("other"), base::Time::Now(), 0, + history::StarredEntry::OTHER); + if (!entry.id) { + NOTREACHED() << "Unable to create other bookmarks folder"; + return false; + } + entry.type = StarredEntry::OTHER; + StarredNode* other_node = new StarredNode(entry); + roots->insert(other_node); + } + + // We could potentially make sure only one group with type + // BOOKMARK_BAR/OTHER, but history backend enforces this. + + // Nuke any entries with no url. + for (std::set<StarID>::const_iterator i = empty_url_ids.begin(); + i != empty_url_ids.end(); ++i) { + LOG(WARNING) << "Bookmark exists with no URL"; + if (!DeleteStarredEntryRow(*i)) { + NOTREACHED() << "Unable to delete bookmark"; + return false; + } + } + + // Make sure the visual order of the nodes is correct. + for (std::set<StarredNode*>::const_iterator i = roots->begin(); + i != roots->end(); ++i) { + if (!EnsureVisualOrder(*i)) + return false; + } + + // Move any unparented bookmarks to the bookmark bar. + { + std::set<StarredNode*>::iterator i = unparented_urls->begin(); + while (i != unparented_urls->end()) { + LOG(WARNING) << "Bookmark not in a bookmark folder found"; + if (!Move(*i, bookmark_node)) + return false; + unparented_urls->erase(i++); + } + } + + // Nuke any groups with duplicate ids. A duplicate id means there are two + // folders in the starred table with the same group_id. We only keep the + // first folder, all other groups are removed. 
+ for (std::set<StarID>::const_iterator i = groups_with_duplicate_ids.begin(); + i != groups_with_duplicate_ids.end(); ++i) { + LOG(WARNING) << "Duplicate group id in bookmark database"; + if (!DeleteStarredEntryRow(*i)) { + NOTREACHED() << "Unable to delete folder"; + return false; + } + } + + // Move unparented user groups back to the bookmark bar. + { + std::set<StarredNode*>::iterator i = roots->begin(); + while (i != roots->end()) { + if ((*i)->value.type == StarredEntry::USER_GROUP) { + LOG(WARNING) << "Bookmark folder not on bookmark bar found"; + if (!Move(*i, bookmark_node)) + return false; + roots->erase(i++); + } else { + ++i; + } + } + } + + return true; +} + +bool StarredURLDatabase::Move(StarredNode* source, StarredNode* new_parent) { + history::StarredEntry& entry = source->value; + entry.visual_order = new_parent->GetChildCount(); + entry.parent_group_id = new_parent->value.group_id; + if (!UpdateStarredEntryRow(entry.id, entry.title, + entry.parent_group_id, entry.visual_order, + entry.date_group_modified)) { + NOTREACHED() << "Unable to move folder"; + return false; + } + new_parent->Add(new_parent->GetChildCount(), source); + return true; +} + +bool StarredURLDatabase::MigrateBookmarksToFileImpl(const FilePath& path) { + std::vector<history::StarredEntry> entries; + if (!GetAllStarredEntries(&entries)) + return false; + + // Create the bookmark bar and other folder nodes. 
+ history::StarredEntry entry; + entry.type = history::StarredEntry::BOOKMARK_BAR; + BookmarkNode bookmark_bar_node(0, GURL()); + bookmark_bar_node.Reset(entry); + entry.type = history::StarredEntry::OTHER; + BookmarkNode other_node(0, GURL()); + other_node.Reset(entry); + + std::map<history::UIStarID, history::StarID> group_id_to_id_map; + typedef std::map<history::StarID, BookmarkNode*> IDToNodeMap; + IDToNodeMap id_to_node_map; + + history::UIStarID other_folder_group_id = 0; + history::StarID other_folder_id = 0; + + // Iterate through the entries building a mapping between group_id and id. + for (std::vector<history::StarredEntry>::const_iterator i = entries.begin(); + i != entries.end(); ++i) { + if (i->type != history::StarredEntry::URL) { + group_id_to_id_map[i->group_id] = i->id; + if (i->type == history::StarredEntry::OTHER) { + other_folder_id = i->id; + other_folder_group_id = i->group_id; + } + } + } + + // Register the bookmark bar and other folder nodes in the maps. + id_to_node_map[HistoryService::kBookmarkBarID] = &bookmark_bar_node; + group_id_to_id_map[HistoryService::kBookmarkBarID] = + HistoryService::kBookmarkBarID; + if (other_folder_group_id) { + id_to_node_map[other_folder_id] = &other_node; + group_id_to_id_map[other_folder_group_id] = other_folder_id; + } + + // Iterate through the entries again creating the nodes. + for (std::vector<history::StarredEntry>::iterator i = entries.begin(); + i != entries.end(); ++i) { + if (!i->parent_group_id) { + DCHECK(i->type == history::StarredEntry::BOOKMARK_BAR || + i->type == history::StarredEntry::OTHER); + // Only entries with no parent should be the bookmark bar and other + // bookmarks folders. + continue; + } + + BookmarkNode* node = id_to_node_map[i->id]; + if (!node) { + // Creating a node results in creating the parent. As such, it is + // possible for the node representing a group to have been created before + // encountering the details. + + // The created nodes are owned by the root node. 
+ node = new BookmarkNode(0, i->url); + id_to_node_map[i->id] = node; + } + node->Reset(*i); + + DCHECK(group_id_to_id_map.find(i->parent_group_id) != + group_id_to_id_map.end()); + history::StarID parent_id = group_id_to_id_map[i->parent_group_id]; + BookmarkNode* parent = id_to_node_map[parent_id]; + if (!parent) { + // Haven't encountered the parent yet, create it now. + parent = new BookmarkNode(0, GURL()); + id_to_node_map[parent_id] = parent; + } + + // Add the node to its parent. |entries| is ordered by parent then + // visual order so that we know we maintain visual order by always adding + // to the end. + parent->Add(parent->GetChildCount(), node); + } + + // Save to file. + BookmarkCodec encoder; + scoped_ptr<Value> encoded_bookmarks( + encoder.Encode(&bookmark_bar_node, &other_node)); + std::string content; + base::JSONWriter::Write(encoded_bookmarks.get(), true, &content); + + return (file_util::WriteFile(path, content.c_str(), + static_cast<int>(content.length())) != -1); +} + +} // namespace history diff --git a/chrome/browser/history/starred_url_database.h b/chrome/browser/history/starred_url_database.h new file mode 100644 index 0000000..8d327d8 --- /dev/null +++ b/chrome/browser/history/starred_url_database.h @@ -0,0 +1,185 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CHROME_BROWSER_HISTORY_STARRED_URL_DATABASE_H_ +#define CHROME_BROWSER_HISTORY_STARRED_URL_DATABASE_H_ + +#include <set> + +#include "app/tree_node_model.h" +#include "base/basictypes.h" +#include "base/gtest_prod_util.h" +#include "base/string16.h" +#include "chrome/browser/history/history_types.h" +#include "chrome/browser/history/url_database.h" + +class FilePath; + +namespace sql { +class Connection; +} + +namespace history { + +// Bookmarks were originally part of the url database, they have since been +// moved to a separate file. 
This file exists purely for historical reasons and +// contains just enough to allow migration. +class StarredURLDatabase : public URLDatabase { + public: + // Must call InitStarTable() AND any additional init functions provided by + // URLDatabase before using this class' functions. + StarredURLDatabase(); + virtual ~StarredURLDatabase(); + + protected: + // The unit tests poke our innards. + friend class HistoryTest; + friend class StarredURLDatabaseTest; + FRIEND_TEST_ALL_PREFIXES(HistoryTest, CreateStarGroup); + + // Writes bookmarks to the specified file. + bool MigrateBookmarksToFile(const FilePath& path); + + // Returns the database for the functions in this interface. + virtual sql::Connection& GetDB() = 0; + + private: + // Makes sure the starred table is in a sane state. This does the following: + // . Makes sure there is a bookmark bar and other nodes. If no bookmark bar + // node is found, the table is dropped and recreated. + // . Removes any bookmarks with no URL. This can happen if a URL is removed + // from the urls table without updating the starred table correctly. + // . Makes sure the visual order of all nodes is correct. + // . Moves all bookmarks and folders that are not descendants of the bookmark + // bar or other folders to the bookmark bar. + // . Makes sure there isn't a cycle in the folders. A cycle means some folder + // has as its parent one of its children. + // + // This returns false if the starred table is in a bad state and couldn't + // be fixed, true otherwise. + // + // This should be invoked after migration. + bool EnsureStarredIntegrity(); + + // Gets all the starred entries. + bool GetAllStarredEntries(std::vector<StarredEntry>* entries); + + // Sets the title, parent_id, parent_group_id, visual_order and date_modifed + // of the specified star entry. + // + // WARNING: Does not update the visual order. 
+ bool UpdateStarredEntryRow(StarID star_id, + const string16& title, + UIStarID parent_group_id, + int visual_order, + base::Time date_modified); + + // Adjusts the visual order of all children of parent_group_id with a + // visual_order >= start_visual_order by delta. For example, + // AdjustStarredVisualOrder(10, 0, 1) increments the visual order all children + // of group 10 with a visual order >= 0 by 1. + bool AdjustStarredVisualOrder(UIStarID parent_group_id, + int start_visual_order, + int delta); + + // Creates a starred entry with the specified parameters in the database. + // Returns the newly created id, or 0 on failure. + // + // WARNING: Does not update the visual order. + StarID CreateStarredEntryRow(URLID url_id, + UIStarID group_id, + UIStarID parent_group_id, + const string16& title, + const base::Time& date_added, + int visual_order, + StarredEntry::Type type); + + // Deletes the entry from the starred database base on the starred id (NOT + // the url id). + // + // WARNING: Does not update the visual order. + bool DeleteStarredEntryRow(StarID star_id); + + // Gets the details for the specified star entry in entry. + bool GetStarredEntry(StarID star_id, StarredEntry* entry); + + // Creates a starred entry with the requested information. The structure will + // be updated with the ID of the newly created entry. The URL table will be + // updated to point to the entry. The URL row will be created if it doesn't + // exist. + // + // We currently only support one entry per URL. This URL should not already be + // starred when calling this function or it will fail and will return 0. + StarID CreateStarredEntry(StarredEntry* entry); + + // Used when checking integrity of starred table. + typedef TreeNodeWithValue<history::StarredEntry> StarredNode; + + // Returns the max group id, or 0 if there is an error. + UIStarID GetMaxGroupID(); + + // Gets all the bookmarks and folders creating a StarredNode for each + // bookmark and folder. 
On success all the root nodes (bookmark bar node, + // other folder node, folders with no parent or folders with a parent that + // would make a cycle) are added to roots. + // + // If a group_id occurs more than once, all but the first ones id is added to + // groups_with_duplicate_ids. + // + // All bookmarks not on the bookmark bar/other folder are added to + // unparented_urls. + // + // It's up to the caller to delete the nodes returned in roots and + // unparented_urls. + // + // This is used during integrity enforcing/checking of the starred table. + bool BuildStarNodes( + std::set<StarredNode*>* roots, + std::set<StarID>* groups_with_duplicate_ids, + std::set<StarredNode*>* unparented_urls, + std::set<StarID>* empty_url_ids); + + // Sets the visual order of all of node's children match the order in |node|. + // If the order differs, the database is updated. Returns false if the order + // differed and the db couldn't be updated. + bool EnsureVisualOrder(StarredNode* node); + + // Returns the first node in nodes with the specified type, or null if there + // is not a node with the specified type. + StarredNode* GetNodeByType( + const std::set<StarredNode*>& nodes, + StarredEntry::Type type); + + // Implementation for setting starred integrity. See description of + // EnsureStarredIntegrity for the details of what this does. + // + // All entries in roots that are not the bookmark bar and other node are + // moved to be children of the bookmark bar node. Similarly all nodes + // in unparented_urls are moved to be children of the bookmark bar. + // + // Returns true on success, false if the starred table is in a bad state and + // couldn't be repaired. 
+ bool EnsureStarredIntegrityImpl( + std::set<StarredNode*>* roots, + const std::set<StarID>& groups_with_duplicate_ids, + std::set<StarredNode*>* unparented_urls, + const std::set<StarID>& empty_url_ids); + + // Resets the visual order and parent_group_id of source's StarredEntry + // and adds it to the end of new_parent's children. + // + // This is used if the starred table is an unexpected state and an entry + // needs to be moved. + bool Move(StarredNode* source, StarredNode* new_parent); + + // Does the work of migrating bookmarks to a temporary file that + // BookmarkStorage will read from. + bool MigrateBookmarksToFileImpl(const FilePath& path); + + DISALLOW_COPY_AND_ASSIGN(StarredURLDatabase); +}; + +} // namespace history + +#endif // CHROME_BROWSER_HISTORY_STARRED_URL_DATABASE_H_ diff --git a/chrome/browser/history/starred_url_database_unittest.cc b/chrome/browser/history/starred_url_database_unittest.cc new file mode 100644 index 0000000..f82e645 --- /dev/null +++ b/chrome/browser/history/starred_url_database_unittest.cc @@ -0,0 +1,284 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include <vector> + +#include "base/file_util.h" +#include "base/path_service.h" +#include "base/string_util.h" +#include "chrome/browser/history/history.h" +#include "chrome/browser/history/starred_url_database.h" +#include "chrome/common/chrome_paths.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace history { + +class StarredURLDatabaseTest : public testing::Test, + public StarredURLDatabase { + public: + StarredURLDatabaseTest() { + } + + void AddPage(const GURL& url) { + URLRow row(url); + row.set_visit_count(1); + EXPECT_TRUE(AddURL(row)); + } + + void CompareEntryByID(const StarredEntry& entry) { + DCHECK(entry.id != 0); + StarredEntry db_value; + EXPECT_TRUE(GetStarredEntry(entry.id, &db_value)); + EXPECT_EQ(entry.id, db_value.id); + EXPECT_TRUE(entry.title == db_value.title); + EXPECT_EQ(entry.date_added.ToTimeT(), db_value.date_added.ToTimeT()); + EXPECT_EQ(entry.group_id, db_value.group_id); + EXPECT_EQ(entry.parent_group_id, db_value.parent_group_id); + EXPECT_EQ(entry.visual_order, db_value.visual_order); + EXPECT_EQ(entry.type, db_value.type); + EXPECT_EQ(entry.url_id, db_value.url_id); + if (entry.type == StarredEntry::URL) + EXPECT_TRUE(entry.url == db_value.url); + } + + int GetStarredEntryCount() { + DCHECK(db_.is_open()); + std::vector<StarredEntry> entries; + GetAllStarredEntries(&entries); + return static_cast<int>(entries.size()); + } + + StarID CreateStarredEntry(StarredEntry* entry) { + return StarredURLDatabase::CreateStarredEntry(entry); + } + + bool GetStarredEntry(StarID star_id, StarredEntry* entry) { + return StarredURLDatabase::GetStarredEntry(star_id, entry); + } + + bool EnsureStarredIntegrity() { + return StarredURLDatabase::EnsureStarredIntegrity(); + } + + private: + // Test setup. + void SetUp() { + PathService::Get(base::DIR_TEMP, &db_file_); + db_file_ = db_file_.AppendASCII("VisitTest.db"); + file_util::Delete(db_file_, false); + + // Copy db file over that contains starred table. 
+ FilePath old_history_path; + PathService::Get(chrome::DIR_TEST_DATA, &old_history_path); + old_history_path = old_history_path.AppendASCII("bookmarks"); + old_history_path = old_history_path.Append( + FILE_PATH_LITERAL("History_with_empty_starred")); + file_util::CopyFile(old_history_path, db_file_); + + EXPECT_TRUE(db_.Open(db_file_)); + + // Initialize the tables for this test. + CreateURLTable(false); + CreateMainURLIndex(); + EnsureStarredIntegrity(); + } + void TearDown() { + db_.Close(); + file_util::Delete(db_file_, false); + } + + // Provided for URL/StarredURLDatabase. + virtual sql::Connection& GetDB() { + return db_; + } + + FilePath db_file_; + sql::Connection db_; +}; + +//----------------------------------------------------------------------------- + +TEST_F(StarredURLDatabaseTest, FixOrphanedGroup) { + const int initial_count = GetStarredEntryCount(); + + // Create a group that isn't parented to the other/bookmark folders. + StarredEntry g_entry; + g_entry.type = StarredEntry::USER_GROUP; + g_entry.parent_group_id = 100; + g_entry.visual_order = 10; + g_entry.group_id = 100; + CreateStarredEntry(&g_entry); + + ASSERT_TRUE(EnsureStarredIntegrity()); + + // Make sure no new entries were added. + ASSERT_EQ(initial_count + 1, GetStarredEntryCount()); + + // Make sure the group was moved to the bookmark bar folder. + ASSERT_TRUE(GetStarredEntry(g_entry.id, &g_entry)); + ASSERT_EQ(HistoryService::kBookmarkBarID, g_entry.parent_group_id); + ASSERT_EQ(0, g_entry.visual_order); +} + +TEST_F(StarredURLDatabaseTest, FixOrphanedBookmarks) { + const int initial_count = GetStarredEntryCount(); + + // Create two bookmarks that aren't in a random folder no on the bookmark bar. 
+ StarredEntry entry1; + entry1.parent_group_id = 100; + entry1.visual_order = 10; + entry1.url = GURL("http://google.com/1"); + CreateStarredEntry(&entry1); + + StarredEntry entry2; + entry2.parent_group_id = 101; + entry2.visual_order = 20; + entry2.url = GURL("http://google.com/2"); + CreateStarredEntry(&entry2); + + ASSERT_TRUE(EnsureStarredIntegrity()); + + // Make sure no new entries were added. + ASSERT_EQ(initial_count + 2, GetStarredEntryCount()); + + // Make sure the entries were moved to the bookmark bar and the visual order + // order was updated appropriately. + ASSERT_TRUE(GetStarredEntry(entry1.id, &entry1)); + ASSERT_EQ(HistoryService::kBookmarkBarID, entry1.parent_group_id); + + ASSERT_TRUE(GetStarredEntry(entry2.id, &entry2)); + ASSERT_EQ(HistoryService::kBookmarkBarID, entry2.parent_group_id); + ASSERT_TRUE((entry1.visual_order == 0 && entry2.visual_order == 1) || + (entry1.visual_order == 1 && entry2.visual_order == 0)); +} + +TEST_F(StarredURLDatabaseTest, FixGroupCycleDepth0) { + const int initial_count = GetStarredEntryCount(); + + // Create a group that is parented to itself. + StarredEntry entry1; + entry1.group_id = entry1.parent_group_id = 100; + entry1.visual_order = 10; + entry1.type = StarredEntry::USER_GROUP; + CreateStarredEntry(&entry1); + + ASSERT_TRUE(EnsureStarredIntegrity()); + + // Make sure no new entries were added. + ASSERT_EQ(initial_count + 1, GetStarredEntryCount()); + + // Make sure the group were moved to the bookmark bar and the visual order + // order was updated appropriately. 
+ ASSERT_TRUE(GetStarredEntry(entry1.id, &entry1)); + ASSERT_EQ(HistoryService::kBookmarkBarID, entry1.parent_group_id); + ASSERT_EQ(0, entry1.visual_order); +} + +TEST_F(StarredURLDatabaseTest, FixGroupCycleDepth1) { + const int initial_count = GetStarredEntryCount(); + + StarredEntry entry1; + entry1.group_id = 100; + entry1.parent_group_id = 101; + entry1.visual_order = 10; + entry1.type = StarredEntry::USER_GROUP; + CreateStarredEntry(&entry1); + + StarredEntry entry2; + entry2.group_id = 101; + entry2.parent_group_id = 100; + entry2.visual_order = 11; + entry2.type = StarredEntry::USER_GROUP; + CreateStarredEntry(&entry2); + + ASSERT_TRUE(EnsureStarredIntegrity()); + + // Make sure no new entries were added. + ASSERT_EQ(initial_count + 2, GetStarredEntryCount()); + + // Because the groups caused a cycle, entry1 is moved the bookmark bar, which + // breaks the cycle. + ASSERT_TRUE(GetStarredEntry(entry1.id, &entry1)); + ASSERT_TRUE(GetStarredEntry(entry2.id, &entry2)); + ASSERT_EQ(HistoryService::kBookmarkBarID, entry1.parent_group_id); + ASSERT_EQ(100, entry2.parent_group_id); + ASSERT_EQ(0, entry1.visual_order); + ASSERT_EQ(0, entry2.visual_order); +} + +TEST_F(StarredURLDatabaseTest, FixVisualOrder) { + const int initial_count = GetStarredEntryCount(); + + // Star two urls. + StarredEntry entry1; + entry1.url = GURL("http://google.com/1"); + entry1.parent_group_id = HistoryService::kBookmarkBarID; + entry1.visual_order = 5; + CreateStarredEntry(&entry1); + + // Add url2 and star it. + StarredEntry entry2; + entry2.url = GURL("http://google.com/2"); + entry2.parent_group_id = HistoryService::kBookmarkBarID; + entry2.visual_order = 10; + CreateStarredEntry(&entry2); + + ASSERT_TRUE(EnsureStarredIntegrity()); + + // Make sure no new entries were added. 
+ ASSERT_EQ(initial_count + 2, GetStarredEntryCount()); + + StarredEntry entry; + ASSERT_TRUE(GetStarredEntry(entry1.id, &entry)); + entry1.visual_order = 0; + CompareEntryByID(entry1); + + ASSERT_TRUE(GetStarredEntry(entry2.id, &entry)); + entry2.visual_order = 1; + CompareEntryByID(entry2); +} + +TEST_F(StarredURLDatabaseTest, FixDuplicateGroupIDs) { + const int initial_count = GetStarredEntryCount(); + + // Create two groups with the same group id. + StarredEntry entry1; + entry1.type = StarredEntry::USER_GROUP; + entry1.group_id = 10; + entry1.parent_group_id = HistoryService::kBookmarkBarID; + CreateStarredEntry(&entry1); + StarredEntry entry2 = entry1; + CreateStarredEntry(&entry2); + + ASSERT_TRUE(EnsureStarredIntegrity()); + + // Make sure only one group exists. + ASSERT_EQ(initial_count + 1, GetStarredEntryCount()); + + StarredEntry entry; + ASSERT_TRUE(GetStarredEntry(entry1.id, &entry) || + GetStarredEntry(entry2.id, &entry)); +} + +TEST_F(StarredURLDatabaseTest, RemoveStarredEntriesWithEmptyURL) { + const int initial_count = GetStarredEntryCount(); + + StarredEntry entry; + entry.url = GURL("http://google.com"); + entry.title = UTF8ToUTF16("FOO"); + entry.parent_group_id = HistoryService::kBookmarkBarID; + + ASSERT_NE(0, CreateStarredEntry(&entry)); + + // Remove the URL. + DeleteURLRow(entry.url_id); + + // Fix up the table. + ASSERT_TRUE(EnsureStarredIntegrity()); + + // The entry we just created should have been nuked. + ASSERT_EQ(initial_count, GetStarredEntryCount()); +} + +} // namespace history diff --git a/chrome/browser/history/text_database.cc b/chrome/browser/history/text_database.cc new file mode 100644 index 0000000..3327869 --- /dev/null +++ b/chrome/browser/history/text_database.cc @@ -0,0 +1,378 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include <limits> +#include <set> +#include <string> + +#include "chrome/browser/history/text_database.h" + +#include "app/sql/connection.h" +#include "app/sql/statement.h" +#include "app/sql/transaction.h" +#include "base/file_util.h" +#include "base/histogram.h" +#include "base/logging.h" +#include "base/string_util.h" +#include "base/utf_string_conversions.h" +#include "chrome/browser/diagnostics/sqlite_diagnostics.h" + +// There are two tables in each database, one full-text search (FTS) table which +// indexes the contents and title of the pages. The other is a regular SQLITE +// table which contains non-indexed information about the page. All columns of +// a FTS table are indexed using the text search algorithm, which isn't what we +// want for things like times. If this were in the FTS table, there would be +// different words in the index for each time number. +// +// "pages" FTS table: +// url URL of the page so searches will match the URL. +// title Title of the page. +// body Body of the page. +// +// "info" regular table: +// time Time the corresponding FTS entry was visited. +// +// We do joins across these two tables by using their internal rowids, which we +// keep in sync between the two tables. The internal rowid is the only part of +// an FTS table that is indexed like a normal table, and the index over it is +// free since sqlite always indexes the internal rowid. + +namespace history { + +namespace { + +// Version 1 uses FTS2 for index files. +// Version 2 uses FTS3. +static const int kCurrentVersionNumber = 2; +static const int kCompatibleVersionNumber = 2; + +// Snippet computation relies on the index of the columns in the original +// create statement. These are the 0-based indices (as strings) of the +// corresponding columns. +const char kTitleColumnIndex[] = "1"; +const char kBodyColumnIndex[] = "2"; + +// The string prepended to the database identifier to generate the filename. 
+const FilePath::CharType kFilePrefix[] = FILE_PATH_LITERAL("History Index "); + +} // namespace + +TextDatabase::TextDatabase(const FilePath& path, + DBIdent id, + bool allow_create) + : path_(path), + ident_(id), + allow_create_(allow_create) { + // Compute the file name. + file_name_ = path_.Append(IDToFileName(ident_)); +} + +TextDatabase::~TextDatabase() { +} + +// static +const FilePath::CharType* TextDatabase::file_base() { + return kFilePrefix; +} + +// static +FilePath TextDatabase::IDToFileName(DBIdent id) { + // Identifiers are intended to be a combination of the year and month, for + // example, 200801 for January 2008. We convert this to + // "History Index 2008-01". However, we don't make assumptions about this + // scheme: the caller should assign IDs as it feels fit with the knowledge + // that they will apppear on disk in this form. + FilePath::StringType filename(file_base()); + StringAppendF(&filename, FILE_PATH_LITERAL("%d-%02d"), + id / 100, id % 100); + return FilePath(filename); +} + +// static +TextDatabase::DBIdent TextDatabase::FileNameToID(const FilePath& file_path) { + FilePath::StringType file_name = file_path.BaseName().value(); + + // We don't actually check the prefix here. Since the file system could + // be case insensitive in ways we can't predict (NTFS), checking could + // potentially be the wrong thing to do. Instead, we just look for a suffix. + static const size_t kIDStringLength = 7; // Room for "xxxx-xx". + if (file_name.length() < kIDStringLength) + return 0; + const FilePath::StringType suffix( + &file_name[file_name.length() - kIDStringLength]); + + if (suffix.length() != kIDStringLength || + suffix[4] != FILE_PATH_LITERAL('-')) { + return 0; + } + + int year = StringToInt(suffix.substr(0, 4)); + int month = StringToInt(suffix.substr(5, 2)); + + return year * 100 + month; +} + +bool TextDatabase::Init() { + // Make sure, if we're not allowed to create the file, that it exists. 
+ if (!allow_create_) { + if (!file_util::PathExists(file_name_)) + return false; + } + + // Set the exceptional sqlite error handler. + db_.set_error_delegate(GetErrorHandlerForTextDb()); + + // Set the database page size to something a little larger to give us + // better performance (we're typically seek rather than bandwidth limited). + // This only has an effect before any tables have been created, otherwise + // this is a NOP. Must be a power of 2 and a max of 8192. + db_.set_page_size(2096); + + // The default cache size is 2000 which give >8MB of data. Since we will often + // have 2-3 of these objects, each with their own 8MB, this adds up very fast. + // We therefore reduce the size so when there are multiple objects, we're not + // too big. + db_.set_cache_size(512); + + // Run the database in exclusive mode. Nobody else should be accessing the + // database while we're running, and this will give somewhat improved perf. + db_.set_exclusive_locking(); + + // Attach the database to our index file. + if (!db_.Open(file_name_)) + return false; + + // Meta table tracking version information. + if (!meta_table_.Init(&db_, kCurrentVersionNumber, kCompatibleVersionNumber)) + return false; + if (meta_table_.GetCompatibleVersionNumber() > kCurrentVersionNumber) { + // This version is too new. We don't bother notifying the user on this + // error, and just fail to use the file. Normally if they have version skew, + // they will get it for the main history file and it won't be necessary + // here. If that's not the case, since this is only indexed data, it's + // probably better to just not give FTS results than strange errors when + // everything else is working OK. 
+ LOG(WARNING) << "Text database is too new."; + return false; + } + + return CreateTables(); +} + +void TextDatabase::BeginTransaction() { + db_.BeginTransaction(); +} + +void TextDatabase::CommitTransaction() { + db_.CommitTransaction(); +} + +bool TextDatabase::CreateTables() { + // FTS table of page contents. + if (!db_.DoesTableExist("pages")) { + if (!db_.Execute("CREATE VIRTUAL TABLE pages USING fts3(" + "TOKENIZE icu," + "url LONGVARCHAR," + "title LONGVARCHAR," + "body LONGVARCHAR)")) + return false; + } + + // Non-FTS table containing URLs and times so we can efficiently find them + // using a regular index (all FTS columns are special and are treated as + // full-text-search, which is not what we want when retrieving this data). + if (!db_.DoesTableExist("info")) { + // Note that there is no point in creating an index over time. Since + // we must always query the entire FTS table (it can not efficiently do + // subsets), we will always end up doing that first, and joining the info + // table off of that. + if (!db_.Execute("CREATE TABLE info(time INTEGER NOT NULL)")) + return false; + } + + // Create the index. This will fail when the index already exists, so we just + // ignore the error. + db_.Execute("CREATE INDEX info_time ON info(time)"); + return true; +} + +bool TextDatabase::AddPageData(base::Time time, + const std::string& url, + const std::string& title, + const std::string& contents) { + sql::Transaction committer(&db_); + if (!committer.Begin()) + return false; + + // Add to the pages table. 
+ sql::Statement add_to_pages(db_.GetCachedStatement(SQL_FROM_HERE, + "INSERT INTO pages (url, title, body) VALUES (?,?,?)")); + if (!add_to_pages) { + NOTREACHED() << db_.GetErrorMessage(); + return false; + } + add_to_pages.BindString(0, url); + add_to_pages.BindString(1, title); + add_to_pages.BindString(2, contents); + if (!add_to_pages.Run()) { + NOTREACHED() << db_.GetErrorMessage(); + return false; + } + + int64 rowid = db_.GetLastInsertRowId(); + + // Add to the info table with the same rowid. + sql::Statement add_to_info(db_.GetCachedStatement(SQL_FROM_HERE, + "INSERT INTO info (rowid, time) VALUES (?,?)")); + if (!add_to_info) { + NOTREACHED() << db_.GetErrorMessage(); + return false; + } + add_to_info.BindInt64(0, rowid); + add_to_info.BindInt64(1, time.ToInternalValue()); + if (!add_to_info.Run()) { + NOTREACHED() << db_.GetErrorMessage(); + return false; + } + + return committer.Commit(); +} + +void TextDatabase::DeletePageData(base::Time time, const std::string& url) { + // First get all rows that match. Selecing on time (which has an index) allows + // us to avoid brute-force searches on the full-text-index table (there will + // generally be only one match per time). + sql::Statement select_ids(db_.GetCachedStatement(SQL_FROM_HERE, + "SELECT info.rowid " + "FROM info JOIN pages ON info.rowid = pages.rowid " + "WHERE info.time=? AND pages.url=?")); + if (!select_ids) + return; + select_ids.BindInt64(0, time.ToInternalValue()); + select_ids.BindString(1, url); + std::set<int64> rows_to_delete; + while (select_ids.Step()) + rows_to_delete.insert(select_ids.ColumnInt64(0)); + + // Delete from the pages table. 
+ sql::Statement delete_page(db_.GetCachedStatement(SQL_FROM_HERE, + "DELETE FROM pages WHERE rowid=?")); + if (!delete_page) + return; + for (std::set<int64>::const_iterator i = rows_to_delete.begin(); + i != rows_to_delete.end(); ++i) { + delete_page.BindInt64(0, *i); + if (!delete_page.Run()) { + NOTREACHED(); + return; + } + delete_page.Reset(); + } + + // Delete from the info table. + sql::Statement delete_info(db_.GetCachedStatement(SQL_FROM_HERE, + "DELETE FROM info WHERE rowid=?")); + if (!delete_info) + return; + for (std::set<int64>::const_iterator i = rows_to_delete.begin(); + i != rows_to_delete.end(); ++i) { + delete_info.BindInt64(0, *i); + if (!delete_info.Run()) { + NOTREACHED(); + return; + } + delete_info.Reset(); + } +} + +void TextDatabase::Optimize() { + sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, + "SELECT OPTIMIZE(pages) FROM pages LIMIT 1")); + if (!statement) + return; + statement.Run(); +} + +void TextDatabase::GetTextMatches(const std::string& query, + const QueryOptions& options, + std::vector<Match>* results, + URLSet* found_urls, + base::Time* first_time_searched) { + *first_time_searched = options.begin_time; + + sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, + "SELECT url, title, time, offsets(pages), body " + "FROM pages LEFT OUTER JOIN info ON pages.rowid = info.rowid " + "WHERE pages MATCH ? AND time >= ? AND time < ? " + "ORDER BY time DESC " + "LIMIT ?")); + if (!statement) + return; + + // When their values indicate "unspecified", saturate the numbers to the max + // or min to get the correct result. + int64 effective_begin_time = options.begin_time.is_null() ? + 0 : options.begin_time.ToInternalValue(); + int64 effective_end_time = options.end_time.is_null() ? + std::numeric_limits<int64>::max() : options.end_time.ToInternalValue(); + int effective_max_count = options.max_count ? 
+ options.max_count : std::numeric_limits<int>::max(); + + statement.BindString(0, query); + statement.BindInt64(1, effective_begin_time); + statement.BindInt64(2, effective_end_time); + statement.BindInt(3, effective_max_count); + + while (statement.Step()) { + // TODO(brettw) allow canceling the query in the middle. + // if (canceled_or_something) + // break; + + GURL url(statement.ColumnString(0)); + URLSet::const_iterator found_url = found_urls->find(url); + if (found_url != found_urls->end()) + continue; // Don't add this duplicate. + + // Fill the results into the vector (avoid copying the URL with Swap()). + results->resize(results->size() + 1); + Match& match = results->at(results->size() - 1); + match.url.Swap(&url); + + match.title = statement.ColumnString16(1); + match.time = base::Time::FromInternalValue(statement.ColumnInt64(2)); + + // Extract any matches in the title. + std::string offsets_str = statement.ColumnString(3); + Snippet::ExtractMatchPositions(offsets_str, kTitleColumnIndex, + &match.title_match_positions); + Snippet::ConvertMatchPositionsToWide(statement.ColumnString(1), + &match.title_match_positions); + + // Extract the matches in the body. + Snippet::MatchPositions match_positions; + Snippet::ExtractMatchPositions(offsets_str, kBodyColumnIndex, + &match_positions); + + // Compute the snippet based on those matches. + std::string body = statement.ColumnString(4); + match.snippet.ComputeSnippet(match_positions, body); + } + + // When we have returned all the results possible (or determined that there + // are none), then we have searched all the time requested, so we can + // set the first_time_searched to that value. + if (results->size() == 0 || + options.max_count == 0 || // Special case for wanting all the results. + static_cast<int>(results->size()) < options.max_count) { + *first_time_searched = options.begin_time; + } else { + // Since we got the results in order, we know the last item is the last + // time we considered. 
+ *first_time_searched = results->back().time; + } + + statement.Reset(); +} + +} // namespace history diff --git a/chrome/browser/history/text_database.h b/chrome/browser/history/text_database.h new file mode 100644 index 0000000..e34c071 --- /dev/null +++ b/chrome/browser/history/text_database.h @@ -0,0 +1,162 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CHROME_BROWSER_HISTORY_TEXT_DATABASE_H_ +#define CHROME_BROWSER_HISTORY_TEXT_DATABASE_H_ + +#include <set> +#include <vector> + +#include "app/sql/connection.h" +#include "app/sql/meta_table.h" +#include "base/basictypes.h" +#include "base/file_path.h" +#include "base/string16.h" +#include "chrome/browser/history/history_types.h" +#include "googleurl/src/gurl.h" + +namespace history { + +// Encapsulation of a full-text indexed database file. +class TextDatabase { + public: + typedef int DBIdent; + + typedef std::set<GURL> URLSet; + + // Returned from the search function. + struct Match { + // URL of the match. + GURL url; + + // The title is returned because the title in the text database and the URL + // database may differ. This happens because we capture the title when the + // body is captured, and don't update it later. + string16 title; + + // Time the page that was returned was visited. + base::Time time; + + // Identifies any found matches in the title of the document. These are not + // included in the snippet. + Snippet::MatchPositions title_match_positions; + + // Snippet of the match we generated from the body. + Snippet snippet; + }; + + // Note: You must call init which must succeed before using this class. + // + // Computes the matches for the query, returning results in decreasing order + // of visit time. + // + // This function will attach the new database to the given database + // connection. 
This allows one sqlite3 object to share many TextDatabases, + // meaning that they will all share the same cache, which allows us to limit + // the total size that text indexing databasii can take up. + // + // |file_name| is the name of the file on disk. + // + // ID is the identifier for the database. It should uniquely identify it among + // other databases on disk and in the sqlite connection. + // + // |allow_create| indicates if we want to allow creation of the file if it + // doesn't exist. For files associated with older time periods, we don't want + // to create them if they don't exist, so this flag would be false. + TextDatabase(const FilePath& path, + DBIdent id, + bool allow_create); + ~TextDatabase(); + + // Initializes the database connection and creates the file if the class + // was created with |allow_create|. If the file couldn't be opened or + // created, this will return false. No other functions should be called + // after this. + bool Init(); + + // Allows updates to be batched. This gives higher performance when multiple + // updates are happening because every insert doesn't require a sync to disk. + // Transactions can be nested, only the outermost one will actually count. + void BeginTransaction(); + void CommitTransaction(); + + // For testing, returns the file name of the database so it can be deleted + // after the test. This is valid even before Init() is called. + const FilePath& file_name() const { return file_name_; } + + // Returns a NULL-terminated string that is the base of history index files, + // which is the part before the database identifier. For example + // "History Index *". This is for finding existing database files. + static const FilePath::CharType* file_base(); + + // Converts a filename on disk (optionally including a path) to a database + // identifier. If the filename doesn't have the correct format, returns 0. 
+ static DBIdent FileNameToID(const FilePath& file_path); + + // Changing operations ------------------------------------------------------- + + // Adds the given data to the page. Returns true on success. The data should + // already be converted to UTF-8. + bool AddPageData(base::Time time, + const std::string& url, + const std::string& title, + const std::string& contents); + + // Deletes the indexed data exactly matching the given URL/time pair. + void DeletePageData(base::Time time, const std::string& url); + + // Optimizes the tree inside the database. This will, in addition to making + // access faster, remove any deleted data from the database (normally it is + // added again as "removed" and it is manually cleaned up when it decides to + // optimize it naturally). It is bad for privacy if a user is deleting a + // page from history but it still exists in the full text database in some + // form. This function will clean that up. + void Optimize(); + + // Querying ------------------------------------------------------------------ + + // Executes the given query. See QueryOptions for more info on input. + // + // The results are appended to any existing ones in |*results|, and the first + // time considered for the output is in |first_time_searched| + // (see QueryResults for more). + // + // Any URLs found will be added to |unique_urls|. If a URL is already in the + // set, additional results will not be added (giving the ability to uniquify + // URL results). + // + // Callers must run QueryParser on the user text and pass the results of the + // QueryParser to this method as the query string. + void GetTextMatches(const std::string& query, + const QueryOptions& options, + std::vector<Match>* results, + URLSet* unique_urls, + base::Time* first_time_searched); + + // Converts the given database identifier to a filename. This does not include + // the path, just the file and extension. 
+ static FilePath IDToFileName(DBIdent id); + + private: + // Ensures that the tables and indices are created. Returns true on success. + bool CreateTables(); + + // The sql database. Not valid until Init is called. + sql::Connection db_; + + const FilePath path_; + const DBIdent ident_; + const bool allow_create_; + + // Full file name of the file on disk, computed in Init(). + FilePath file_name_; + + sql::MetaTable meta_table_; + + DISALLOW_COPY_AND_ASSIGN(TextDatabase); +}; + +} // namespace history + +#endif // CHROME_BROWSER_HISTORY_TEXT_DATABASE_H_ diff --git a/chrome/browser/history/text_database_manager.cc b/chrome/browser/history/text_database_manager.cc new file mode 100644 index 0000000..ff1ae38 --- /dev/null +++ b/chrome/browser/history/text_database_manager.cc @@ -0,0 +1,550 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/history/text_database_manager.h" + +#include "base/compiler_specific.h" +#include "base/file_util.h" +#include "base/histogram.h" +#include "base/logging.h" +#include "base/message_loop.h" +#include "base/string_util.h" +#include "base/utf_string_conversions.h" +#include "chrome/browser/history/history_publisher.h" +#include "chrome/browser/history/visit_database.h" +#include "chrome/common/mru_cache.h" + +using base::Time; +using base::TimeDelta; +using base::TimeTicks; + +namespace history { + +namespace { + +// The number of database files we will be attached to at once. +const int kCacheDBSize = 5; + +std::string ConvertStringForIndexer(const string16& input) { + // TODO(evanm): other transformations here? + return UTF16ToUTF8(CollapseWhitespace(input, false)); +} + +// Data older than this will be committed to the full text index even if we +// haven't gotten a title and/or body. 
+const int kExpirationSec = 20; + +} // namespace + +// TextDatabaseManager::PageInfo ----------------------------------------------- + +TextDatabaseManager::PageInfo::PageInfo(URLID url_id, + VisitID visit_id, + Time visit_time) + : url_id_(url_id), + visit_id_(visit_id), + visit_time_(visit_time) { + added_time_ = TimeTicks::Now(); +} + +void TextDatabaseManager::PageInfo::set_title(const string16& ttl) { + if (ttl.empty()) // Make the title nonempty when we set it for EverybodySet. + title_ = ASCIIToUTF16(" "); + else + title_ = ttl; +} + +void TextDatabaseManager::PageInfo::set_body(const string16& bdy) { + if (bdy.empty()) // Make the body nonempty when we set it for EverybodySet. + body_ = ASCIIToUTF16(" "); + else + body_ = bdy; +} + +bool TextDatabaseManager::PageInfo::Expired(TimeTicks now) const { + return now - added_time_ > TimeDelta::FromSeconds(kExpirationSec); +} + +// TextDatabaseManager --------------------------------------------------------- + +TextDatabaseManager::TextDatabaseManager(const FilePath& dir, + URLDatabase* url_database, + VisitDatabase* visit_database) + : dir_(dir), + url_database_(url_database), + visit_database_(visit_database), + recent_changes_(RecentChangeList::NO_AUTO_EVICT), + transaction_nesting_(0), + db_cache_(DBCache::NO_AUTO_EVICT), + present_databases_loaded_(false), + ALLOW_THIS_IN_INITIALIZER_LIST(factory_(this)), + history_publisher_(NULL) { +} + +TextDatabaseManager::~TextDatabaseManager() { + if (transaction_nesting_) + CommitTransaction(); +} + +// static +TextDatabase::DBIdent TextDatabaseManager::TimeToID(Time time) { + Time::Exploded exploded; + time.UTCExplode(&exploded); + + // We combine the month and year into a 6-digit number (200801 for + // January, 2008). The month is 1-based. 
+ return exploded.year * 100 + exploded.month; +} + +// static +Time TextDatabaseManager::IDToTime(TextDatabase::DBIdent id) { + Time::Exploded exploded; + memset(&exploded, 0, sizeof(Time::Exploded)); + exploded.year = id / 100; + exploded.month = id % 100; + return Time::FromUTCExploded(exploded); +} + +bool TextDatabaseManager::Init(const HistoryPublisher* history_publisher) { + history_publisher_ = history_publisher; + + // Start checking recent changes and committing them. + ScheduleFlushOldChanges(); + return true; +} + +void TextDatabaseManager::BeginTransaction() { + transaction_nesting_++; +} + +void TextDatabaseManager::CommitTransaction() { + DCHECK(transaction_nesting_); + transaction_nesting_--; + if (transaction_nesting_) + return; // Still more nesting of transactions before committing. + + // Commit all databases with open transactions on them. + for (DBIdentSet::const_iterator i = open_transactions_.begin(); + i != open_transactions_.end(); ++i) { + DBCache::iterator iter = db_cache_.Get(*i); + if (iter == db_cache_.end()) { + NOTREACHED() << "All open transactions should be cached."; + continue; + } + iter->second->CommitTransaction(); + } + open_transactions_.clear(); + + // Now that the transaction is over, we can expire old connections. + db_cache_.ShrinkToSize(kCacheDBSize); +} + +void TextDatabaseManager::InitDBList() { + if (present_databases_loaded_) + return; + + present_databases_loaded_ = true; + + // Find files on disk matching our pattern so we can quickly test for them. + FilePath::StringType filepattern(TextDatabase::file_base()); + filepattern.append(FILE_PATH_LITERAL("*")); + file_util::FileEnumerator enumerator( + dir_, false, file_util::FileEnumerator::FILES, filepattern); + FilePath cur_file; + while (!(cur_file = enumerator.Next()).empty()) { + // Convert to the number representing this file. + TextDatabase::DBIdent id = TextDatabase::FileNameToID(cur_file); + if (id) // Will be 0 on error. 
+ present_databases_.insert(id); + } +} + +void TextDatabaseManager::AddPageURL(const GURL& url, + URLID url_id, + VisitID visit_id, + Time time) { + // Delete any existing page info. + RecentChangeList::iterator found = recent_changes_.Peek(url); + if (found != recent_changes_.end()) + recent_changes_.Erase(found); + + // Just save this info for later. We will save it when it expires or when all + // the data is complete. + recent_changes_.Put(url, PageInfo(url_id, visit_id, time)); +} + +void TextDatabaseManager::AddPageTitle(const GURL& url, + const string16& title) { + RecentChangeList::iterator found = recent_changes_.Peek(url); + if (found == recent_changes_.end()) { + // This page is not in our cache of recent pages. This is very much an edge + // case as normally a title will come in <20 seconds after the page commits, + // and TabContents will avoid spamming us with >1 title per page. However, + // it could come up if your connection is unhappy, and we don't want to + // miss anything. + // + // To solve this problem, we'll just associate the most recent visit with + // the new title and index that using the regular code path. + URLRow url_row; + if (!url_database_->GetRowForURL(url, &url_row)) + return; // URL is unknown, give up. + VisitRow visit; + if (!visit_database_->GetMostRecentVisitForURL(url_row.id(), &visit)) + return; // No recent visit, give up. + + if (visit.is_indexed) { + // If this page was already indexed, we could have a body that came in + // first and we don't want to overwrite it. We could go query for the + // current body, or have a special setter for only the title, but this is + // not worth it for this edge case. + // + // It will be almost impossible for the title to take longer than + // kExpirationSec yet we got a body in less than that time, since the + // title should always come in first. 
+ return; + } + + AddPageData(url, url_row.id(), visit.visit_id, visit.visit_time, + title, string16()); + return; // We don't know about this page, give up. + } + + PageInfo& info = found->second; + if (info.has_body()) { + // This info is complete, write to the database. + AddPageData(url, info.url_id(), info.visit_id(), info.visit_time(), + title, info.body()); + recent_changes_.Erase(found); + return; + } + + info.set_title(title); +} + +void TextDatabaseManager::AddPageContents(const GURL& url, + const string16& body) { + RecentChangeList::iterator found = recent_changes_.Peek(url); + if (found == recent_changes_.end()) { + // This page is not in our cache of recent pages. This means that the page + // took more than kExpirationSec to load. Often, this will be the result of + // a very slow iframe or other resource on the page that makes us think its + // still loading. + // + // As a fallback, set the most recent visit's contents using the input, and + // use the last set title in the URL table as the title to index. + URLRow url_row; + if (!url_database_->GetRowForURL(url, &url_row)) + return; // URL is unknown, give up. + VisitRow visit; + if (!visit_database_->GetMostRecentVisitForURL(url_row.id(), &visit)) + return; // No recent visit, give up. + + // Use the title from the URL row as the title for the indexing. + AddPageData(url, url_row.id(), visit.visit_id, visit.visit_time, + url_row.title(), body); + return; + } + + PageInfo& info = found->second; + if (info.has_title()) { + // This info is complete, write to the database. 
+    AddPageData(url, info.url_id(), info.visit_id(), info.visit_time(),
+                info.title(), body);
+    recent_changes_.Erase(found);
+    return;
+  }
+
+  info.set_body(body);
+}
+
+bool TextDatabaseManager::AddPageData(const GURL& url,
+                                      URLID url_id,
+                                      VisitID visit_id,
+                                      Time visit_time,
+                                      const string16& title,
+                                      const string16& body) {
+  TextDatabase* db = GetDBForTime(visit_time, true);
+  if (!db)
+    return false;
+
+  TimeTicks beginning_time = TimeTicks::Now();
+
+  // First delete any recently-indexed data for this page. This will delete
+  // anything in the main database, but we don't bother looking through the
+  // archived database.
+  VisitVector visits;
+  visit_database_->GetVisitsForURL(url_id, &visits);
+  size_t our_visit_row_index = visits.size();
+  for (size_t i = 0; i < visits.size(); i++) {
+    // While we're going through all the visits, also find our row so we can
+    // avoid another DB query.
+    if (visits[i].visit_id == visit_id) {
+      our_visit_row_index = i;
+    } else if (visits[i].is_indexed) {
+      visits[i].is_indexed = false;
+      visit_database_->UpdateVisitRow(visits[i]);
+      DeletePageData(visits[i].visit_time, url, NULL);
+    }
+  }
+
+  if (visit_id) {
+    // We're supposed to update the visit database.
+    if (our_visit_row_index >= visits.size()) {
+      NOTREACHED() << "We should always have found a visit when given an ID.";
+      return false;
+    }
+
+    DCHECK(visit_time == visits[our_visit_row_index].visit_time);
+
+    // Update the visit database to reference our addition.
+    visits[our_visit_row_index].is_indexed = true;
+    if (!visit_database_->UpdateVisitRow(visits[our_visit_row_index]))
+      return false;
+  }
+
+  // Now index the data.
+ std::string url_str = URLDatabase::GURLToDatabaseURL(url); + bool success = db->AddPageData(visit_time, url_str, + ConvertStringForIndexer(title), + ConvertStringForIndexer(body)); + + UMA_HISTOGRAM_TIMES("History.AddFTSData", + TimeTicks::Now() - beginning_time); + + if (history_publisher_) + history_publisher_->PublishPageContent(visit_time, url, title, body); + + return success; +} + +void TextDatabaseManager::DeletePageData(Time time, const GURL& url, + ChangeSet* change_set) { + TextDatabase::DBIdent db_ident = TimeToID(time); + + // We want to open the database for writing, but only if it exists. To + // achieve this, we check whether it exists by saying we're not going to + // write to it (avoiding the autocreation code normally called when writing) + // and then access it for writing only if it succeeds. + TextDatabase* db = GetDB(db_ident, false); + if (!db) + return; + db = GetDB(db_ident, true); + + if (change_set) + change_set->Add(db_ident); + + db->DeletePageData(time, URLDatabase::GURLToDatabaseURL(url)); +} + +void TextDatabaseManager::DeleteFromUncommitted( + const std::set<GURL>& restrict_urls, Time begin, Time end) { + // First find the beginning of the range to delete. Recall that the list + // has the most recent item at the beginning. There won't normally be very + // many items, so a brute-force search is fine. + RecentChangeList::iterator cur = recent_changes_.begin(); + if (!end.is_null()) { + // Walk from the beginning of the list backwards in time to find the newest + // entry that should be deleted. + while (cur != recent_changes_.end() && cur->second.visit_time() >= end) + ++cur; + } + + // Now delete all visits up to the oldest one we were supposed to delete. + // Note that if begin is_null, it will be less than or equal to any other + // time. 
+ if (restrict_urls.empty()) { + while (cur != recent_changes_.end() && cur->second.visit_time() >= begin) + cur = recent_changes_.Erase(cur); + } else { + while (cur != recent_changes_.end() && cur->second.visit_time() >= begin) { + if (restrict_urls.find(cur->first) != restrict_urls.end()) + cur = recent_changes_.Erase(cur); + else + ++cur; + } + } +} + +void TextDatabaseManager::DeleteAll() { + DCHECK_EQ(0, transaction_nesting_) << "Calling deleteAll in a transaction."; + + InitDBList(); + + // Close all open databases. + db_cache_.Clear(); + + // Now go through and delete all the files. + for (DBIdentSet::iterator i = present_databases_.begin(); + i != present_databases_.end(); ++i) { + FilePath file_name = dir_.Append(TextDatabase::IDToFileName(*i)); + file_util::Delete(file_name, false); + } +} + +void TextDatabaseManager::OptimizeChangedDatabases( + const ChangeSet& change_set) { + for (ChangeSet::DBSet::const_iterator i = + change_set.changed_databases_.begin(); + i != change_set.changed_databases_.end(); ++i) { + // We want to open the database for writing, but only if it exists. To + // achieve this, we check whether it exists by saying we're not going to + // write to it (avoiding the autocreation code normally called when writing) + // and then access it for writing only if it succeeds. + TextDatabase* db = GetDB(*i, false); + if (!db) + continue; + db = GetDB(*i, true); + if (!db) + continue; // The file may have changed or something. + db->Optimize(); + } +} + +void TextDatabaseManager::GetTextMatches( + const string16& query, + const QueryOptions& options, + std::vector<TextDatabase::Match>* results, + Time* first_time_searched) { + results->clear(); + + InitDBList(); + if (present_databases_.empty()) { + // Nothing to search. + *first_time_searched = options.begin_time; + return; + } + + // Get the query into the proper format for the individual DBs. 
+ string16 fts_query16; + query_parser_.ParseQuery(query, &fts_query16); + std::string fts_query = UTF16ToUTF8(fts_query16); + + // Need a copy of the options so we can modify the max count for each call + // to the individual databases. + QueryOptions cur_options(options); + + // Compute the minimum and maximum values for the identifiers that could + // encompass the input time range. + TextDatabase::DBIdent min_ident = options.begin_time.is_null() ? + *present_databases_.begin() : + TimeToID(options.begin_time); + TextDatabase::DBIdent max_ident = options.end_time.is_null() ? + *present_databases_.rbegin() : + TimeToID(options.end_time); + + // Iterate over the databases from the most recent backwards. + bool checked_one = false; + TextDatabase::URLSet found_urls; + for (DBIdentSet::reverse_iterator i = present_databases_.rbegin(); + i != present_databases_.rend(); + ++i) { + // TODO(brettw) allow canceling the query in the middle. + // if (canceled_or_something) + // break; + + // This code is stupid, we just loop until we find the correct starting + // time range rather than search in an intelligent way. Users will have a + // few dozen files at most, so this should not be an issue. + if (*i > max_ident) + continue; // Haven't gotten to the time range yet. + if (*i < min_ident) + break; // Covered all the time range. + + TextDatabase* cur_db = GetDB(*i, false); + if (!cur_db) + continue; + + // Adjust the max count according to how many results we've already got. + if (options.max_count) { + cur_options.max_count = options.max_count - + static_cast<int>(results->size()); + } + + // Since we are going backwards in time, it is always OK to pass the + // current first_time_searched, since it will always be smaller than + // any previous set. 
+ cur_db->GetTextMatches(fts_query, cur_options, + results, &found_urls, first_time_searched); + checked_one = true; + + DCHECK(options.max_count == 0 || + static_cast<int>(results->size()) <= options.max_count); + if (options.max_count && + static_cast<int>(results->size()) >= options.max_count) + break; // Got the max number of results. + } + + // When there were no databases in the range, we need to fix up the min time. + if (!checked_one) + *first_time_searched = options.begin_time; +} + +TextDatabase* TextDatabaseManager::GetDB(TextDatabase::DBIdent id, + bool for_writing) { + DBCache::iterator found_db = db_cache_.Get(id); + if (found_db != db_cache_.end()) { + if (transaction_nesting_ && for_writing && + open_transactions_.find(id) == open_transactions_.end()) { + // If we currently have an open transaction, that database is not yet + // part of the transaction, and the database will be written to, it needs + // to be part of our transaction. + found_db->second->BeginTransaction(); + open_transactions_.insert(id); + } + return found_db->second; + } + + // Need to make the database. + TextDatabase* new_db = new TextDatabase(dir_, id, for_writing); + if (!new_db->Init()) { + delete new_db; + return NULL; + } + db_cache_.Put(id, new_db); + present_databases_.insert(id); + + if (transaction_nesting_ && for_writing) { + // If we currently have an open transaction and the new database will be + // written to, it needs to be part of our transaction. + new_db->BeginTransaction(); + open_transactions_.insert(id); + } + + // When no transaction is open, allow this new one to kick out an old one. 
+ if (!transaction_nesting_) + db_cache_.ShrinkToSize(kCacheDBSize); + + return new_db; +} + +TextDatabase* TextDatabaseManager::GetDBForTime(Time time, + bool create_if_necessary) { + return GetDB(TimeToID(time), create_if_necessary); +} + +void TextDatabaseManager::ScheduleFlushOldChanges() { + factory_.RevokeAll(); + MessageLoop::current()->PostDelayedTask(FROM_HERE, factory_.NewRunnableMethod( + &TextDatabaseManager::FlushOldChanges), + kExpirationSec * Time::kMillisecondsPerSecond); +} + +void TextDatabaseManager::FlushOldChanges() { + FlushOldChangesForTime(TimeTicks::Now()); +} + +void TextDatabaseManager::FlushOldChangesForTime(TimeTicks now) { + // The end of the list is the oldest, so we just start from there committing + // things until we get something too new. + RecentChangeList::reverse_iterator i = recent_changes_.rbegin(); + while (i != recent_changes_.rend() && i->second.Expired(now)) { + AddPageData(i->first, i->second.url_id(), i->second.visit_id(), + i->second.visit_time(), i->second.title(), i->second.body()); + i = recent_changes_.Erase(i); + } + + ScheduleFlushOldChanges(); +} + +} // namespace history diff --git a/chrome/browser/history/text_database_manager.h b/chrome/browser/history/text_database_manager.h new file mode 100644 index 0000000..7f25bf7 --- /dev/null +++ b/chrome/browser/history/text_database_manager.h @@ -0,0 +1,311 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+
+#ifndef CHROME_BROWSER_HISTORY_TEXT_DATABASE_MANAGER_H_
+#define CHROME_BROWSER_HISTORY_TEXT_DATABASE_MANAGER_H_
+
+#include <set>
+#include <vector>
+
+#include "base/basictypes.h"
+#include "base/file_path.h"
+#include "base/gtest_prod_util.h"
+#include "base/string16.h"
+#include "base/task.h"
+#include "chrome/browser/history/history_types.h"
+#include "chrome/browser/history/text_database.h"
+#include "chrome/browser/history/query_parser.h"
+#include "chrome/browser/history/url_database.h"
+#include "chrome/common/mru_cache.h"
+
+struct sqlite3;
+
+namespace history {
+
+class HistoryPublisher;
+class VisitDatabase;
+
+// Manages a set of text databases representing different time periods. This
+// will page them in and out as necessary, and will manage queries for times
+// spanning multiple databases.
+//
+// It will also keep a list of partial changes, such as page adds and title and
+// body sets, all of which come in at different times for a given page. When
+// all data is received or enough time has elapsed since adding, the indexed
+// data will be committed.
+//
+// This allows us to minimize inserts and modifications, which are slow for the
+// full text database, since each page's information is added exactly once.
+//
+// Note: be careful to delete the relevant entries from this uncommitted list
+// when clearing history or this information may get added to the database soon
+// after the clear.
+class TextDatabaseManager {
+ public:
+  // Tracks a set of changes (only deletes need to be supported now) to the
+  // databases. This is opaque to the caller, but allows it to pass back a list
+  // of all database that it has caused a change to.
+  //
+  // This is necessary for the feature where we optimize full text databases
+  // which have changed as a result of the user deleting history via
+  // OptimizeChangedDatabases. We want to do each affected database only once at
+  // the end of the delete, but we don't want the caller to have to worry about
+  // our internals.
+  class ChangeSet {
+   public:
+    ChangeSet() {}
+
+   private:
+    friend class TextDatabaseManager;
+
+    typedef std::set<TextDatabase::DBIdent> DBSet;
+
+    void Add(TextDatabase::DBIdent id) { changed_databases_.insert(id); }
+
+    DBSet changed_databases_;
+  };
+
+  // You must call Init() to complete initialization.
+  //
+  // |dir| is the directory that will hold the full text database files (there
+  // will be many files named by their date ranges).
+  //
+  // The visit database is a pointer owned by the caller for the main database
+  // (of recent visits). The visit database will be updated to refer to the
+  // added text database entries.
+  explicit TextDatabaseManager(const FilePath& dir,
+                               URLDatabase* url_database,
+                               VisitDatabase* visit_database);
+  ~TextDatabaseManager();
+
+  // Must call before using other functions. If it returns false, no other
+  // functions should be called.
+  bool Init(const HistoryPublisher* history_publisher);
+
+  // Returns the directory that holds the full text database files.
+  const FilePath& GetDir() { return dir_; }
+
+  // Allows scoping updates. This also allows things to go faster since every
+  // page add doesn't need to be committed to disk (slow). Note that files will
+  // still get created during a transaction.
+  void BeginTransaction();
+  void CommitTransaction();
+
+  // Sets specific information for the given page to be added to the database.
+  // In normal operation, URLs will be added as the user visits them, the titles
+  // and bodies will come in some time after that. These changes will be
+  // automatically coalesced and added to the database some time in the future
+  // using AddPageData().
+  //
+  // AddPageURL must be called for a given URL (+ its corresponding ID) before
+  // either the title or body set. The visit ID specifies the visit that will
+  // get updated to refer to the full text indexed information. The visit time
+  // should be the time corresponding to that visit in the database.
+  void AddPageURL(const GURL& url, URLID url_id, VisitID visit_id,
+                  base::Time visit_time);
+  void AddPageTitle(const GURL& url, const string16& title);
+  void AddPageContents(const GURL& url, const string16& body);
+
+  // Adds the given data to the appropriate database file, returning true on
+  // success. The visit database row identified by |visit_id| will be updated
+  // to refer to the full text index entry. If the visit ID is 0, the visit
+  // database will not be updated.
+  bool AddPageData(const GURL& url,
+                   URLID url_id,
+                   VisitID visit_id,
+                   base::Time visit_time,
+                   const string16& title,
+                   const string16& body);
+
+  // Deletes the instance of indexed data identified by the given time and URL.
+  // Any changes will be tracked in the optional change set for use when calling
+  // OptimizeChangedDatabases later. change_set can be NULL.
+  void DeletePageData(base::Time time, const GURL& url,
+                      ChangeSet* change_set);
+
+  // The text database manager keeps a list of changes that are made to the
+  // file AddPageURL/Title/Body that may not be committed to the database yet.
+  // This function removes entries from this list happening between the given
+  // time range. It is called when the user clears their history for a time
+  // range, and we don't want any of our data to "leak." If restrict_urls is
+  // not empty, only changes on those URLs are deleted.
+  //
+  // Either or both times may be is_null to be unbounded in that direction. When
+  // non-null, the range is [begin, end).
+  void DeleteFromUncommitted(const std::set<GURL>& restrict_urls,
+                             base::Time begin, base::Time end);
+
+  // Deletes all full text search data by removing the files from the disk.
+ // This must be called OUTSIDE of a transaction since it actually deletes the + // files rather than messing with the database. + void DeleteAll(); + + // Calls optimize on all the databases identified in a given change set (see + // the definition of ChangeSet above for more). Optimizing means that old data + // will be removed rather than marked unused. + void OptimizeChangedDatabases(const ChangeSet& change_set); + + // Executes the given query. See QueryOptions for more info on input. + // + // The results are filled into |results|, and the first time considered for + // the output is in |first_time_searched| (see QueryResults for more). + // + // This function will return more than one match per URL if there is more than + // one entry for that URL in the database. + void GetTextMatches(const string16& query, + const QueryOptions& options, + std::vector<TextDatabase::Match>* results, + base::Time* first_time_searched); + + private: + // These tests call ExpireRecentChangesForTime to force expiration. + FRIEND_TEST_ALL_PREFIXES(TextDatabaseManagerTest, InsertPartial); + FRIEND_TEST_ALL_PREFIXES(TextDatabaseManagerTest, PartialComplete); + FRIEND_TEST_ALL_PREFIXES(ExpireHistoryTest, DeleteURLAndFavicon); + FRIEND_TEST_ALL_PREFIXES(ExpireHistoryTest, FlushRecentURLsUnstarred); + FRIEND_TEST_ALL_PREFIXES(ExpireHistoryTest, + FlushRecentURLsUnstarredRestricted); + + // Stores "recent stuff" that has happened with the page, since the page + // visit, title, and body all come in at different times. + class PageInfo { + public: + PageInfo(URLID url_id, VisitID visit_id, base::Time visit_time); + + // Getters. + URLID url_id() const { return url_id_; } + VisitID visit_id() const { return visit_id_; } + base::Time visit_time() const { return visit_time_; } + const string16& title() const { return title_; } + const string16& body() const { return body_; } + + // Setters, we can only update the title and body. 
+ void set_title(const string16& ttl); + void set_body(const string16& bdy); + + // Returns true if both the title or body of the entry has been set. Since + // both the title and body setters will "fix" empty strings to be a space, + // these indicate if the setter was ever called. + bool has_title() const { return !title_.empty(); } + bool has_body() { return !body_.empty(); } + + // Returns true if this entry was added too long ago and we should give up + // waiting for more data. The current time is passed in as an argument so we + // can check many without re-querying the timer. + bool Expired(base::TimeTicks now) const; + + private: + URLID url_id_; + VisitID visit_id_; + + // Time of the visit of the URL. This will be the value stored in the URL + // and visit tables for the entry. + base::Time visit_time_; + + // When this page entry was created. We have a cap on the maximum time that + // an entry will be in the queue before being flushed to the database. + base::TimeTicks added_time_; + + // Will be the string " " when they are set to distinguish set and unset. + string16 title_; + string16 body_; + }; + + // Converts the given time to a database identifier or vice-versa. + static TextDatabase::DBIdent TimeToID(base::Time time); + static base::Time IDToTime(TextDatabase::DBIdent id); + + // Returns a text database for the given identifier or time. This file will + // be created if it doesn't exist and |for_writing| is set. On error, + // including the case where the file doesn't exist and |for_writing| + // is false, it will return NULL. + // + // When |for_writing| is set, a transaction on the database will be opened + // if there is a transaction open on this manager. + // + // The pointer will be tracked in the cache. The caller should not store it + // or delete it since it will get automatically deleted as necessary. 
+ TextDatabase* GetDB(TextDatabase::DBIdent id, bool for_writing); + TextDatabase* GetDBForTime(base::Time time, bool for_writing); + + // Populates the present_databases_ list based on which files are on disk. + // When the list is already initialized, this will do nothing, so you can + // call it whenever you want to ensure the present_databases_ set is filled. + void InitDBList(); + + // Schedules a call to ExpireRecentChanges in the future. + void ScheduleFlushOldChanges(); + + // Checks the recent_changes_ list and commits partial data that has been + // around too long. + void FlushOldChanges(); + + // Given "now," this will expire old things from the recent_changes_ list. + // This is used as the backend for FlushOldChanges and is called directly + // by the unit tests with fake times. + void FlushOldChangesForTime(base::TimeTicks now); + + // Directory holding our index files. + const FilePath dir_; + + // Non-owning pointers to the recent history databases for URLs and visits. + URLDatabase* url_database_; + VisitDatabase* visit_database_; + + // Lists recent additions that we have not yet filled out with the title and + // body. Sorted by time, we will flush them when they are complete or have + // been in the queue too long without modification. + // + // We kind of abuse the MRUCache because we never move things around in it + // using Get. Instead, we keep them in the order they were inserted, since + // this is the metric we use to measure age. The MRUCache gives us an ordered + // list with fast lookup by URL. + typedef MRUCache<GURL, PageInfo> RecentChangeList; + RecentChangeList recent_changes_; + + // Nesting levels of transactions. Since sqlite only allows one open + // transaction, we simulate nested transactions by mapping the outermost one + // to a real transaction. Since this object never needs to do ROLLBACK, losing + // the ability for all transactions to rollback is inconsequential. 
+  int transaction_nesting_;
+
+  // The cache owns the TextDatabase pointers, they will be automagically
+  // deleted when the cache entry is removed or expired.
+  typedef OwningMRUCache<TextDatabase::DBIdent, TextDatabase*> DBCache;
+  DBCache db_cache_;
+
+  // Tells us about the existence of database files on disk. All existing
+  // databases will be in here, and non-existent ones will not, so we don't
+  // have to check the disk every time.
+  //
+  // This set is populated LAZILY by InitDBList(), you should call that function
+  // before accessing the list.
+  //
+  // Note that iterators will work on the keys in-order. Normally, reverse
+  // iterators will be used to iterate the keys in reverse-order.
+  typedef std::set<TextDatabase::DBIdent> DBIdentSet;
+  DBIdentSet present_databases_;
+  bool present_databases_loaded_;  // Set by InitDBList when populated.
+
+  // Lists all databases with open transactions. These will have to be closed
+  // when the transaction is committed.
+  DBIdentSet open_transactions_;
+
+  QueryParser query_parser_;
+
+  // Generates tasks for our periodic checking of expired "recent changes".
+  ScopedRunnableMethodFactory<TextDatabaseManager> factory_;
+
+  // This object is created and managed by the history backend. We maintain an
+  // opaque pointer to the object for our use.
+  // This can be NULL if there are no indexers registered to receive indexing
+  // data from us.
+  const HistoryPublisher* history_publisher_;
+
+  DISALLOW_COPY_AND_ASSIGN(TextDatabaseManager);
+};
+
+}  // namespace history
+
+#endif  // CHROME_BROWSER_HISTORY_TEXT_DATABASE_MANAGER_H_
diff --git a/chrome/browser/history/text_database_manager_unittest.cc b/chrome/browser/history/text_database_manager_unittest.cc
new file mode 100644
index 0000000..8e7f27e
--- /dev/null
+++ b/chrome/browser/history/text_database_manager_unittest.cc
@@ -0,0 +1,537 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "app/sql/connection.h" +#include "base/file_path.h" +#include "base/file_util.h" +#include "base/message_loop.h" +#include "base/utf_string_conversions.h" +#include "chrome/browser/history/text_database_manager.h" +#include "chrome/browser/history/visit_database.h" +#include "testing/gtest/include/gtest/gtest.h" + +using base::Time; +using base::TimeDelta; +using base::TimeTicks; + +namespace history { + +namespace { + +const char* kURL1 = "http://www.google.com/asdf"; +const char* kTitle1 = "Google A"; +const char* kBody1 = "FOO page one."; + +const char* kURL2 = "http://www.google.com/qwer"; +const char* kTitle2 = "Google B"; +const char* kBody2 = "FOO two."; + +const char* kURL3 = "http://www.google.com/zxcv"; +const char* kTitle3 = "Google C"; +const char* kBody3 = "FOO drei"; + +const char* kURL4 = "http://www.google.com/hjkl"; +const char* kTitle4 = "Google D"; +const char* kBody4 = "FOO lalala four."; + +const char* kURL5 = "http://www.google.com/uiop"; +const char* kTitle5 = "Google cinq"; +const char* kBody5 = "FOO page one."; + +// This provides a simple implementation of a URL+VisitDatabase using an +// in-memory sqlite connection. The text database manager expects to be able to +// update the visit database to keep in sync. +class InMemDB : public URLDatabase, public VisitDatabase { + public: + InMemDB() { + EXPECT_TRUE(db_.OpenInMemory()); + CreateURLTable(false); + InitVisitTable(); + } + ~InMemDB() { + } + + private: + virtual sql::Connection& GetDB() { return db_; } + + sql::Connection db_; + + DISALLOW_COPY_AND_ASSIGN(InMemDB); +}; + +// Adds all the pages once, and the first page once more in the next month. +// The times of all the pages will be filled into |*times|. 
+void AddAllPages(TextDatabaseManager& manager, VisitDatabase* visit_db, + std::vector<Time>* times) { + Time::Exploded exploded; + memset(&exploded, 0, sizeof(Time::Exploded)); + + // Put the visits in two different months so it will query across databases. + exploded.year = 2008; + exploded.month = 1; + exploded.day_of_month = 3; + + VisitRow visit_row; + visit_row.url_id = 1; + visit_row.visit_time = Time::FromUTCExploded(exploded); + visit_row.referring_visit = 0; + visit_row.transition = 0; + visit_row.segment_id = 0; + visit_row.is_indexed = false; + VisitID visit_id = visit_db->AddVisit(&visit_row); + + times->push_back(visit_row.visit_time); + manager.AddPageData(GURL(kURL1), visit_row.url_id, visit_row.visit_id, + visit_row.visit_time, UTF8ToUTF16(kTitle1), + UTF8ToUTF16(kBody1)); + + exploded.day_of_month++; + visit_row.url_id = 2; + visit_row.visit_time = Time::FromUTCExploded(exploded); + visit_id = visit_db->AddVisit(&visit_row); + times->push_back(visit_row.visit_time); + manager.AddPageData(GURL(kURL2), visit_row.url_id, visit_row.visit_id, + visit_row.visit_time, UTF8ToUTF16(kTitle2), + UTF8ToUTF16(kBody2)); + + exploded.day_of_month++; + visit_row.url_id = 2; + visit_row.visit_time = Time::FromUTCExploded(exploded); + visit_id = visit_db->AddVisit(&visit_row); + times->push_back(visit_row.visit_time); + manager.AddPageData(GURL(kURL3), visit_row.url_id, visit_row.visit_id, + visit_row.visit_time, UTF8ToUTF16(kTitle3), + UTF8ToUTF16(kBody3)); + + // Put the next ones in the next month. 
+ exploded.month++; + visit_row.url_id = 2; + visit_row.visit_time = Time::FromUTCExploded(exploded); + visit_id = visit_db->AddVisit(&visit_row); + times->push_back(visit_row.visit_time); + manager.AddPageData(GURL(kURL4), visit_row.url_id, visit_row.visit_id, + visit_row.visit_time, UTF8ToUTF16(kTitle4), + UTF8ToUTF16(kBody4)); + + exploded.day_of_month++; + visit_row.url_id = 2; + visit_row.visit_time = Time::FromUTCExploded(exploded); + visit_id = visit_db->AddVisit(&visit_row); + times->push_back(visit_row.visit_time); + manager.AddPageData(GURL(kURL5), visit_row.url_id, visit_row.visit_id, + visit_row.visit_time, UTF8ToUTF16(kTitle5), + UTF8ToUTF16(kBody5)); + + // Put the first one in again in the second month. + exploded.day_of_month++; + visit_row.url_id = 2; + visit_row.visit_time = Time::FromUTCExploded(exploded); + visit_id = visit_db->AddVisit(&visit_row); + times->push_back(visit_row.visit_time); + manager.AddPageData(GURL(kURL1), visit_row.url_id, visit_row.visit_id, + visit_row.visit_time, UTF8ToUTF16(kTitle1), + UTF8ToUTF16(kBody1)); +} + +bool ResultsHaveURL(const std::vector<TextDatabase::Match>& results, + const char* url) { + GURL gurl(url); + for (size_t i = 0; i < results.size(); i++) { + if (results[i].url == gurl) + return true; + } + return false; +} + +} // namespace + +class TextDatabaseManagerTest : public testing::Test { + public: + // Called manually by the test so it can report failure to initialize. + bool Init() { + return file_util::CreateNewTempDirectory( + FILE_PATH_LITERAL("TestSearchTest"), &dir_); + } + + protected: + void SetUp() { + } + + void TearDown() { + file_util::Delete(dir_, true); + } + + MessageLoop message_loop_; + + // Directory containing the databases. + FilePath dir_; +}; + +// Tests basic querying. 
+TEST_F(TextDatabaseManagerTest, InsertQuery) { + ASSERT_TRUE(Init()); + InMemDB visit_db; + TextDatabaseManager manager(dir_, &visit_db, &visit_db); + ASSERT_TRUE(manager.Init(NULL)); + + std::vector<Time> times; + AddAllPages(manager, &visit_db, ×); + + QueryOptions options; + options.begin_time = times[0] - TimeDelta::FromDays(100); + options.end_time = times[times.size() - 1] + TimeDelta::FromDays(100); + std::vector<TextDatabase::Match> results; + Time first_time_searched; + manager.GetTextMatches(UTF8ToUTF16("FOO"), options, + &results, &first_time_searched); + + // We should have matched every page. + EXPECT_EQ(6U, results.size()); + EXPECT_TRUE(ResultsHaveURL(results, kURL1)); + EXPECT_TRUE(ResultsHaveURL(results, kURL2)); + EXPECT_TRUE(ResultsHaveURL(results, kURL3)); + EXPECT_TRUE(ResultsHaveURL(results, kURL4)); + EXPECT_TRUE(ResultsHaveURL(results, kURL5)); + + // The first time searched should have been the first page's time or before + // (it could have eliminated some time for us). + EXPECT_TRUE(first_time_searched <= times[0]); +} + +// Tests that adding page components piecemeal will get them added properly. +// This does not supply a visit to update, this mode is used only by the unit +// tests right now, but we test it anyway. +TEST_F(TextDatabaseManagerTest, InsertCompleteNoVisit) { + ASSERT_TRUE(Init()); + InMemDB visit_db; + TextDatabaseManager manager(dir_, &visit_db, &visit_db); + ASSERT_TRUE(manager.Init(NULL)); + + // First add one without a visit. + const GURL url(kURL1); + manager.AddPageURL(url, 0, 0, Time::Now()); + manager.AddPageTitle(url, UTF8ToUTF16(kTitle1)); + manager.AddPageContents(url, UTF8ToUTF16(kBody1)); + + // Check that the page got added. 
+ QueryOptions options; + std::vector<TextDatabase::Match> results; + Time first_time_searched; + + manager.GetTextMatches(UTF8ToUTF16("FOO"), options, + &results, &first_time_searched); + ASSERT_EQ(1U, results.size()); + EXPECT_EQ(kTitle1, UTF16ToUTF8(results[0].title)); +} + +// Like InsertCompleteNoVisit but specifies a visit to update. We check that the +// visit was updated properly. +TEST_F(TextDatabaseManagerTest, InsertCompleteVisit) { + ASSERT_TRUE(Init()); + InMemDB visit_db; + TextDatabaseManager manager(dir_, &visit_db, &visit_db); + ASSERT_TRUE(manager.Init(NULL)); + + // First add a visit to a page. We can just make up a URL ID since there is + // not actually any URL database around. + VisitRow visit; + visit.url_id = 1; + visit.visit_time = Time::Now(); + visit.referring_visit = 0; + visit.transition = PageTransition::LINK; + visit.segment_id = 0; + visit.is_indexed = false; + visit_db.AddVisit(&visit); + + // Add a full text indexed entry for that visit. + const GURL url(kURL2); + manager.AddPageURL(url, visit.url_id, visit.visit_id, visit.visit_time); + manager.AddPageContents(url, UTF8ToUTF16(kBody2)); + manager.AddPageTitle(url, UTF8ToUTF16(kTitle2)); + + // Check that the page got added. + QueryOptions options; + std::vector<TextDatabase::Match> results; + Time first_time_searched; + + manager.GetTextMatches(UTF8ToUTF16("FOO"), options, + &results, &first_time_searched); + ASSERT_EQ(1U, results.size()); + EXPECT_EQ(kTitle2, UTF16ToUTF8(results[0].title)); + + // Check that the visit got updated for its new indexed state. + VisitRow out_visit; + ASSERT_TRUE(visit_db.GetRowForVisit(visit.visit_id, &out_visit)); + EXPECT_TRUE(out_visit.is_indexed); +} + +// Tests that partial inserts that expire are added to the database. +TEST_F(TextDatabaseManagerTest, InsertPartial) { + ASSERT_TRUE(Init()); + InMemDB visit_db; + TextDatabaseManager manager(dir_, &visit_db, &visit_db); + ASSERT_TRUE(manager.Init(NULL)); + + // Add the first one with just a URL. 
+ GURL url1(kURL1); + manager.AddPageURL(url1, 0, 0, Time::Now()); + + // Now add a second one with a URL and title. + GURL url2(kURL2); + manager.AddPageURL(url2, 0, 0, Time::Now()); + manager.AddPageTitle(url2, UTF8ToUTF16(kTitle2)); + + // The third one has a URL and body. + GURL url3(kURL3); + manager.AddPageURL(url3, 0, 0, Time::Now()); + manager.AddPageContents(url3, UTF8ToUTF16(kBody3)); + + // Expire stuff very fast. This assumes that the time between the first + // AddPageURL and this line is less than the expiration time (20 seconds). + TimeTicks added_time = TimeTicks::Now(); + TimeTicks expire_time = added_time + TimeDelta::FromSeconds(5); + manager.FlushOldChangesForTime(expire_time); + + // Do a query, nothing should be added yet. + QueryOptions options; + std::vector<TextDatabase::Match> results; + Time first_time_searched; + manager.GetTextMatches(UTF8ToUTF16("google"), options, + &results, &first_time_searched); + ASSERT_EQ(0U, results.size()); + + // Compute a time threshold that will cause everything to be flushed, and + // poke at the manager's internals to cause this to happen. + expire_time = added_time + TimeDelta::FromDays(1); + manager.FlushOldChangesForTime(expire_time); + + // Now we should have all 3 URLs added. + manager.GetTextMatches(UTF8ToUTF16("google"), options, + &results, &first_time_searched); + ASSERT_EQ(3U, results.size()); + EXPECT_TRUE(ResultsHaveURL(results, kURL1)); + EXPECT_TRUE(ResultsHaveURL(results, kURL2)); + EXPECT_TRUE(ResultsHaveURL(results, kURL3)); +} + +// Tests that partial inserts (due to timeouts) will still get updated if the +// data comes in later. +TEST_F(TextDatabaseManagerTest, PartialComplete) { + ASSERT_TRUE(Init()); + InMemDB visit_db; + TextDatabaseManager manager(dir_, &visit_db, &visit_db); + ASSERT_TRUE(manager.Init(NULL)); + + Time added_time = Time::Now(); + GURL url(kURL1); + + // We have to have the URL in the URL and visit databases for this test to + // work. 
+ URLRow url_row(url); + url_row.set_title(UTF8ToUTF16("chocolate")); + URLID url_id = visit_db.AddURL(url_row); + ASSERT_TRUE(url_id); + VisitRow visit_row; + visit_row.url_id = url_id; + visit_row.visit_time = added_time; + visit_db.AddVisit(&visit_row); + + // Add a URL with no title or body, and say that it expired. + manager.AddPageURL(url, 0, 0, added_time); + TimeTicks expire_time = TimeTicks::Now() + TimeDelta::FromDays(1); + manager.FlushOldChangesForTime(expire_time); + + // Add the title. We should be able to query based on that. The title in the + // URL row we set above should not come into the picture. + manager.AddPageTitle(url, UTF8ToUTF16("Some unique title")); + Time first_time_searched; + QueryOptions options; + std::vector<TextDatabase::Match> results; + manager.GetTextMatches(UTF8ToUTF16("unique"), options, + &results, &first_time_searched); + EXPECT_EQ(1U, results.size()); + manager.GetTextMatches(UTF8ToUTF16("chocolate"), options, + &results, &first_time_searched); + EXPECT_EQ(0U, results.size()); + + // Now add the body, which should be queryable. + manager.AddPageContents(url, UTF8ToUTF16("Very awesome body")); + manager.GetTextMatches(UTF8ToUTF16("awesome"), options, &results, &first_time_searched); + EXPECT_EQ(1U, results.size()); + + // Adding the body will actually copy the title from the URL table rather + // than the previously indexed row (we made them not match above). This isn't + // necessarily what we want, but it's how it's implemented, and we don't want + // to regress it. + manager.GetTextMatches(UTF8ToUTF16("chocolate"), options, &results, &first_time_searched); + EXPECT_EQ(1U, results.size()); +} + +// Tests that changes get properly committed to disk. +TEST_F(TextDatabaseManagerTest, Writing) { + ASSERT_TRUE(Init()); + + QueryOptions options; + std::vector<TextDatabase::Match> results; + Time first_time_searched; + + InMemDB visit_db; + + // Create the manager and write some stuff to it. 
+ { + TextDatabaseManager manager(dir_, &visit_db, &visit_db); + ASSERT_TRUE(manager.Init(NULL)); + + std::vector<Time> times; + AddAllPages(manager, &visit_db, ×); + + // We should have matched every page. + manager.GetTextMatches(UTF8ToUTF16("FOO"), options, &results, &first_time_searched); + EXPECT_EQ(6U, results.size()); + } + results.clear(); + + // Recreate the manager and make sure it finds the written stuff. + { + TextDatabaseManager manager(dir_, &visit_db, &visit_db); + ASSERT_TRUE(manager.Init(NULL)); + + // We should have matched every page again. + manager.GetTextMatches(UTF8ToUTF16("FOO"), options, + &results, &first_time_searched); + EXPECT_EQ(6U, results.size()); + } +} + +// Tests that changes get properly committed to disk, as in the Writing test +// above, but when there is a transaction around the adds. +TEST_F(TextDatabaseManagerTest, WritingTransaction) { + ASSERT_TRUE(Init()); + + QueryOptions options; + std::vector<TextDatabase::Match> results; + Time first_time_searched; + + InMemDB visit_db; + + // Create the manager and write some stuff to it. + { + TextDatabaseManager manager(dir_, &visit_db, &visit_db); + ASSERT_TRUE(manager.Init(NULL)); + + std::vector<Time> times; + manager.BeginTransaction(); + AddAllPages(manager, &visit_db, ×); + // "Forget" to commit, it should be autocommittedd for us. + + // We should have matched every page. + manager.GetTextMatches(UTF8ToUTF16("FOO"), options, + &results, &first_time_searched); + EXPECT_EQ(6U, results.size()); + } + results.clear(); + + // Recreate the manager and make sure it finds the written stuff. + { + TextDatabaseManager manager(dir_, &visit_db, &visit_db); + ASSERT_TRUE(manager.Init(NULL)); + + // We should have matched every page again. + manager.GetTextMatches(UTF8ToUTF16("FOO"), options, + &results, &first_time_searched); + EXPECT_EQ(6U, results.size()); + } +} + +// Tests querying where the maximum number of items is met. 
+TEST_F(TextDatabaseManagerTest, QueryMax) { + ASSERT_TRUE(Init()); + InMemDB visit_db; + TextDatabaseManager manager(dir_, &visit_db, &visit_db); + ASSERT_TRUE(manager.Init(NULL)); + + std::vector<Time> times; + AddAllPages(manager, &visit_db, ×); + + string16 foo = UTF8ToUTF16("FOO"); + + QueryOptions options; + options.begin_time = times[0] - TimeDelta::FromDays(100); + options.end_time = times[times.size() - 1] + TimeDelta::FromDays(100); + options.max_count = 2; + std::vector<TextDatabase::Match> results; + Time first_time_searched; + manager.GetTextMatches(foo, options, &results, &first_time_searched); + + // We should have gotten the last two pages as results (the first page is + // also the last). + EXPECT_EQ(2U, results.size()); + EXPECT_TRUE(first_time_searched <= times[4]); + EXPECT_TRUE(ResultsHaveURL(results, kURL5)); + EXPECT_TRUE(ResultsHaveURL(results, kURL1)); + + // Asking for 4 pages, the first one should be in another DB. + options.max_count = 4; + manager.GetTextMatches(foo, options, &results, &first_time_searched); + + EXPECT_EQ(4U, results.size()); + EXPECT_TRUE(first_time_searched <= times[4]); + EXPECT_TRUE(ResultsHaveURL(results, kURL3)); + EXPECT_TRUE(ResultsHaveURL(results, kURL4)); + EXPECT_TRUE(ResultsHaveURL(results, kURL5)); + EXPECT_TRUE(ResultsHaveURL(results, kURL1)); +} + +// Tests querying backwards in time in chunks. +TEST_F(TextDatabaseManagerTest, QueryBackwards) { + ASSERT_TRUE(Init()); + InMemDB visit_db; + TextDatabaseManager manager(dir_, &visit_db, &visit_db); + ASSERT_TRUE(manager.Init(NULL)); + + std::vector<Time> times; + AddAllPages(manager, &visit_db, ×); + + string16 foo = UTF8ToUTF16("FOO"); + + // First do a query for all time, but with a max of 2. This will give us the + // last two results and will tell us where to start searching when we want + // to go back in time. 
+ QueryOptions options; + options.begin_time = times[0] - TimeDelta::FromDays(100); + options.end_time = times[times.size() - 1] + TimeDelta::FromDays(100); + options.max_count = 2; + std::vector<TextDatabase::Match> results; + Time first_time_searched; + manager.GetTextMatches(foo, options, &results, &first_time_searched); + + // Check that we got the last two results. + EXPECT_EQ(2U, results.size()); + EXPECT_TRUE(first_time_searched <= times[4]); + EXPECT_TRUE(ResultsHaveURL(results, kURL5)); + EXPECT_TRUE(ResultsHaveURL(results, kURL1)); + + // Query the previous two URLs and make sure we got the correct ones. + options.end_time = first_time_searched; + manager.GetTextMatches(foo, options, &results, &first_time_searched); + EXPECT_EQ(2U, results.size()); + EXPECT_TRUE(first_time_searched <= times[2]); + EXPECT_TRUE(ResultsHaveURL(results, kURL3)); + EXPECT_TRUE(ResultsHaveURL(results, kURL4)); + + // Query the previous two URLs... + options.end_time = first_time_searched; + manager.GetTextMatches(foo, options, &results, &first_time_searched); + EXPECT_EQ(2U, results.size()); + EXPECT_TRUE(first_time_searched <= times[0]); + EXPECT_TRUE(ResultsHaveURL(results, kURL2)); + EXPECT_TRUE(ResultsHaveURL(results, kURL1)); + + // Try to query some more, there should be no results. + options.end_time = first_time_searched; + manager.GetTextMatches(foo, options, &results, &first_time_searched); + EXPECT_EQ(0U, results.size()); +} + +} // namespace history diff --git a/chrome/browser/history/text_database_unittest.cc b/chrome/browser/history/text_database_unittest.cc new file mode 100644 index 0000000..f604301 --- /dev/null +++ b/chrome/browser/history/text_database_unittest.cc @@ -0,0 +1,332 @@ +// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include <string> + +#include "base/file_util.h" +#include "base/path_service.h" +#include "base/scoped_ptr.h" +#include "base/string_util.h" +#include "base/utf_string_conversions.h" +#include "chrome/browser/history/text_database.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "testing/platform_test.h" + +using base::Time; + +namespace history { + +namespace { + +// Note that all pages have "COUNTTAG" which allows us to count the number of +// pages in the database withoujt adding any extra functions to the DB object. +const char kURL1[] = "http://www.google.com/"; +const int kTime1 = 1000; +const char kTitle1[] = "Google"; +const char kBody1[] = + "COUNTTAG Web Images Maps News Shopping Gmail more My Account | " + "Sign out Advanced Search Preferences Language Tools Advertising Programs " + "- Business Solutions - About Google, 2008 Google"; + +const char kURL2[] = "http://images.google.com/"; +const int kTime2 = 2000; +const char kTitle2[] = "Google Image Search"; +const char kBody2[] = + "COUNTTAG Web Images Maps News Shopping Gmail more My Account | " + "Sign out Advanced Image Search Preferences The most comprehensive image " + "search on the web. Want to help improve Google Image Search? Try Google " + "Image Labeler. Advertising Programs - Business Solutions - About Google " + "2008 Google"; + +const char kURL3[] = "http://slashdot.org/"; +const int kTime3 = 3000; +const char kTitle3[] = "Slashdot: News for nerds, stuff that matters"; +const char kBody3[] = + "COUNTTAG Slashdot Log In Create Account Subscribe Firehose Why " + "Log In? Why Subscribe? Nickname Password Public Terminal Sections " + "Main Apple AskSlashdot Backslash Books Developers Games Hardware " + "Interviews IT Linux Mobile Politics Science YRO"; + +// Returns the number of rows currently in the database. +int RowCount(TextDatabase* db) { + QueryOptions options; + options.begin_time = Time::FromInternalValue(0); + // Leave end_time at now. 
+ + std::vector<TextDatabase::Match> results; + Time first_time_searched; + TextDatabase::URLSet unique_urls; + db->GetTextMatches("COUNTTAG", options, &results, &unique_urls, + &first_time_searched); + return static_cast<int>(results.size()); +} + +// Adds each of the test pages to the database. +void AddAllTestData(TextDatabase* db) { + EXPECT_TRUE(db->AddPageData( + Time::FromInternalValue(kTime1), kURL1, kTitle1, kBody1)); + EXPECT_TRUE(db->AddPageData( + Time::FromInternalValue(kTime2), kURL2, kTitle2, kBody2)); + EXPECT_TRUE(db->AddPageData( + Time::FromInternalValue(kTime3), kURL3, kTitle3, kBody3)); + EXPECT_EQ(3, RowCount(db)); +} + +bool ResultsHaveURL(const std::vector<TextDatabase::Match>& results, + const char* url) { + GURL gurl(url); + for (size_t i = 0; i < results.size(); i++) { + if (results[i].url == gurl) + return true; + } + return false; +} + +} // namespace + +class TextDatabaseTest : public PlatformTest { + public: + TextDatabaseTest() : db_(NULL) { + } + + protected: + void SetUp() { + PlatformTest::SetUp(); + PathService::Get(base::DIR_TEMP, &temp_path_); + } + + void TearDown() { + for (size_t i = 0; i < opened_files_.size(); i++) + file_util::Delete(opened_files_[i], false); + file_util::Delete(file_name_, false); + PlatformTest::TearDown(); + } + + // Create databases with this function, which will ensure that the files are + // deleted on shutdown. Only open one database for each file. Returns NULL on + // failure. + // + // Set |delete_file| to delete any existing file. If we are trying to create + // the file for the first time, we don't want a previous test left in a + // weird state to have left a file that would affect us. 
+ TextDatabase* CreateDB(TextDatabase::DBIdent id, + bool allow_create, + bool delete_file) { + TextDatabase* db = new TextDatabase(temp_path_, id, allow_create); + + if (delete_file) + file_util::Delete(db->file_name(), false); + + if (!db->Init()) { + delete db; + return NULL; + } + opened_files_.push_back(db->file_name()); + return db; + } + + // Directory containing the databases. + FilePath temp_path_; + + // Name of the main database file. + FilePath file_name_; + sqlite3* db_; + + std::vector<FilePath> opened_files_; +}; + +TEST_F(TextDatabaseTest, AttachDetach) { + // First database with one page. + const int kIdee1 = 200801; + scoped_ptr<TextDatabase> db1(CreateDB(kIdee1, true, true)); + ASSERT_TRUE(!!db1.get()); + EXPECT_TRUE(db1->AddPageData( + Time::FromInternalValue(kTime1), kURL1, kTitle1, kBody1)); + + // Second database with one page. + const int kIdee2 = 200802; + scoped_ptr<TextDatabase> db2(CreateDB(kIdee2, true, true)); + ASSERT_TRUE(!!db2.get()); + EXPECT_TRUE(db2->AddPageData( + Time::FromInternalValue(kTime2), kURL2, kTitle2, kBody2)); + + // Detach, then reattach database one. The file should exist, so we force + // opening an existing file. + db1.reset(); + db1.reset(CreateDB(kIdee1, false, false)); + ASSERT_TRUE(!!db1.get()); + + // We should not be able to attach this random database for which no file + // exists. + const int kIdeeNoExisto = 999999999; + scoped_ptr<TextDatabase> db3(CreateDB(kIdeeNoExisto, false, true)); + EXPECT_FALSE(!!db3.get()); +} + +TEST_F(TextDatabaseTest, AddRemove) { + // Create a database and add some pages to it. 
+ const int kIdee1 = 200801; + scoped_ptr<TextDatabase> db(CreateDB(kIdee1, true, true)); + ASSERT_TRUE(!!db.get()); + URLID id1 = db->AddPageData( + Time::FromInternalValue(kTime1), kURL1, kTitle1, kBody1); + EXPECT_NE(0, id1); + URLID id2 = db->AddPageData( + Time::FromInternalValue(kTime2), kURL2, kTitle2, kBody2); + EXPECT_NE(0, id2); + URLID id3 = db->AddPageData( + Time::FromInternalValue(kTime3), kURL3, kTitle3, kBody3); + EXPECT_NE(0, id3); + EXPECT_EQ(3, RowCount(db.get())); + + // Make sure we can delete some of the data. + db->DeletePageData(Time::FromInternalValue(kTime1), kURL1); + EXPECT_EQ(2, RowCount(db.get())); + + // Close and reopen. + db.reset(new TextDatabase(temp_path_, kIdee1, false)); + EXPECT_TRUE(db->Init()); + + // Verify that the deleted ID is gone and try to delete another one. + EXPECT_EQ(2, RowCount(db.get())); + db->DeletePageData(Time::FromInternalValue(kTime2), kURL2); + EXPECT_EQ(1, RowCount(db.get())); +} + +TEST_F(TextDatabaseTest, Query) { + // Make a database with some pages. + const int kIdee1 = 200801; + scoped_ptr<TextDatabase> db(CreateDB(kIdee1, true, true)); + EXPECT_TRUE(!!db.get()); + AddAllTestData(db.get()); + + // Get all the results. + QueryOptions options; + options.begin_time = Time::FromInternalValue(0); + + std::vector<TextDatabase::Match> results; + Time first_time_searched; + TextDatabase::URLSet unique_urls; + db->GetTextMatches("COUNTTAG", options, &results, &unique_urls, + &first_time_searched); + EXPECT_TRUE(unique_urls.empty()) << "Didn't ask for unique URLs"; + + // All 3 sites should be returned in order. + ASSERT_EQ(3U, results.size()); + EXPECT_EQ(GURL(kURL1), results[2].url); + EXPECT_EQ(GURL(kURL2), results[1].url); + EXPECT_EQ(GURL(kURL3), results[0].url); + + // Verify the info on those results. 
+ EXPECT_TRUE(Time::FromInternalValue(kTime1) == results[2].time); + EXPECT_TRUE(Time::FromInternalValue(kTime2) == results[1].time); + EXPECT_TRUE(Time::FromInternalValue(kTime3) == results[0].time); + + EXPECT_EQ(std::string(kTitle1), UTF16ToUTF8(results[2].title)); + EXPECT_EQ(std::string(kTitle2), UTF16ToUTF8(results[1].title)); + EXPECT_EQ(std::string(kTitle3), UTF16ToUTF8(results[0].title)); + + // Should have no matches in the title. + EXPECT_EQ(0U, results[0].title_match_positions.size()); + EXPECT_EQ(0U, results[1].title_match_positions.size()); + EXPECT_EQ(0U, results[2].title_match_positions.size()); + + // We don't want to be dependent on the exact snippet algorithm, but we know + // since we searched for "COUNTTAG" which occurs at the beginning of each + // document, that each snippet should start with that. + EXPECT_TRUE(StartsWithASCII(UTF16ToUTF8(results[0].snippet.text()), + "COUNTTAG", false)); + EXPECT_TRUE(StartsWithASCII(UTF16ToUTF8(results[1].snippet.text()), + "COUNTTAG", false)); + EXPECT_TRUE(StartsWithASCII(UTF16ToUTF8(results[2].snippet.text()), + "COUNTTAG", false)); +} + +TEST_F(TextDatabaseTest, TimeRange) { + // Make a database with some pages. + const int kIdee1 = 200801; + scoped_ptr<TextDatabase> db(CreateDB(kIdee1, true, true)); + ASSERT_TRUE(!!db.get()); + AddAllTestData(db.get()); + + // Beginning should be inclusive, and the ending exclusive. + // Get all the results. + QueryOptions options; + options.begin_time = Time::FromInternalValue(kTime1); + options.end_time = Time::FromInternalValue(kTime3); + + std::vector<TextDatabase::Match> results; + Time first_time_searched; + TextDatabase::URLSet unique_urls; + db->GetTextMatches("COUNTTAG", options, &results, &unique_urls, + &first_time_searched); + EXPECT_TRUE(unique_urls.empty()) << "Didn't ask for unique URLs"; + + // The first and second should have been returned. 
+ EXPECT_EQ(2U, results.size()); + EXPECT_TRUE(ResultsHaveURL(results, kURL1)); + EXPECT_TRUE(ResultsHaveURL(results, kURL2)); + EXPECT_FALSE(ResultsHaveURL(results, kURL3)); + EXPECT_EQ(kTime1, first_time_searched.ToInternalValue()); + + // --------------------------------------------------------------------------- + // Do a query where there isn't a result on the begin boundary, so we can + // test that the first time searched is set to the minimum time considered + // instead of the min value. + options.begin_time = Time::FromInternalValue((kTime2 - kTime1) / 2 + kTime1); + options.end_time = Time::FromInternalValue(kTime3 + 1); + results.clear(); // GetTextMatches does *not* clear the results. + db->GetTextMatches("COUNTTAG", options, &results, &unique_urls, + &first_time_searched); + EXPECT_TRUE(unique_urls.empty()) << "Didn't ask for unique URLs"; + EXPECT_EQ(options.begin_time.ToInternalValue(), + first_time_searched.ToInternalValue()); + + // Should have two results, the second and third. + EXPECT_EQ(2U, results.size()); + EXPECT_FALSE(ResultsHaveURL(results, kURL1)); + EXPECT_TRUE(ResultsHaveURL(results, kURL2)); + EXPECT_TRUE(ResultsHaveURL(results, kURL3)); + + // No results should also set the first_time_searched. + options.begin_time = Time::FromInternalValue(kTime3 + 1); + options.end_time = Time::FromInternalValue(kTime3 * 100); + results.clear(); + db->GetTextMatches("COUNTTAG", options, &results, &unique_urls, + &first_time_searched); + EXPECT_EQ(options.begin_time.ToInternalValue(), + first_time_searched.ToInternalValue()); +} + +// Make sure that max_count works. +TEST_F(TextDatabaseTest, MaxCount) { + // Make a database with some pages. + const int kIdee1 = 200801; + scoped_ptr<TextDatabase> db(CreateDB(kIdee1, true, true)); + ASSERT_TRUE(!!db.get()); + AddAllTestData(db.get()); + + // Set up the query to return all the results with "Google" (should be 2), but + // with a maximum of 1. 
+ QueryOptions options; + options.begin_time = Time::FromInternalValue(kTime1); + options.end_time = Time::FromInternalValue(kTime3 + 1); + options.max_count = 1; + + std::vector<TextDatabase::Match> results; + Time first_time_searched; + TextDatabase::URLSet unique_urls; + db->GetTextMatches("google", options, &results, &unique_urls, + &first_time_searched); + EXPECT_TRUE(unique_urls.empty()) << "Didn't ask for unique URLs"; + + // There should be one result, the most recent one. + EXPECT_EQ(1U, results.size()); + EXPECT_TRUE(ResultsHaveURL(results, kURL2)); + + // The max time considered should be the date of the returned item. + EXPECT_EQ(kTime2, first_time_searched.ToInternalValue()); +} + +} // namespace history diff --git a/chrome/browser/history/thumbnail_database.cc b/chrome/browser/history/thumbnail_database.cc new file mode 100644 index 0000000..8bf203d --- /dev/null +++ b/chrome/browser/history/thumbnail_database.cc @@ -0,0 +1,551 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/history/thumbnail_database.h" + +#include "app/sql/statement.h" +#include "app/sql/transaction.h" +#include "base/command_line.h" +#include "base/file_util.h" +#if defined(OS_MACOSX) +#include "base/mac_util.h" +#endif +#include "base/ref_counted_memory.h" +#include "base/time.h" +#include "base/string_util.h" +#include "base/utf_string_conversions.h" +#include "chrome/browser/diagnostics/sqlite_diagnostics.h" +#include "chrome/browser/history/history_publisher.h" +#include "chrome/browser/history/url_database.h" +#include "chrome/common/chrome_switches.h" +#include "chrome/common/thumbnail_score.h" +#include "gfx/codec/jpeg_codec.h" +#include "third_party/skia/include/core/SkBitmap.h" + +namespace history { + +// Version number of the database. 
+static const int kCurrentVersionNumber = 3; +static const int kCompatibleVersionNumber = 3; + +ThumbnailDatabase::ThumbnailDatabase() : history_publisher_(NULL), + use_top_sites_(false) { +} + +ThumbnailDatabase::~ThumbnailDatabase() { + // The DBCloseScoper will delete the DB and the cache. +} + +sql::InitStatus ThumbnailDatabase::Init( + const FilePath& db_name, + const HistoryPublisher* history_publisher) { + history_publisher_ = history_publisher; + sql::InitStatus status = OpenDatabase(&db_, db_name); + if (status != sql::INIT_OK) + return status; + + // Scope initialization in a transaction so we can't be partially initialized. + sql::Transaction transaction(&db_); + transaction.Begin(); + +#if defined(OS_MACOSX) + // Exclude the thumbnails file and its journal from backups. + mac_util::SetFileBackupExclusion(db_name, true); + FilePath::StringType db_name_string(db_name.value()); + db_name_string += "-journal"; + FilePath db_journal_name(db_name_string); + mac_util::SetFileBackupExclusion(db_journal_name, true); +#endif + + // Create the tables. + if (!meta_table_.Init(&db_, kCurrentVersionNumber, + kCompatibleVersionNumber) || + !InitThumbnailTable() || + !InitFavIconsTable(&db_, false)) { + db_.Close(); + return sql::INIT_FAILURE; + } + InitFavIconsIndex(); + + // Version check. We should not encounter a database too old for us to handle + // in the wild, so we try to continue in that case. 
+ if (meta_table_.GetCompatibleVersionNumber() > kCurrentVersionNumber) { + LOG(WARNING) << "Thumbnail database is too new."; + return sql::INIT_TOO_NEW; + } + + int cur_version = meta_table_.GetVersionNumber(); + if (cur_version == 2) { + if (!UpgradeToVersion3()) { + LOG(WARNING) << "Unable to update to thumbnail database to version 3."; + db_.Close(); + return sql::INIT_FAILURE; + } + ++cur_version; + } + + LOG_IF(WARNING, cur_version < kCurrentVersionNumber) << + "Thumbnail database version " << cur_version << " is too old to handle."; + + // Initialization is complete. + if (!transaction.Commit()) { + db_.Close(); + return sql::INIT_FAILURE; + } + + return sql::INIT_OK; +} + +sql::InitStatus ThumbnailDatabase::OpenDatabase(sql::Connection* db, + const FilePath& db_name) { + // Set the exceptional sqlite error handler. + db->set_error_delegate(GetErrorHandlerForThumbnailDb()); + + // Set the database page size to something larger to give us + // better performance (we're typically seek rather than bandwidth limited). + // This only has an effect before any tables have been created, otherwise + // this is a NOP. Must be a power of 2 and a max of 8192. We use a bigger + // one because we're storing larger data (4-16K) in it, so we want a few + // blocks per element. + db->set_page_size(4096); + + // The UI is generally designed to work well when the thumbnail database is + // slow, so we can tolerate much less caching. The file is also very large + // and so caching won't save a significant percentage of it for us, + // reducing the benefit of caching in the first place. With the default cache + // size of 2000 pages, it will take >8MB of memory, so reducing it can be a + // big savings. + db->set_cache_size(64); + + // Run the database in exclusive mode. Nobody else should be accessing the + // database while we're running, and this will give somewhat improved perf. 
+ db->set_exclusive_locking(); + + if (!db->Open(db_name)) + return sql::INIT_FAILURE; + + return sql::INIT_OK; +} + +bool ThumbnailDatabase::InitThumbnailTable() { + if (!db_.DoesTableExist("thumbnails")) { + if (CommandLine::ForCurrentProcess()-> HasSwitch(switches::kTopSites)) { + use_top_sites_ = true; + return true; + } + if (!db_.Execute("CREATE TABLE thumbnails (" + "url_id INTEGER PRIMARY KEY," + "boring_score DOUBLE DEFAULT 1.0," + "good_clipping INTEGER DEFAULT 0," + "at_top INTEGER DEFAULT 0," + "last_updated INTEGER DEFAULT 0," + "data BLOB)")) + return false; + } + return true; +} + +bool ThumbnailDatabase::UpgradeToVersion3() { + if (use_top_sites_) { + meta_table_.SetVersionNumber(3); + meta_table_.SetCompatibleVersionNumber( + std::min(3, kCompatibleVersionNumber)); + return true; // Not needed after migration to TopSites. + } + + // sqlite doesn't like the "ALTER TABLE xxx ADD (column_one, two, + // three)" syntax, so list out the commands we need to execute: + const char* alterations[] = { + "ALTER TABLE thumbnails ADD boring_score DOUBLE DEFAULT 1.0", + "ALTER TABLE thumbnails ADD good_clipping INTEGER DEFAULT 0", + "ALTER TABLE thumbnails ADD at_top INTEGER DEFAULT 0", + "ALTER TABLE thumbnails ADD last_updated INTEGER DEFAULT 0", + NULL + }; + + for (int i = 0; alterations[i] != NULL; ++i) { + if (!db_.Execute(alterations[i])) { + NOTREACHED(); + return false; + } + } + + meta_table_.SetVersionNumber(3); + meta_table_.SetCompatibleVersionNumber(std::min(3, kCompatibleVersionNumber)); + return true; +} + +bool ThumbnailDatabase::RecreateThumbnailTable() { + if (use_top_sites_) + return true; // Not needed after migration to TopSites. + + if (!db_.Execute("DROP TABLE thumbnails")) + return false; + return InitThumbnailTable(); +} + +bool ThumbnailDatabase::InitFavIconsTable(sql::Connection* db, + bool is_temporary) { + // Note: if you update the schema, don't forget to update + // CopyToTemporaryFaviconTable as well. 
// NOTE(review): this chunk begins inside ThumbnailDatabase::InitFavIconsTable();
// the signature (sql::Connection* db, bool is_temporary) is above this excerpt.
  // Use the temporary table name when the expirer is rebuilding favicons; both
  // tables share the same schema so they can be swapped (see
  // CommitTemporaryFavIconTable()).
  const char* name = is_temporary ? "temp_favicons" : "favicons";
  if (!db->DoesTableExist(name)) {
    std::string sql;
    sql.append("CREATE TABLE ");
    sql.append(name);
    sql.append("("
               "id INTEGER PRIMARY KEY,"
               "url LONGVARCHAR NOT NULL,"
               "last_updated INTEGER DEFAULT 0,"
               "image_data BLOB)");
    if (!db->Execute(sql.c_str()))
      return false;
  }
  return true;
}

// Creates the lookup index used by GetFavIconIDForFavIconURL().
void ThumbnailDatabase::InitFavIconsIndex() {
  // Add an index on the url column. We ignore errors. Since this is always
  // called during startup, the index will normally already exist.
  db_.Execute("CREATE INDEX favicons_url ON favicons(url)");
}

void ThumbnailDatabase::BeginTransaction() {
  db_.BeginTransaction();
}

void ThumbnailDatabase::CommitTransaction() {
  db_.CommitTransaction();
}

void ThumbnailDatabase::Vacuum() {
  // VACUUM rewrites the whole file; sqlite forbids it inside a transaction.
  DCHECK(db_.transaction_nesting() == 0) <<
      "Can not have a transaction when vacuuming.";
  db_.Execute("VACUUM");
}

// Stores |thumbnail| for page |id|, but only when it scores better than any
// thumbnail already stored (ShouldReplaceThumbnailWith()). A null bitmap
// deletes the stored thumbnail instead. No-op after migration to TopSites.
void ThumbnailDatabase::SetPageThumbnail(
    const GURL& url,
    URLID id,
    const SkBitmap& thumbnail,
    const ThumbnailScore& score,
    base::Time time) {
  if (use_top_sites_)
    return;  // Not possible after migration to TopSites.

  if (!thumbnail.isNull()) {
    bool add_thumbnail = true;
    ThumbnailScore current_score;
    // NOTE(review): the dump shows "¤t_score" here; that is the
    // HTML-entity mangling of "&current_score", reconstructed accordingly.
    if (ThumbnailScoreForId(id, &current_score)) {
      add_thumbnail = ShouldReplaceThumbnailWith(current_score, score);
    }

    if (add_thumbnail) {
      sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE,
          "INSERT OR REPLACE INTO thumbnails "
          "(url_id, boring_score, good_clipping, at_top, last_updated, data) "
          "VALUES (?,?,?,?,?,?)"));
      if (!statement)
        return;

      // We use 90 quality (out of 100) which is pretty high, because
      // we're very sensitive to artifacts for these small sized,
      // highly detailed images.
      std::vector<unsigned char> jpeg_data;
      SkAutoLockPixels thumbnail_lock(thumbnail);
      bool encoded = gfx::JPEGCodec::Encode(
          reinterpret_cast<unsigned char*>(thumbnail.getAddr32(0, 0)),
          gfx::JPEGCodec::FORMAT_BGRA, thumbnail.width(),
          thumbnail.height(),
          static_cast<int>(thumbnail.rowBytes()), 90,
          &jpeg_data);

      if (encoded) {
        statement.BindInt64(0, id);
        statement.BindDouble(1, score.boring_score);
        statement.BindBool(2, score.good_clipping);
        statement.BindBool(3, score.at_top);
        statement.BindInt64(4, score.time_at_snapshot.ToTimeT());
        statement.BindBlob(5, &jpeg_data[0],
                           static_cast<int>(jpeg_data.size()));
        if (!statement.Run())
          NOTREACHED() << db_.GetErrorMessage();
      }

      // Publish the thumbnail to any indexers listening to us.
      // The tests may send an invalid url. Hence avoid publishing those.
      if (url.is_valid() && history_publisher_ != NULL)
        history_publisher_->PublishPageThumbnail(jpeg_data, url, time);
    }
  } else {
    if (!DeleteThumbnail(id) )
      DLOG(WARNING) << "Unable to delete thumbnail";
  }
}

// Reads the stored (JPEG-encoded, per SetPageThumbnail) bytes for |id| into
// |data|. Returns false when no thumbnail exists or after TopSites migration.
bool ThumbnailDatabase::GetPageThumbnail(URLID id,
                                         std::vector<unsigned char>* data) {
  if (use_top_sites_)
    return false;  // Not possible after migration to TopSites.

  sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE,
      "SELECT data FROM thumbnails WHERE url_id=?"));
  if (!statement)
    return false;

  statement.BindInt64(0, id);
  if (!statement.Step())
    return false;  // don't have a thumbnail for this ID

  statement.ColumnBlobAsVector(0, data);
  return true;
}

// Removes the thumbnail row for |id|. Trivially succeeds after migration
// (the table no longer exists, so there is nothing to delete).
bool ThumbnailDatabase::DeleteThumbnail(URLID id) {
  if (use_top_sites_)
    return true;  // Not possible after migration to TopSites.

  sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE,
      "DELETE FROM thumbnails WHERE url_id = ?"));
  if (!statement)
    return false;

  statement.BindInt64(0, id);
  return statement.Run();
}

// Loads the stored ThumbnailScore for |id| into |score|; false when absent.
bool ThumbnailDatabase::ThumbnailScoreForId(URLID id,
                                            ThumbnailScore* score) {
  if (use_top_sites_)
    return false;  // Not possible after migration to TopSites.

  // Fetch the current thumbnail's information to make sure we
  // aren't replacing a good thumbnail with one that's worse.
  sql::Statement select_statement(db_.GetCachedStatement(SQL_FROM_HERE,
      "SELECT boring_score, good_clipping, at_top, last_updated "
      "FROM thumbnails WHERE url_id=?"));
  if (!select_statement) {
    NOTREACHED() << "Couldn't build select statement!";
  } else {
    select_statement.BindInt64(0, id);
    if (select_statement.Step()) {
      double current_boring_score = select_statement.ColumnDouble(0);
      bool current_clipping = select_statement.ColumnBool(1);
      bool current_at_top = select_statement.ColumnBool(2);
      base::Time last_updated =
          base::Time::FromTimeT(select_statement.ColumnInt64(3));
      *score = ThumbnailScore(current_boring_score, current_clipping,
                              current_at_top, last_updated);
      return true;
    }
  }

  return false;
}

// Writes (or clears, when |icon_data| is empty) the image bytes and the
// last-updated time for favicon |icon_id|.
// NOTE(review): parameter is declared URLID here but FavIconID in the header;
// presumably both alias the same integer type — confirm against history_types.h.
bool ThumbnailDatabase::SetFavIcon(URLID icon_id,
                                   scoped_refptr<RefCountedMemory> icon_data,
                                   base::Time time) {
  DCHECK(icon_id);
  if (icon_data->size()) {
    sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE,
        "UPDATE favicons SET image_data=?, last_updated=? WHERE id=?"));
    if (!statement)
      return 0;  // NOTE(review): 0 == false; literal kept from original.

    statement.BindBlob(0, icon_data->front(),
                       static_cast<int>(icon_data->size()));
    statement.BindInt64(1, time.ToTimeT());
    statement.BindInt64(2, icon_id);
    return statement.Run();
  } else {
    sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE,
        "UPDATE favicons SET image_data=NULL, last_updated=? WHERE id=?"));
    if (!statement)
      return 0;

    statement.BindInt64(0, time.ToTimeT());
    statement.BindInt64(1, icon_id);
    return statement.Run();
  }
}

// Refreshes only the last_updated stamp for |icon_id|.
bool ThumbnailDatabase::SetFavIconLastUpdateTime(FavIconID icon_id,
                                                 base::Time time) {
  sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE,
      "UPDATE favicons SET last_updated=? WHERE id=?"));
  if (!statement)
    return 0;  // NOTE(review): 0 == false; literal kept from original.

  statement.BindInt64(0, time.ToTimeT());
  statement.BindInt64(1, icon_id);
  return statement.Run();
}

// Returns the row id for |icon_url|, or 0 when the URL is not cached.
FavIconID ThumbnailDatabase::GetFavIconIDForFavIconURL(const GURL& icon_url) {
  sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE,
      "SELECT id FROM favicons WHERE url=?"));
  if (!statement)
    return 0;

  statement.BindString(0, URLDatabase::GURLToDatabaseURL(icon_url));
  if (!statement.Step())
    return 0;  // not cached

  return statement.ColumnInt64(0);
}

// Reads the update time, image bytes, and (optionally) the source URL for
// favicon |icon_id|. Returns false when no row exists for the id.
bool ThumbnailDatabase::GetFavIcon(
    FavIconID icon_id,
    base::Time* last_updated,
    std::vector<unsigned char>* png_icon_data,
    GURL* icon_url) {
  DCHECK(icon_id);

  sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE,
      "SELECT last_updated, image_data, url FROM favicons WHERE id=?"));
  if (!statement)
    return 0;  // NOTE(review): 0 == false; literal kept from original.

  statement.BindInt64(0, icon_id);

  if (!statement.Step())
    return false;  // No entry for the id.

  *last_updated = base::Time::FromTimeT(statement.ColumnInt64(0));
  if (statement.ColumnByteLength(1) > 0)
    statement.ColumnBlobAsVector(1, png_icon_data);
  if (icon_url)
    *icon_url = GURL(statement.ColumnString(2));

  return true;
}

// Inserts a new favicon row for |icon_url| (no image yet) and returns its id,
// or 0 on failure.
FavIconID ThumbnailDatabase::AddFavIcon(const GURL& icon_url) {
  sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE,
      "INSERT INTO favicons (url) VALUES (?)"));
  if (!statement)
    return 0;

  statement.BindString(0, URLDatabase::GURLToDatabaseURL(icon_url));
  if (!statement.Run())
    return 0;
  return db_.GetLastInsertRowId();
}

bool ThumbnailDatabase::DeleteFavIcon(FavIconID id) {
  sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE,
      "DELETE FROM favicons WHERE id = ?"));
  if (!statement)
    return false;
  statement.BindInt64(0, id);
  return statement.Run();
}

// Copies one favicon row into temp_favicons (created by
// InitTemporaryFavIconsTable()); the copy gets a fresh id, which is returned.
FavIconID ThumbnailDatabase::CopyToTemporaryFavIconTable(FavIconID source) {
  sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE,
      "INSERT INTO temp_favicons (url, last_updated, image_data)"
      "SELECT url, last_updated, image_data "
      "FROM favicons WHERE id = ?"));
  if (!statement)
    return 0;
  statement.BindInt64(0, source);
  if (!statement.Run())
    return 0;

  // We return the ID of the newly inserted favicon.
  return db_.GetLastInsertRowId();
}

// Atomically (from callers' perspective) replaces the favicons table with the
// temporary one, discarding every favicon that was not explicitly copied over.
bool ThumbnailDatabase::CommitTemporaryFavIconTable() {
  // Delete the old favicons table.
  if (!db_.Execute("DROP TABLE favicons"))
    return false;

  // Rename the temporary one.
  if (!db_.Execute("ALTER TABLE temp_favicons RENAME TO favicons"))
    return false;

  // The renamed table needs the index (the temporary table doesn't have one).
  InitFavIconsIndex();
  return true;
}

bool ThumbnailDatabase::NeedsMigrationToTopSites() {
  return !use_top_sites_;
}

// Migrates to TopSites: copies the favicons into a fresh database at
// |new_db_file|, reopens on it (dropping the thumbnails table along with the
// old file), and deletes |old_db_file|. On every failure path the transaction
// that was committed up-front is re-opened so the caller's transaction
// bookkeeping stays balanced.
bool ThumbnailDatabase::RenameAndDropThumbnails(const FilePath& old_db_file,
                                                const FilePath& new_db_file) {
  // Init favicons table - same schema as the thumbnails.
  sql::Connection favicons;
  if (OpenDatabase(&favicons, new_db_file) != sql::INIT_OK)
    return false;

  if (!InitFavIconsTable(&favicons, false)) {
    NOTREACHED() << "Couldn't init favicons table.";
    favicons.Close();
    return false;
  }
  favicons.Close();

  // Can't attach within a transaction.
  CommitTransaction();

  // Attach new DB.
  {
    // This block is needed because otherwise the attach statement is
    // never cleared from cache and we can't close the DB :P
    sql::Statement attach(db_.GetUniqueStatement("ATTACH ? AS new_favicons"));
    if (!attach) {
      NOTREACHED() << "Unable to attach database.";
      // Keep the transaction open, even though we failed.
      BeginTransaction();
      return false;
    }

#if defined(OS_POSIX)
    attach.BindString(0, new_db_file.value());
#else
    // Non-POSIX FilePath::value() is a wide string; sqlite wants UTF-8.
    attach.BindString(0, WideToUTF8(new_db_file.value()));
#endif

    if (!attach.Run()) {
      NOTREACHED() << db_.GetErrorMessage();
      BeginTransaction();
      return false;
    }
  }

  // Move favicons to the new DB.
  if (!db_.Execute("INSERT OR REPLACE INTO new_favicons.favicons "
                   "SELECT * FROM favicons")) {
    NOTREACHED() << "Unable to copy favicons.";
    BeginTransaction();
    return false;
  }

  if (!db_.Execute("DETACH new_favicons")) {
    NOTREACHED() << "Unable to detach database.";
    BeginTransaction();
    return false;
  }

  db_.Close();

  // Reset the DB to point to new file.
  if (OpenDatabase(&db_, new_db_file) != sql::INIT_OK)
    return false;

  file_util::Delete(old_db_file, false);

  InitFavIconsIndex();

  // Reopen the transaction.
  BeginTransaction();
  use_top_sites_ = true;
  return true;
}

}  // namespace history

// ---------------------------------------------------------------------------
// chrome/browser/history/thumbnail_database.h (new file in this diff)
// ---------------------------------------------------------------------------
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CHROME_BROWSER_HISTORY_THUMBNAIL_DATABASE_H_
#define CHROME_BROWSER_HISTORY_THUMBNAIL_DATABASE_H_

#include <vector>

#include "app/sql/connection.h"
#include "app/sql/init_status.h"
#include "app/sql/meta_table.h"
#include "base/ref_counted.h"
#include "chrome/browser/history/history_types.h"

class FilePath;
class RefCountedMemory;
struct ThumbnailScore;
class SkBitmap;

namespace base {
class Time;
}

namespace history {

class ExpireHistoryBackend;
class HistoryPublisher;

// This database interface is owned by the history backend and runs on the
// history thread. It is a totally separate component from history partially
// because we may want to move it to its own thread in the future. The
// operations we will do on this database will be slow, but we can tolerate
// higher latency (it's OK for thumbnails to come in slower than the rest
// of the data). Moving this to a separate thread would not block potentially
// higher priority history operations.
class ThumbnailDatabase {
 public:
  ThumbnailDatabase();
  ~ThumbnailDatabase();

  // Must be called after creation but before any other methods are called.
  // When not INIT_OK, no other functions should be called.
  sql::InitStatus Init(const FilePath& db_name,
                       const HistoryPublisher* history_publisher);

  // Open database on a given filename. If the file does not exist,
  // it is created.
  // |db| is the database to open.
  // |db_name| is a path to the database file.
  static sql::InitStatus OpenDatabase(sql::Connection* db,
                                      const FilePath& db_name);

  // Transactions on the database.
  void BeginTransaction();
  void CommitTransaction();
  int transaction_nesting() const {
    return db_.transaction_nesting();
  }

  // Vacuums the database. This will cause sqlite to defragment and collect
  // unused space in the file. It can be VERY SLOW.
  void Vacuum();

  // Thumbnails ----------------------------------------------------------------

  // Sets the given data to be the thumbnail for the given URL,
  // overwriting any previous data. If the SkBitmap contains no pixel
  // data, the thumbnail will be deleted.
  void SetPageThumbnail(const GURL& url,
                        URLID id,
                        const SkBitmap& thumbnail,
                        const ThumbnailScore& score,
                        base::Time time);

  // Retrieves thumbnail data for the given URL, returning true on success,
  // false if there is no such thumbnail or there was some other error.
  bool GetPageThumbnail(URLID id, std::vector<unsigned char>* data);

  // Delete the thumbnail with the provided id. Returns false on failure.
  bool DeleteThumbnail(URLID id);

  // If there is a thumbnail score for the id provided, retrieves the
  // current thumbnail score and places it in |score| and returns
  // true. Returns false otherwise.
  bool ThumbnailScoreForId(URLID id, ThumbnailScore* score);

  // Called by the expirer to delete all old thumbnails and make a clean table.
  // Returns true on success.
  bool RecreateThumbnailTable();

  // FavIcons ------------------------------------------------------------------

  // Sets the bits for a favicon. This should be png encoded data.
  // The time indicates the access time, and is used to detect when the favicon
  // should be refreshed.
  bool SetFavIcon(FavIconID icon_id,
                  scoped_refptr<RefCountedMemory> icon_data,
                  base::Time time);

  // Sets the time the favicon was last updated.
  bool SetFavIconLastUpdateTime(FavIconID icon_id, base::Time time);

  // Returns the id of the entry in the favicon database with the specified url.
  // Returns 0 if no entry exists for the specified url.
  FavIconID GetFavIconIDForFavIconURL(const GURL& icon_url);

  // Gets the png encoded favicon and last updated time for the specified
  // favicon id.
  bool GetFavIcon(FavIconID icon_id,
                  base::Time* last_updated,
                  std::vector<unsigned char>* png_icon_data,
                  GURL* icon_url);

  // Adds the favicon URL to the favicon db, returning its id.
  FavIconID AddFavIcon(const GURL& icon_url);

  // Delete the favicon with the provided id. Returns false on failure.
  bool DeleteFavIcon(FavIconID id);

  // Temporary FavIcons --------------------------------------------------------

  // Create a temporary table to store favicons. Favicons will be copied to
  // this table by CopyToTemporaryFavIconTable() and then the original table
  // will be dropped, leaving only those copied favicons remaining. This is
  // used to quickly delete most of the favicons when clearing history.
  bool InitTemporaryFavIconsTable() {
    return InitFavIconsTable(&db_, true);
  }

  // Copies the given favicon from the "main" favicon table to the temporary
  // one. This is only valid in between calls to InitTemporaryFavIconsTable()
  // and CommitTemporaryFavIconTable().
  //
  // The ID of the favicon will change when this copy takes place. The new ID
  // is returned, or 0 on failure.
  FavIconID CopyToTemporaryFavIconTable(FavIconID source);

  // Replaces the main URL table with the temporary table created by
  // InitTemporaryFavIconsTable(). This will mean all favicons not copied over
  // will be deleted. Returns true on success.
  bool CommitTemporaryFavIconTable();

  // Returns true iff the thumbnails table exists.
  // Migrating to TopSites is dropping the thumbnails table.
  bool NeedsMigrationToTopSites();

  // Renames the database file and drops the Thumbnails table.
  bool RenameAndDropThumbnails(const FilePath& old_db_file,
                               const FilePath& new_db_file);

 private:
  friend class ExpireHistoryBackend;

  // Creates the thumbnail table, returning true if the table already exists
  // or was successfully created.
  bool InitThumbnailTable();

  // Creates the favicon table, returning true if the table already exists,
  // or was successfully created. |is_temporary| will be false when generating
  // the "regular" favicons table. The expirer sets this to true to generate the
  // temporary table, which will have a different name but the same schema.
  // |db| is the connection to use for initializing the table.
  // A different connection is used in RenameAndDropThumbnails, when we
  // need to copy the favicons between two database files.
  bool InitFavIconsTable(sql::Connection* db, bool is_temporary);

  // Adds support for the new metadata on web page thumbnails.
  bool UpgradeToVersion3();

  // Creates the index over the favicon table. This will be called during
  // initialization after the table is created. This is a separate function
  // because it is used by SwapFaviconTables to create an index over the
  // newly-renamed favicons table (formerly the temporary table with no index).
  void InitFavIconsIndex();

  sql::Connection db_;
  sql::MetaTable meta_table_;

  // This object is created and managed by the history backend. We maintain an
  // opaque pointer to the object for our use.
  // This can be NULL if there are no indexers registered to receive indexing
  // data from us.
  const HistoryPublisher* history_publisher_;

  // True if migration to TopSites has been done and the thumbnails
  // table should not be used.
  bool use_top_sites_;
};

}  // namespace history

#endif  // CHROME_BROWSER_HISTORY_THUMBNAIL_DATABASE_H_

// ---------------------------------------------------------------------------
// chrome/browser/history/thumbnail_database_unittest.cc (new file in this diff)
// ---------------------------------------------------------------------------
// Copyright (c) 2009 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <algorithm>
#include <vector>

#include "base/basictypes.h"
#include "base/file_path.h"
#include "base/file_util.h"
#include "base/path_service.h"
#include "base/ref_counted_memory.h"
#include "base/scoped_temp_dir.h"
#include "chrome/browser/history/thumbnail_database.h"
#include "chrome/common/chrome_paths.h"
#include "chrome/common/thumbnail_score.h"
#include "chrome/tools/profiles/thumbnail-inl.h"
#include "gfx/codec/jpeg_codec.h"
#include "googleurl/src/gurl.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "third_party/skia/include/core/SkBitmap.h"

using base::Time;
using base::TimeDelta;

namespace history {

namespace {

// data we'll put into the thumbnail database
static const unsigned char blob1[] =
    "12346102356120394751634516591348710478123649165419234519234512349134";
static const unsigned char blob2[] =
    "goiwuegrqrcomizqyzkjalitbahxfjytrqvpqeroicxmnlkhlzunacxaneviawrtxcywhgef";
static const unsigned char blob3[] =
    "3716871354098370776510470746794707624107647054607467847164027";
// Boringness constants used to exercise thumbnail-replacement policy.
const double kBoringness = 0.25;
const double kWorseBoringness = 0.50;
const double kBetterBoringness = 0.10;
const double kTotallyBoring = 1.0;

const int64 kPage1 = 1234;

}  // namespace

// Fixture: creates a scratch directory with paths for the thumbnail DB and
// the post-migration favicon DB, and decodes a sample JPEG thumbnail.
class ThumbnailDatabaseTest : public testing::Test {
 public:
  ThumbnailDatabaseTest() {
  }
  ~ThumbnailDatabaseTest() {
  }

 protected:
  virtual void SetUp() {
    // Get a temporary directory for the test DB files.
    ASSERT_TRUE(temp_dir_.CreateUniqueTempDir());

    file_name_ = temp_dir_.path().AppendASCII("TestThumbnails.db");
    new_file_name_ = temp_dir_.path().AppendASCII("TestFavicons.db");

    google_bitmap_.reset(
        gfx::JPEGCodec::Decode(kGoogleThumbnail, sizeof(kGoogleThumbnail)));
  }

  scoped_ptr<SkBitmap> google_bitmap_;

  ScopedTempDir temp_dir_;
  FilePath file_name_;
  FilePath new_file_name_;
};

// Basic add/lookup/delete round trip, including delete-via-null-bitmap.
TEST_F(ThumbnailDatabaseTest, AddDelete) {
  ThumbnailDatabase db;
  ASSERT_EQ(sql::INIT_OK, db.Init(file_name_, NULL));

  // Add one page & verify it got added.
  ThumbnailScore boring(kBoringness, true, true);
  Time time;
  GURL gurl;
  db.SetPageThumbnail(gurl, kPage1, *google_bitmap_, boring, time);
  ThumbnailScore score_output;
  ASSERT_TRUE(db.ThumbnailScoreForId(kPage1, &score_output));
  ASSERT_TRUE(boring.Equals(score_output));

  // Verify a random page is not found.
  int64 page2 = 5678;
  std::vector<unsigned char> jpeg_data;
  EXPECT_FALSE(db.GetPageThumbnail(page2, &jpeg_data));
  EXPECT_FALSE(db.ThumbnailScoreForId(page2, &score_output));

  // Add another page with a better boringness & verify it got added.
  ThumbnailScore better_boringness(kBetterBoringness, true, true);

  db.SetPageThumbnail(gurl, page2, *google_bitmap_, better_boringness, time);
  ASSERT_TRUE(db.ThumbnailScoreForId(page2, &score_output));
  ASSERT_TRUE(better_boringness.Equals(score_output));

  // Delete the thumbnail for the second page.
  ThumbnailScore worse_boringness(kWorseBoringness, true, true);
  db.SetPageThumbnail(gurl, page2, SkBitmap(), worse_boringness, time);
  ASSERT_FALSE(db.GetPageThumbnail(page2, &jpeg_data));
  ASSERT_FALSE(db.ThumbnailScoreForId(page2, &score_output));

  // Delete the first thumbnail using the explicit delete API.
  ASSERT_TRUE(db.DeleteThumbnail(kPage1));

  // Make sure it is gone
  ASSERT_FALSE(db.ThumbnailScoreForId(kPage1, &score_output));
  ASSERT_FALSE(db.GetPageThumbnail(kPage1, &jpeg_data));
  ASSERT_FALSE(db.ThumbnailScoreForId(page2, &score_output));
  ASSERT_FALSE(db.GetPageThumbnail(page2, &jpeg_data));
}

// A more boring thumbnail must not replace a stored less-boring one, but a
// less boring one must.
TEST_F(ThumbnailDatabaseTest, UseLessBoringThumbnails) {
  ThumbnailDatabase db;
  Time now = Time::Now();
  ASSERT_EQ(sql::INIT_OK, db.Init(file_name_, NULL));

  // Add one page & verify it got added.
  ThumbnailScore boring(kBoringness, true, true);

  Time time;
  GURL gurl;
  db.SetPageThumbnail(gurl, kPage1, *google_bitmap_, boring, time);
  std::vector<unsigned char> jpeg_data;
  ThumbnailScore score_out;
  ASSERT_TRUE(db.GetPageThumbnail(kPage1, &jpeg_data));
  ASSERT_TRUE(db.ThumbnailScoreForId(kPage1, &score_out));
  ASSERT_TRUE(boring.Equals(score_out));

  // Attempt to update the first page entry with a thumbnail that
  // is more boring and verify that it doesn't change.
  ThumbnailScore more_boring(kWorseBoringness, true, true);
  db.SetPageThumbnail(gurl, kPage1, *google_bitmap_, more_boring, time);
  ASSERT_TRUE(db.GetPageThumbnail(kPage1, &jpeg_data));
  ASSERT_TRUE(db.ThumbnailScoreForId(kPage1, &score_out));
  ASSERT_TRUE(boring.Equals(score_out));

  // Attempt to update the first page entry with a thumbnail that
  // is less boring and verify that we update it.
  ThumbnailScore less_boring(kBetterBoringness, true, true);
  db.SetPageThumbnail(gurl, kPage1, *google_bitmap_, less_boring, time);
  ASSERT_TRUE(db.GetPageThumbnail(kPage1, &jpeg_data));
  ASSERT_TRUE(db.ThumbnailScoreForId(kPage1, &score_out));
  ASSERT_TRUE(less_boring.Equals(score_out));
}

// Replacement policy along the good_clipping / at_top dimensions.
TEST_F(ThumbnailDatabaseTest, UseAtTopThumbnails) {
  ThumbnailDatabase db;
  Time now = Time::Now();
  ASSERT_EQ(sql::INIT_OK, db.Init(file_name_, NULL));

  // Add one page & verify it got added. Note that it doesn't have
  // |good_clipping| and isn't |at_top|.
  ThumbnailScore boring_and_bad(kBoringness, false, false);

  Time time;
  GURL gurl;
  db.SetPageThumbnail(gurl, kPage1, *google_bitmap_, boring_and_bad, time);
  std::vector<unsigned char> jpeg_data;
  ThumbnailScore score_out;
  ASSERT_TRUE(db.GetPageThumbnail(kPage1, &jpeg_data));
  ASSERT_TRUE(db.ThumbnailScoreForId(kPage1, &score_out));
  ASSERT_TRUE(boring_and_bad.Equals(score_out));

  // A thumbnail that's at the top of the page should replace
  // thumbnails that are in the middle, for the same boringness.
  ThumbnailScore boring_but_better(kBoringness, false, true);
  db.SetPageThumbnail(gurl, kPage1, *google_bitmap_, boring_but_better, time);
  ASSERT_TRUE(db.GetPageThumbnail(kPage1, &jpeg_data));
  ASSERT_TRUE(db.ThumbnailScoreForId(kPage1, &score_out));
  ASSERT_TRUE(boring_but_better.Equals(score_out));

  // The only case where we should replace a thumbnail at the top with
  // a thumbnail in the middle/bottom is when the current thumbnail is
  // weirdly stretched and the incoming thumbnail isn't.
  ThumbnailScore better_boring_bad_framing(kBetterBoringness, false, false);
  db.SetPageThumbnail(gurl, kPage1, *google_bitmap_, better_boring_bad_framing,
                      time);
  ASSERT_TRUE(db.GetPageThumbnail(kPage1, &jpeg_data));
  ASSERT_TRUE(db.ThumbnailScoreForId(kPage1, &score_out));
  ASSERT_TRUE(boring_but_better.Equals(score_out));

  ThumbnailScore boring_good_clipping(kBoringness, true, false);
  db.SetPageThumbnail(gurl, kPage1, *google_bitmap_, boring_good_clipping,
                      time);
  ASSERT_TRUE(db.GetPageThumbnail(kPage1, &jpeg_data));
  ASSERT_TRUE(db.ThumbnailScoreForId(kPage1, &score_out));
  ASSERT_TRUE(boring_good_clipping.Equals(score_out));

  // Now that we have a non-stretched, middle of the page thumbnail,
  // we shouldn't be able to replace it with:

  // 1) A stretched thumbnail in the middle of the page
  db.SetPageThumbnail(gurl, kPage1, *google_bitmap_,
                      ThumbnailScore(kBetterBoringness, false, false, now),
                      time);
  ASSERT_TRUE(db.GetPageThumbnail(kPage1, &jpeg_data));
  ASSERT_TRUE(db.ThumbnailScoreForId(kPage1, &score_out));
  ASSERT_TRUE(boring_good_clipping.Equals(score_out));

  // 2) A stretched thumbnail at the top of the page
  db.SetPageThumbnail(gurl, kPage1, *google_bitmap_,
                      ThumbnailScore(kBetterBoringness, false, true, now),
                      time);
  ASSERT_TRUE(db.GetPageThumbnail(kPage1, &jpeg_data));
  ASSERT_TRUE(db.ThumbnailScoreForId(kPage1, &score_out));
  ASSERT_TRUE(boring_good_clipping.Equals(score_out));

  // But it should be replaced by a thumbnail that's clipped properly
  // and is at the top
  ThumbnailScore best_score(kBetterBoringness, true, true);
  db.SetPageThumbnail(gurl, kPage1, *google_bitmap_, best_score, time);
  ASSERT_TRUE(db.GetPageThumbnail(kPage1, &jpeg_data));
  ASSERT_TRUE(db.ThumbnailScoreForId(kPage1, &score_out));
  ASSERT_TRUE(best_score.Equals(score_out));
}

// Snapshot age relaxes the boringness threshold over time.
TEST_F(ThumbnailDatabaseTest, ThumbnailTimeDegradation) {
  ThumbnailDatabase db;
  const Time kNow = Time::Now();
  // NOTE(review): the constant names say three/five hours but the values are
  // four/six — preserved from the original; confirm intent before "fixing".
  const Time kThreeHoursAgo = kNow - TimeDelta::FromHours(4);
  const Time kFiveHoursAgo = kNow - TimeDelta::FromHours(6);
  const double kBaseBoringness = 0.305;
  const double kWorseBoringness = 0.345;

  ASSERT_EQ(sql::INIT_OK, db.Init(file_name_, NULL));

  // add one page & verify it got added.
  ThumbnailScore base_boringness(kBaseBoringness, true, true, kFiveHoursAgo);

  Time time;
  GURL gurl;
  db.SetPageThumbnail(gurl, kPage1, *google_bitmap_, base_boringness, time);
  std::vector<unsigned char> jpeg_data;
  ThumbnailScore score_out;
  ASSERT_TRUE(db.GetPageThumbnail(kPage1, &jpeg_data));
  ASSERT_TRUE(db.ThumbnailScoreForId(kPage1, &score_out));
  ASSERT_TRUE(base_boringness.Equals(score_out));

  // Try to add a different thumbnail with a worse score an hour later
  // (but not enough to trip the boringness degradation threshold).
  ThumbnailScore hour_later(kWorseBoringness, true, true, kThreeHoursAgo);
  db.SetPageThumbnail(gurl, kPage1, *google_bitmap_, hour_later, time);
  ASSERT_TRUE(db.GetPageThumbnail(kPage1, &jpeg_data));
  ASSERT_TRUE(db.ThumbnailScoreForId(kPage1, &score_out));
  ASSERT_TRUE(base_boringness.Equals(score_out));

  // After a full five hours, things should have degraded enough
  // that we'll allow the same thumbnail with the same (worse)
  // boringness that we previous rejected.
  ThumbnailScore five_hours_later(kWorseBoringness, true, true, kNow);
  db.SetPageThumbnail(gurl, kPage1, *google_bitmap_, five_hours_later, time);
  ASSERT_TRUE(db.GetPageThumbnail(kPage1, &jpeg_data));
  ASSERT_TRUE(db.ThumbnailScoreForId(kPage1, &score_out));
  ASSERT_TRUE(five_hours_later.Equals(score_out));
}

TEST_F(ThumbnailDatabaseTest, NeverAcceptTotallyBoringThumbnail) {
  // We enforce a maximum boringness score: even in cases where we
  // should replace a thumbnail with another because of reasons other
  // than straight up boringness score, still reject because the
  // thumbnail is totally boring.
  ThumbnailDatabase db;
  Time now = Time::Now();
  ASSERT_EQ(sql::INIT_OK, db.Init(file_name_, NULL));

  std::vector<unsigned char> jpeg_data;
  ThumbnailScore score_out;
  const double kBaseBoringness = 0.50;
  const Time kNow = Time::Now();
  const int kSizeOfTable = 4;
  // "heiarchy" (sic) preserved from the original identifier.
  struct {
    bool good_scaling;
    bool at_top;
  } const heiarchy_table[] = {
    {false, false},
    {false, true},
    {true, false},
    {true, true}
  };

  Time time;
  GURL gurl;

  // Test that for each entry type, all entry types that are better
  // than it still will reject thumbnails which are totally boring.
  for (int i = 0; i < kSizeOfTable; ++i) {
    ThumbnailScore base(kBaseBoringness,
                        heiarchy_table[i].good_scaling,
                        heiarchy_table[i].at_top,
                        kNow);

    db.SetPageThumbnail(gurl, kPage1, *google_bitmap_, base, time);
    ASSERT_TRUE(db.GetPageThumbnail(kPage1, &jpeg_data));
    ASSERT_TRUE(db.ThumbnailScoreForId(kPage1, &score_out));
    ASSERT_TRUE(base.Equals(score_out));

    for (int j = i; j < kSizeOfTable; ++j) {
      ThumbnailScore shouldnt_replace(
          kTotallyBoring, heiarchy_table[j].good_scaling,
          heiarchy_table[j].at_top, kNow);

      db.SetPageThumbnail(gurl, kPage1, *google_bitmap_, shouldnt_replace,
                          time);
      ASSERT_TRUE(db.GetPageThumbnail(kPage1, &jpeg_data));
      ASSERT_TRUE(db.ThumbnailScoreForId(kPage1, &score_out));
      ASSERT_TRUE(base.Equals(score_out));
    }

    // Clean up for the next iteration
    ASSERT_TRUE(db.DeleteThumbnail(kPage1));
    ASSERT_FALSE(db.GetPageThumbnail(kPage1, &jpeg_data));
    ASSERT_FALSE(db.ThumbnailScoreForId(kPage1, &score_out));
  }

  // We should never accept a totally boring thumbnail no matter how
  // much old the current thumbnail is.
  ThumbnailScore base_boring(kBaseBoringness, true, true, kNow);
  db.SetPageThumbnail(gurl, kPage1, *google_bitmap_, base_boring, time);
  ASSERT_TRUE(db.GetPageThumbnail(kPage1, &jpeg_data));
  ASSERT_TRUE(db.ThumbnailScoreForId(kPage1, &score_out));
  ASSERT_TRUE(base_boring.Equals(score_out));

  ThumbnailScore totally_boring_in_the_future(
      kTotallyBoring, true, true, kNow + TimeDelta::FromDays(365));
  db.SetPageThumbnail(gurl, kPage1, *google_bitmap_,
                      totally_boring_in_the_future, time);
  ASSERT_TRUE(db.GetPageThumbnail(kPage1, &jpeg_data));
  ASSERT_TRUE(db.ThumbnailScoreForId(kPage1, &score_out));
  ASSERT_TRUE(base_boring.Equals(score_out));
}

// Migration flips NeedsMigrationToTopSites() and physically moves the file.
TEST_F(ThumbnailDatabaseTest, NeedsMigrationToTopSites) {
  ThumbnailDatabase db;
  ASSERT_EQ(sql::INIT_OK, db.Init(file_name_, NULL));
  db.BeginTransaction();
  EXPECT_TRUE(db.NeedsMigrationToTopSites());
  EXPECT_TRUE(db.RenameAndDropThumbnails(file_name_, new_file_name_));
  EXPECT_FALSE(db.NeedsMigrationToTopSites());
  EXPECT_FALSE(file_util::PathExists(file_name_));
  EXPECT_TRUE(file_util::PathExists(new_file_name_));
}

// Favicon data written before migration must survive the migration intact.
TEST_F(ThumbnailDatabaseTest, GetFaviconAfterMigrationToTopSites) {
  ThumbnailDatabase db;
  ASSERT_EQ(sql::INIT_OK, db.Init(file_name_, NULL));
  db.BeginTransaction();

  std::vector<unsigned char> data(blob1, blob1 + sizeof(blob1));
  scoped_refptr<RefCountedBytes> favicon(new RefCountedBytes(data));

  GURL url("http://google.com");
  FavIconID id = db.AddFavIcon(url);
  base::Time time = base::Time::Now();
  db.SetFavIcon(id, favicon, time);
  EXPECT_TRUE(db.RenameAndDropThumbnails(file_name_, new_file_name_));

  base::Time time_out;
  std::vector<unsigned char> favicon_out;
  GURL url_out;
  EXPECT_TRUE(db.GetFavIcon(id, &time_out, &favicon_out, &url_out));
  EXPECT_EQ(url, url_out);
  EXPECT_EQ(time.ToTimeT(), time_out.ToTimeT());
  ASSERT_EQ(data.size(), favicon_out.size());
  EXPECT_TRUE(std::equal(data.begin(),
                         data.end(),
                         favicon_out.begin()));
}

}  // namespace history

// ---------------------------------------------------------------------------
// chrome/browser/history/top_sites.cc (new file in this diff)
// ---------------------------------------------------------------------------
// Copyright (c) 2009 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/browser/history/top_sites.h"

#include <algorithm>

#include "base/file_util.h"
#include "base/logging.h"
#include "chrome/browser/chrome_thread.h"
#include "chrome/browser/profile.h"
#include "chrome/browser/history/top_sites_database.h"
#include "chrome/browser/history/history_notifications.h"
#include "chrome/browser/history/page_usage_data.h"
#include "chrome/browser/tab_contents/navigation_controller.h"
#include "chrome/browser/tab_contents/navigation_entry.h"
#include "gfx/codec/jpeg_codec.h"
#include "third_party/skia/include/core/SkBitmap.h"

namespace history {

// How many top sites to store in the cache.
static const size_t kTopSitesNumber = 20;
static const int kDaysOfHistory = 90;
// Time from startup to first HistoryService query.
static const int64 kUpdateIntervalSecs = 15;
// Intervals between requests to HistoryService.
+static const int64 kMinUpdateIntervalMinutes = 1; +static const int64 kMaxUpdateIntervalMinutes = 60; + + +TopSites::TopSites(Profile* profile) : profile_(profile), + mock_history_service_(NULL), + last_num_urls_changed_(0), + migration_in_progress_(false), + waiting_for_results_(true) { + registrar_.Add(this, NotificationType::HISTORY_URLS_DELETED, + Source<Profile>(profile_)); + registrar_.Add(this, NotificationType::NAV_ENTRY_COMMITTED, + NotificationService::AllSources()); +} + +TopSites::~TopSites() { + timer_.Stop(); +} + +void TopSites::Init(const FilePath& db_name) { + db_path_ = db_name; + db_.reset(new TopSitesDatabaseImpl()); + if (!db_->Init(db_name)) { + NOTREACHED() << "Failed to initialize database."; + return; + } + + ChromeThread::PostTask(ChromeThread::DB, FROM_HERE, NewRunnableMethod( + this, &TopSites::ReadDatabase)); + + // Start the one-shot timer. + timer_.Start(base::TimeDelta::FromSeconds(kUpdateIntervalSecs), this, + &TopSites::StartQueryForMostVisited); +} + +void TopSites::ReadDatabase() { + DCHECK(ChromeThread::CurrentlyOn(ChromeThread::DB)); + std::map<GURL, Images> thumbnails; + + DCHECK(db_.get()); + { + AutoLock lock(lock_); + MostVisitedURLList top_urls; + db_->GetPageThumbnails(&top_urls, &thumbnails); + StoreMostVisited(&top_urls); + } // Lock is released here. + + for (size_t i = 0; i < top_sites_.size(); i++) { + GURL url = top_sites_[i].url; + Images thumbnail = thumbnails[url]; + if (!thumbnail.thumbnail.get() || !thumbnail.thumbnail->size()) { + LOG(INFO) << "No thumbnail for " << url.spec(); + } else { + SetPageThumbnailNoDB(url, thumbnail.thumbnail, + thumbnail.thumbnail_score); + } + } +} + +// Public function that encodes the bitmap into RefCountedBytes and +// updates the database. 
+bool TopSites::SetPageThumbnail(const GURL& url, + const SkBitmap& thumbnail, + const ThumbnailScore& score) { + bool add_temp_thumbnail = false; + if (canonical_urls_.find(url) == canonical_urls_.end()) { + if (top_sites_.size() < kTopSitesNumber) { + add_temp_thumbnail = true; + } else { + return false; // This URL is not known to us. + } + } + + if (!HistoryService::CanAddURL(url)) + return false; // It's not a real webpage. + + scoped_refptr<RefCountedBytes> thumbnail_data = new RefCountedBytes; + SkAutoLockPixels thumbnail_lock(thumbnail); + bool encoded = gfx::JPEGCodec::Encode( + reinterpret_cast<unsigned char*>(thumbnail.getAddr32(0, 0)), + gfx::JPEGCodec::FORMAT_BGRA, thumbnail.width(), + thumbnail.height(), + static_cast<int>(thumbnail.rowBytes()), 90, + &thumbnail_data->data); + if (!encoded) + return false; + + if (add_temp_thumbnail) { + AddTemporaryThumbnail(url, thumbnail_data, score); + return true; + } + + return SetPageThumbnail(url, thumbnail_data, score); +} + +bool TopSites::SetPageThumbnail(const GURL& url, + const RefCountedBytes* thumbnail, + const ThumbnailScore& score) { + if (!SetPageThumbnailNoDB(url, thumbnail, score)) + return false; + + // Update the database. 
+ if (!db_.get()) + return true; + std::map<GURL, size_t>::iterator found = canonical_urls_.find(url); + if (found == canonical_urls_.end()) + return false; + size_t index = found->second; + + MostVisitedURL& most_visited = top_sites_[index]; + ChromeThread::PostTask(ChromeThread::DB, FROM_HERE, NewRunnableMethod( + this, &TopSites::WriteThumbnailToDB, + most_visited, index, top_images_[most_visited.url])); + return true; +} + +void TopSites::WriteThumbnailToDB(const MostVisitedURL& url, + int url_rank, + const TopSites::Images& thumbnail) { + DCHECK(db_.get()); + DCHECK(ChromeThread::CurrentlyOn(ChromeThread::DB)); + db_->SetPageThumbnail(url, url_rank, thumbnail); +} + +// private +bool TopSites::SetPageThumbnailNoDB(const GURL& url, + const RefCountedBytes* thumbnail_data, + const ThumbnailScore& score) { + AutoLock lock(lock_); + + std::map<GURL, size_t>::iterator found = canonical_urls_.find(url); + if (found == canonical_urls_.end()) { + if (top_sites_.size() >= kTopSitesNumber) + return false; // This URL is not known to us. + + // We don't have enough Top Sites - add this one too. + MostVisitedURL mv; + mv.url = url; + mv.redirects.push_back(url); + top_sites_.push_back(mv); + size_t index = top_sites_.size() - 1; + StoreRedirectChain(top_sites_[index].redirects, index); + found = canonical_urls_.find(url); + } + + MostVisitedURL& most_visited = top_sites_[found->second]; + Images& image = top_images_[most_visited.url]; + + // When comparing the thumbnail scores, we need to take into account the + // redirect hops, which are not generated when the thumbnail is because the + // redirects weren't known. We fill that in here since we know the redirects. 
+  // This ensures cancellation of requests when either the consumer or the
+    // Copy all thumbnails from the history service.
+ if (!hs) + return; + hs->OnTopSitesReady(); +} + +void TopSites::AddTemporaryThumbnail(const GURL& url, + const RefCountedBytes* thumbnail, + const ThumbnailScore& score) { + Images& img = temp_thumbnails_map_[url]; + img.thumbnail = const_cast<RefCountedBytes*>(thumbnail); + img.thumbnail_score = score; +} + +void TopSites::StartQueryForThumbnail(size_t index) { + DCHECK(migration_in_progress_); + migration_pending_urls_.insert(top_sites_[index].url); + + if (mock_history_service_) { + // Testing with a mockup. + // QueryMostVisitedURLs is not virtual, so we have to duplicate the code. + // This calls SetClientData. + mock_history_service_->GetPageThumbnail( + top_sites_[index].url, + &cancelable_consumer_, + NewCallback(this, &TopSites::OnThumbnailAvailable), + index); + return; + } + + HistoryService* hs = profile_->GetHistoryService(Profile::EXPLICIT_ACCESS); + // |hs| may be null during unit tests. + if (!hs) + return; + HistoryService::Handle handle = + hs->GetPageThumbnail(top_sites_[index].url, + &cancelable_consumer_, + NewCallback(this, &TopSites::OnThumbnailAvailable)); + cancelable_consumer_.SetClientData(hs, handle, index); +} + +void TopSites::StoreMostVisited(MostVisitedURLList* most_visited) { + DCHECK(ChromeThread::CurrentlyOn(ChromeThread::DB)); + // Take ownership of the most visited data. + top_sites_.clear(); + top_sites_.swap(*most_visited); + waiting_for_results_ = false; + + // Save the redirect information for quickly mapping to the canonical URLs. + canonical_urls_.clear(); + for (size_t i = 0; i < top_sites_.size(); i++) { + const MostVisitedURL& mv = top_sites_[i]; + StoreRedirectChain(mv.redirects, i); + + std::map<GURL, Images>::iterator it = temp_thumbnails_map_.begin(); + GURL canonical_url = GetCanonicalURL(mv.url); + for (; it != temp_thumbnails_map_.end(); it++) { + // Must map all temp URLs to canonical ones. 
+ // temp_thumbnails_map_ contains non-canonical URLs, because + // when we add a temp thumbnail, redirect chain is not known. + // This is slow, but temp_thumbnails_map_ should have very few URLs. + if (canonical_url == GetCanonicalURL(it->first)) { + SetPageThumbnail(mv.url, it->second.thumbnail, + it->second.thumbnail_score); + temp_thumbnails_map_.erase(it); + break; + } + } + } + if (top_sites_.size() >= kTopSitesNumber) + temp_thumbnails_map_.clear(); +} + +void TopSites::StoreRedirectChain(const RedirectList& redirects, + size_t destination) { + if (redirects.empty()) { + NOTREACHED(); + return; + } + + // Map all the redirected URLs to the destination. + for (size_t i = 0; i < redirects.size(); i++) + canonical_urls_[redirects[i]] = destination; +} + +GURL TopSites::GetCanonicalURL(const GURL& url) const { + std::map<GURL, size_t>::const_iterator found = canonical_urls_.find(url); + if (found == canonical_urls_.end()) + return GURL(); // Don't know anything about this URL. + return top_sites_[found->second].url; +} + +// static +int TopSites::GetRedirectDistanceForURL(const MostVisitedURL& most_visited, + const GURL& url) { + for (size_t i = 0; i < most_visited.redirects.size(); i++) { + if (most_visited.redirects[i] == url) + return static_cast<int>(most_visited.redirects.size() - i - 1); + } + NOTREACHED() << "URL should always be found."; + return 0; +} + +// static +void TopSites::DiffMostVisited(const MostVisitedURLList& old_list, + const MostVisitedURLList& new_list, + std::vector<size_t>* added_urls, + std::vector<size_t>* deleted_urls, + std::vector<size_t>* moved_urls) { + added_urls->clear(); + deleted_urls->clear(); + moved_urls->clear(); + + // Add all the old URLs for quick lookup. This maps URLs to the corresponding + // index in the input. + std::map<GURL, size_t> all_old_urls; + for (size_t i = 0; i < old_list.size(); i++) + all_old_urls[old_list[i].url] = i; + + // Check all the URLs in the new set to see which ones are new or just moved. 
+ // When we find a match in the old set, we'll reset its index to our special + // marker. This allows us to quickly identify the deleted ones in a later + // pass. + const size_t kAlreadyFoundMarker = static_cast<size_t>(-1); + for (size_t i = 0; i < new_list.size(); i++) { + std::map<GURL, size_t>::iterator found = all_old_urls.find(new_list[i].url); + if (found == all_old_urls.end()) { + added_urls->push_back(i); + } else { + if (found->second != i) + moved_urls->push_back(i); + found->second = kAlreadyFoundMarker; + } + } + + // Any member without the special marker in the all_old_urls list means that + // there wasn't a "new" URL that mapped to it, so it was deleted. + for (std::map<GURL, size_t>::const_iterator i = all_old_urls.begin(); + i != all_old_urls.end(); ++i) { + if (i->second != kAlreadyFoundMarker) + deleted_urls->push_back(i->second); + } +} + +void TopSites::StartQueryForMostVisited() { + if (mock_history_service_) { + // Testing with a mockup. + // QueryMostVisitedURLs is not virtual, so we have to duplicate the code. + mock_history_service_->QueryMostVisitedURLs( + kTopSitesNumber, + kDaysOfHistory, + &cancelable_consumer_, + NewCallback(this, &TopSites::OnTopSitesAvailable)); + } else { + HistoryService* hs = profile_->GetHistoryService(Profile::EXPLICIT_ACCESS); + // |hs| may be null during unit tests. 
+ if (hs) { + hs->QueryMostVisitedURLs( + kTopSitesNumber, + kDaysOfHistory, + &cancelable_consumer_, + NewCallback(this, &TopSites::OnTopSitesAvailable)); + } else { + LOG(INFO) << "History Service not available."; + } + } +} + +void TopSites::StartMigration() { + migration_in_progress_ = true; + StartQueryForMostVisited(); +} + +base::TimeDelta TopSites::GetUpdateDelay() { + if (top_sites_.size() == 0) + return base::TimeDelta::FromSeconds(30); + + int64 range = kMaxUpdateIntervalMinutes - kMinUpdateIntervalMinutes; + int64 minutes = kMaxUpdateIntervalMinutes - + last_num_urls_changed_ * range / top_sites_.size(); + return base::TimeDelta::FromMinutes(minutes); +} + +void TopSites::OnTopSitesAvailable( + CancelableRequestProvider::Handle handle, + MostVisitedURLList pages) { + if (!pending_callbacks_.empty()) { + PendingCallbackSet copy(pending_callbacks_); + PendingCallbackSet::iterator i; + for (i = pending_callbacks_.begin(); + i != pending_callbacks_.end(); ++i) { + scoped_refptr<CancelableRequest<GetTopSitesCallback> > request = *i; + if (!request->canceled()) + request->ForwardResult(GetTopSitesCallback::TupleType(pages)); + } + pending_callbacks_.clear(); + } + + ChromeThread::PostTask(ChromeThread::DB, FROM_HERE, NewRunnableMethod( + this, &TopSites::UpdateMostVisited, pages)); +} + +void TopSites::OnThumbnailAvailable(CancelableRequestProvider::Handle handle, + scoped_refptr<RefCountedBytes> thumbnail) { + size_t index; + if (mock_history_service_) { + index = handle; + } else { + HistoryService* hs = profile_ ->GetHistoryService(Profile::EXPLICIT_ACCESS); + index = cancelable_consumer_.GetClientData(hs, handle); + } + DCHECK(static_cast<size_t>(index) < top_sites_.size()); + + if (migration_in_progress_) + migration_pending_urls_.erase(top_sites_[index].url); + + if (thumbnail.get() && thumbnail->size()) { + const MostVisitedURL& url = top_sites_[index]; + SetPageThumbnail(url.url, thumbnail, ThumbnailScore()); + } + + if (migration_in_progress_ && 
migration_pending_urls_.empty() && + !mock_history_service_) + OnMigrationDone(); +} + +void TopSites::SetMockHistoryService(MockHistoryService* mhs) { + mock_history_service_ = mhs; +} + +void TopSites::Observe(NotificationType type, + const NotificationSource& source, + const NotificationDetails& details) { + if (type == NotificationType::HISTORY_URLS_DELETED) { + Details<history::URLsDeletedDetails> deleted_details(details); + if (deleted_details->all_history) { + top_sites_.clear(); + ChromeThread::PostTask(ChromeThread::DB, FROM_HERE, + NewRunnableMethod(this, &TopSites::ResetDatabase)); + } else { + std::set<GURL>::iterator it; + for (it = deleted_details->urls.begin(); + it != deleted_details->urls.end(); ++it) { + for (size_t i = 0; i < top_sites_.size(); i++) { + if (top_sites_[i].url == *it) { + top_sites_.erase(top_sites_.begin() + i); + break; + } + } + } + } + StartQueryForMostVisited(); + } else if (type == NotificationType::NAV_ENTRY_COMMITTED) { + if (top_sites_.size() < kTopSitesNumber) { + const NavigationController::LoadCommittedDetails& load_details = + *Details<NavigationController::LoadCommittedDetails>(details).ptr(); + GURL url = load_details.entry->url(); + if (canonical_urls_.find(url) == canonical_urls_.end() && + HistoryService::CanAddURL(url)) { + // Add this page to the known pages in case the thumbnail comes + // in before we get the results. 
+ MostVisitedURL mv; + mv.url = url; + mv.redirects.push_back(url); + top_sites_.push_back(mv); + size_t index = top_sites_.size() - 1; + StoreRedirectChain(top_sites_[index].redirects, index); + } + StartQueryForMostVisited(); + } + } +} + +void TopSites::ResetDatabase() { + DCHECK(ChromeThread::CurrentlyOn(ChromeThread::DB)); + db_.reset(new TopSitesDatabaseImpl()); + file_util::Delete(db_path_, false); + if (!db_->Init(db_path_)) { + NOTREACHED() << "Failed to initialize database."; + return; + } +} + +} // namespace history diff --git a/chrome/browser/history/top_sites.h b/chrome/browser/history/top_sites.h new file mode 100644 index 0000000..4cc6059 --- /dev/null +++ b/chrome/browser/history/top_sites.h @@ -0,0 +1,281 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CHROME_BROWSER_HISTORY_TOP_SITES_H_ +#define CHROME_BROWSER_HISTORY_TOP_SITES_H_ + +#include <map> +#include <set> +#include <string> +#include <vector> + +#include "base/basictypes.h" +#include "base/gtest_prod_util.h" +#include "base/lock.h" +#include "base/timer.h" +#include "base/ref_counted.h" +#include "base/ref_counted_memory.h" +#include "chrome/browser/cancelable_request.h" +#include "chrome/browser/history/history_types.h" +#include "chrome/browser/history/history.h" +#include "chrome/browser/history/page_usage_data.h" +#include "chrome/common/notification_service.h" +#include "chrome/common/thumbnail_score.h" +#include "googleurl/src/gurl.h" + +class SkBitmap; +class Profile; + +namespace history { + +class TopSitesBackend; +class TopSitesDatabase; +class TopSitesTest; + +typedef std::vector<MostVisitedURL> MostVisitedURLList; + +// Stores the data for the top "most visited" sites. This includes a cache of +// the most visited data from history, as well as the corresponding thumbnails +// of those sites. +// +// This class IS threadsafe. 
It is designed to be used from the UI thread of +// the browser (where history requests must be kicked off and received from) +// and from the I/O thread (where new tab page requests come in). Handling the +// new tab page requests on the I/O thread without proxying to the UI thread is +// a nontrivial performance win, especially when the browser is starting and +// the UI thread is busy. +class TopSites : public NotificationObserver, + public base::RefCountedThreadSafe<TopSites>, + public CancelableRequestProvider { + public: + explicit TopSites(Profile* profile); + + class MockHistoryService { + // A mockup of a HistoryService used for testing TopSites. + public: + virtual HistoryService::Handle QueryMostVisitedURLs( + int result_count, int days_back, + CancelableRequestConsumerBase* consumer, + HistoryService::QueryMostVisitedURLsCallback* callback) = 0; + virtual ~MockHistoryService() {} + virtual void GetPageThumbnail( + const GURL& page_url, + CancelableRequestConsumerTSimple<size_t>* consumer, + HistoryService::ThumbnailDataCallback* callback, + size_t index) = 0; + }; + + struct Images { + scoped_refptr<RefCountedBytes> thumbnail; + ThumbnailScore thumbnail_score; + + // TODO(brettw): this will eventually store the favicon. + // scoped_refptr<RefCountedBytes> favicon; + }; + + // Initializes TopSites. + void Init(const FilePath& db_name); + + // Sets the given thumbnail for the given URL. Returns true if the thumbnail + // was updated. False means either the URL wasn't known to us, or we felt + // that our current thumbnail was superior to the given one. + bool SetPageThumbnail(const GURL& url, + const SkBitmap& thumbnail, + const ThumbnailScore& score); + + // Callback for GetMostVisitedURLs. + typedef Callback1<const MostVisitedURLList&>::Type GetTopSitesCallback; + + // Returns a list of most visited URLs via a callback. + // NOTE: the callback may be called immediately if we have the data cached. 
+ void GetMostVisitedURLs(CancelableRequestConsumer* consumer, + GetTopSitesCallback* callback); + + // Get a thumbnail for a given page. Returns true iff we have the thumbnail. + bool GetPageThumbnail(const GURL& url, RefCountedBytes** data) const; + + // For testing with a HistoryService mock. + void SetMockHistoryService(MockHistoryService* mhs); + + // Start reading thumbnails from the ThumbnailDatabase. + void StartMigration(); + + private: + friend class base::RefCountedThreadSafe<TopSites>; + friend class TopSitesTest; + FRIEND_TEST_ALL_PREFIXES(TopSitesTest, GetMostVisited); + FRIEND_TEST_ALL_PREFIXES(TopSitesTest, RealDatabase); + FRIEND_TEST_ALL_PREFIXES(TopSitesTest, MockDatabase); + FRIEND_TEST_ALL_PREFIXES(TopSitesTest, DeleteNotifications); + FRIEND_TEST_ALL_PREFIXES(TopSitesTest, GetUpdateDelay); + FRIEND_TEST_ALL_PREFIXES(TopSitesTest, Migration); + FRIEND_TEST_ALL_PREFIXES(TopSitesTest, QueueingRequestsForTopSites); + FRIEND_TEST_ALL_PREFIXES(TopSitesTest, CancelingRequestsForTopSites); + FRIEND_TEST_ALL_PREFIXES(TopSitesTest, AddTemporaryThumbnail); + + ~TopSites(); + + // Sets the thumbnail without writing to the database. Useful when + // reading last known top sites from the DB. + // Returns true if the thumbnail was set, false if the existing one is better. + bool SetPageThumbnailNoDB(const GURL& url, + const RefCountedBytes* thumbnail_data, + const ThumbnailScore& score); + + // A version of SetPageThumbnail that takes RefCountedBytes as + // returned by HistoryService. + bool SetPageThumbnail(const GURL& url, + const RefCountedBytes* thumbnail, + const ThumbnailScore& score); + + // Query history service for the list of available thumbnails. + void StartQueryForMostVisited(); + + // Query history service for the thumbnail for a given url. |index| + // is the index into top_sites_. + void StartQueryForThumbnail(size_t index); + + // Called when history service returns a list of top URLs. 
+  // The URLs that are in "new" but not "old" will have their index into
+ static void DiffMostVisited(const MostVisitedURLList& old_list, + const MostVisitedURLList& new_list, + std::vector<size_t>* added_urls, + std::vector<size_t>* deleted_urls, + std::vector<size_t>* moved_urls); + + // Implementation of NotificationObserver. + virtual void Observe(NotificationType type, + const NotificationSource& source, + const NotificationDetails& details); + + // Returns the delay until the next update of history is needed. + // Uses num_urls_changed + base::TimeDelta GetUpdateDelay(); + + // The following methods must be run on the DB thread since they + // access the database. + + // Reads the database from disk. Called on startup to get the last + // known top sites. + void ReadDatabase(); + + // Write a thumbnail to database. + void WriteThumbnailToDB(const MostVisitedURL& url, + int url_rank, + const TopSites::Images& thumbnail); + + // Updates the top sites list and writes the difference to disk. + void UpdateMostVisited(MostVisitedURLList most_visited); + + // Deletes the database file, then reinitializes the database. + void ResetDatabase(); + + // Called after TopSites completes migration. + void OnMigrationDone(); + + // Add a thumbnail for an unknown url. See temp_thumbnails_map_. + void AddTemporaryThumbnail(const GURL& url, + const RefCountedBytes* thumbnail, + const ThumbnailScore& score); + + Profile* profile_; + // A mockup to use for testing. If NULL, use the real HistoryService + // from the profile_. See SetMockHistoryService. + MockHistoryService* mock_history_service_; + CancelableRequestConsumerTSimple<size_t> cancelable_consumer_; + mutable Lock lock_; + + // The cached version of the top sites. The 0th item in this vector is the + // #1 site. + MostVisitedURLList top_sites_; + + // The images corresponding to the top_sites. This is indexed by the URL of + // the top site, so this doesn't have to be shuffled around when the ordering + // changes of the top sites. Some top_sites_ entries may not have images. 
+ std::map<GURL, Images> top_images_; + + // Generated from the redirects to and from the most visited pages, this + // maps the redirects to the index into top_sites_ that contains it. + std::map<GURL, size_t> canonical_urls_; + + // Timer for updating TopSites data. + base::OneShotTimer<TopSites> timer_; + + scoped_ptr<TopSitesDatabase> db_; + FilePath db_path_; + + NotificationRegistrar registrar_; + + // The number of URLs changed on the last update. + size_t last_num_urls_changed_; + + // Are we in the middle of migration from ThumbnailsDatabase to + // TopSites? + bool migration_in_progress_; + + // URLs for which we are expecting thumbnails. + std::set<GURL> migration_pending_urls_; + + // The map of requests for the top sites list. Can only be + // non-empty at startup. After we read the top sites from the DB, we'll + // always have a cached list. + typedef std::set<scoped_refptr<CancelableRequest<GetTopSitesCallback> > > + PendingCallbackSet; + PendingCallbackSet pending_callbacks_; + + // Are we waiting for the top sites from HistoryService? + bool waiting_for_results_; + + // Stores thumbnails for unknown pages. When SetPageThumbnail is + // called, if we don't know about that URL yet and we don't have + // enough Top Sites (new profile), we store it until the next + // UpdateMostVisitedURLs call. + std::map<GURL, Images> temp_thumbnails_map_; + + // TODO(brettw): use the blacklist. + // std::set<GURL> blacklist_; + + DISALLOW_COPY_AND_ASSIGN(TopSites); +}; + +} // namespace history + +#endif // CHROME_BROWSER_HISTORY_TOP_SITES_H_ diff --git a/chrome/browser/history/top_sites_database.cc b/chrome/browser/history/top_sites_database.cc new file mode 100644 index 0000000..99f0bb4 --- /dev/null +++ b/chrome/browser/history/top_sites_database.cc @@ -0,0 +1,329 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "app/sql/transaction.h" +#include "base/string_util.h" +#include "chrome/browser/diagnostics/sqlite_diagnostics.h" +#include "chrome/browser/history/top_sites.h" +#include "chrome/browser/history/top_sites_database.h" + +namespace history { + +TopSitesDatabaseImpl::TopSitesDatabaseImpl() { +} + +bool TopSitesDatabaseImpl::Init(const FilePath& db_name) { + // Settings copied from ThumbnailDatabase. + db_.set_error_delegate(GetErrorHandlerForThumbnailDb()); + db_.set_page_size(4096); + db_.set_cache_size(64); + + if (!db_.Open(db_name)) { + LOG(WARNING) << db_.GetErrorMessage(); + return false; + } + + return InitThumbnailTable(); +} + +bool TopSitesDatabaseImpl::InitThumbnailTable() { + if (!db_.DoesTableExist("thumbnails")) { + if (!db_.Execute("CREATE TABLE thumbnails (" + "url LONGVARCHAR PRIMARY KEY," + "url_rank INTEGER ," + "title LONGVARCHAR," + "thumbnail BLOB," + "redirects LONGVARCHAR," + "boring_score DOUBLE DEFAULT 1.0, " + "good_clipping INTEGER DEFAULT 0, " + "at_top INTEGER DEFAULT 0, " + "last_updated INTEGER DEFAULT 0) ")) { + LOG(WARNING) << db_.GetErrorMessage(); + return false; + } + } + return true; +} + +void TopSitesDatabaseImpl::GetPageThumbnails(MostVisitedURLList* urls, + std::map<GURL, + TopSites::Images>* thumbnails) { + sql::Statement statement(db_.GetCachedStatement( + SQL_FROM_HERE, + "SELECT url, url_rank, title, thumbnail, redirects, " + "boring_score, good_clipping, at_top, last_updated " + "FROM thumbnails ORDER BY url_rank ")); + + if (!statement) { + LOG(WARNING) << db_.GetErrorMessage(); + return; + } + + urls->clear(); + thumbnails->clear(); + + while (statement.Step()) { + // Results are sorted by url_rank. 
+ MostVisitedURL url; + GURL gurl(statement.ColumnString(0)); + url.url = gurl; + url.title = statement.ColumnString16(2); + std::string redirects = statement.ColumnString(4); + SetRedirects(redirects, &url); + urls->push_back(url); + + std::vector<unsigned char> data; + statement.ColumnBlobAsVector(3, &data); + TopSites::Images thumbnail; + thumbnail.thumbnail = RefCountedBytes::TakeVector(&data); + thumbnail.thumbnail_score.boring_score = statement.ColumnDouble(5); + thumbnail.thumbnail_score.good_clipping = statement.ColumnBool(6); + thumbnail.thumbnail_score.at_top = statement.ColumnBool(7); + thumbnail.thumbnail_score.time_at_snapshot = + base::Time::FromInternalValue(statement.ColumnInt64(8)); + + (*thumbnails)[gurl] = thumbnail; + } +} + +// static +std::string TopSitesDatabaseImpl::GetRedirects(const MostVisitedURL& url) { + std::vector<std::string> redirects; + for (size_t i = 0; i < url.redirects.size(); i++) + redirects.push_back(url.redirects[i].spec()); + return JoinString(redirects, ' '); +} + +// static +void TopSitesDatabaseImpl::SetRedirects(const std::string& redirects, + MostVisitedURL* url) { + std::vector<std::string> redirects_vector; + SplitStringAlongWhitespace(redirects, &redirects_vector); + for (size_t i = 0; i < redirects_vector.size(); i++) + url->redirects.push_back(GURL(redirects_vector[i])); +} + +void TopSitesDatabaseImpl::SetPageThumbnail(const MostVisitedURL& url, + int new_rank, + const TopSites::Images& thumbnail) { + sql::Transaction transaction(&db_); + transaction.Begin(); + + int rank = GetURLRank(url); + if (rank == -1) { + AddPageThumbnail(url, new_rank, thumbnail); + } else { + UpdatePageRankNoTransaction(url, new_rank); + UpdatePageThumbnail(url, thumbnail); + } + + transaction.Commit(); +} + +void TopSitesDatabaseImpl::UpdatePageThumbnail( + const MostVisitedURL& url, const TopSites::Images& thumbnail) { + sql::Statement statement(db_.GetCachedStatement( + SQL_FROM_HERE, + "UPDATE thumbnails SET " + "title = ?, 
thumbnail = ?, redirects = ?, " + "boring_score = ?, good_clipping = ?, at_top = ?, last_updated = ? " + "WHERE url = ? ")); + if (!statement) + return; + + statement.BindString16(0, url.title); + if (thumbnail.thumbnail.get()) { + statement.BindBlob(1, &thumbnail.thumbnail->data.front(), + static_cast<int>(thumbnail.thumbnail->data.size())); + } + statement.BindString(2, GetRedirects(url)); + const ThumbnailScore& score = thumbnail.thumbnail_score; + statement.BindDouble(3, score.boring_score); + statement.BindBool(4, score.good_clipping); + statement.BindBool(5, score.at_top); + statement.BindInt64(6, score.time_at_snapshot.ToInternalValue()); + statement.BindString(7, url.url.spec()); + if (!statement.Run()) + NOTREACHED() << db_.GetErrorMessage(); +} + +void TopSitesDatabaseImpl::AddPageThumbnail(const MostVisitedURL& url, + int new_rank, + const TopSites::Images& thumbnail) { + int count = GetRowCount(); + + sql::Statement statement(db_.GetCachedStatement( + SQL_FROM_HERE, + "INSERT OR REPLACE INTO thumbnails " + "(url, url_rank, title, thumbnail, redirects, " + "boring_score, good_clipping, at_top, last_updated) " + "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)")); + if (!statement) + return; + + statement.BindString(0, url.url.spec()); + statement.BindInt(1, count); // Make it the last url. 
+ statement.BindString16(2, url.title); + if (thumbnail.thumbnail.get()) { + statement.BindBlob(3, &thumbnail.thumbnail->data.front(), + static_cast<int>(thumbnail.thumbnail->data.size())); + } + statement.BindString(4, GetRedirects(url)); + const ThumbnailScore& score = thumbnail.thumbnail_score; + statement.BindDouble(5, score.boring_score); + statement.BindBool(6, score.good_clipping); + statement.BindBool(7, score.at_top); + statement.BindInt64(8, score.time_at_snapshot.ToInternalValue()); + if (!statement.Run()) + NOTREACHED() << db_.GetErrorMessage(); + + UpdatePageRankNoTransaction(url, new_rank); +} + +void TopSitesDatabaseImpl::UpdatePageRank(const MostVisitedURL& url, + int new_rank) { + sql::Transaction transaction(&db_); + transaction.Begin(); + UpdatePageRankNoTransaction(url, new_rank); + transaction.Commit(); +} + +// Caller should have a transaction open. +void TopSitesDatabaseImpl::UpdatePageRankNoTransaction( + const MostVisitedURL& url, int new_rank) { + int prev_rank = GetURLRank(url); + if (prev_rank == -1) { + NOTREACHED() << "Updating rank of an unknown URL: " << url.url.spec(); + return; + } + + // Shift the ranks. + if (prev_rank > new_rank) { + // Shift up + sql::Statement shift_statement(db_.GetCachedStatement( + SQL_FROM_HERE, + "UPDATE thumbnails " + "SET url_rank = url_rank + 1 " + "WHERE url_rank >= ? AND url_rank < ?")); + shift_statement.BindInt(0, new_rank); + shift_statement.BindInt(1, prev_rank); + if (shift_statement) + shift_statement.Run(); + } else if (prev_rank < new_rank) { + // Shift down + sql::Statement shift_statement(db_.GetCachedStatement( + SQL_FROM_HERE, + "UPDATE thumbnails " + "SET url_rank = url_rank - 1 " + "WHERE url_rank > ? AND url_rank <= ?")); + shift_statement.BindInt(0, prev_rank); + shift_statement.BindInt(1, new_rank); + if (shift_statement) + shift_statement.Run(); + } + + // Set the url's rank. 
+ sql::Statement set_statement(db_.GetCachedStatement( + SQL_FROM_HERE, + "UPDATE thumbnails " + "SET url_rank = ? " + "WHERE url == ?")); + set_statement.BindInt(0, new_rank); + set_statement.BindString(1, url.url.spec()); + if (set_statement) + set_statement.Run(); +} + +bool TopSitesDatabaseImpl::GetPageThumbnail(const GURL& url, + TopSites::Images* thumbnail) { + sql::Statement statement(db_.GetCachedStatement( + SQL_FROM_HERE, + "SELECT thumbnail, boring_score, good_clipping, at_top, last_updated " + "FROM thumbnails WHERE url=?")); + + if (!statement) { + LOG(WARNING) << db_.GetErrorMessage(); + return false; + } + + statement.BindString(0, url.spec()); + if (!statement.Step()) + return false; + + std::vector<unsigned char> data; + statement.ColumnBlobAsVector(0, &data); + thumbnail->thumbnail = RefCountedBytes::TakeVector(&data); + thumbnail->thumbnail_score.boring_score = statement.ColumnDouble(1); + thumbnail->thumbnail_score.good_clipping = statement.ColumnBool(2); + thumbnail->thumbnail_score.at_top = statement.ColumnBool(3); + thumbnail->thumbnail_score.time_at_snapshot = + base::Time::FromInternalValue(statement.ColumnInt64(4)); + return true; +} + +int TopSitesDatabaseImpl::GetRowCount() { + int result = 0; + sql::Statement select_statement(db_.GetCachedStatement( + SQL_FROM_HERE, + "SELECT COUNT (url) FROM thumbnails")); + if (!select_statement) { + LOG(WARNING) << db_.GetErrorMessage(); + return result; + } + + if (select_statement.Step()) + result = select_statement.ColumnInt(0); + + return result; +} + +int TopSitesDatabaseImpl::GetURLRank(const MostVisitedURL& url) { + int result = -1; + sql::Statement select_statement(db_.GetCachedStatement( + SQL_FROM_HERE, + "SELECT url_rank " + "FROM thumbnails WHERE url=?")); + if (!select_statement) { + LOG(WARNING) << db_.GetErrorMessage(); + return result; + } + + select_statement.BindString(0, url.url.spec()); + if (select_statement.Step()) + result = select_statement.ColumnInt(0); + + return result; +} 
+ +// Remove the record for this URL. Returns true iff removed successfully. +bool TopSitesDatabaseImpl::RemoveURL(const MostVisitedURL& url) { + int old_rank = GetURLRank(url); + if (old_rank < 0) + return false; + + sql::Transaction transaction(&db_); + transaction.Begin(); + // Decrement all following ranks. + sql::Statement shift_statement(db_.GetCachedStatement( + SQL_FROM_HERE, + "UPDATE thumbnails " + "SET url_rank = url_rank - 1 " + "WHERE url_rank > ?")); + if (!shift_statement) + return false; + shift_statement.BindInt(0, old_rank); + shift_statement.Run(); + + sql::Statement delete_statement( + db_.GetCachedStatement(SQL_FROM_HERE, + "DELETE FROM thumbnails WHERE url = ?")); + if (!delete_statement) + return false; + delete_statement.BindString(0, url.url.spec()); + delete_statement.Run(); + + return transaction.Commit(); +} + +} // namespace history diff --git a/chrome/browser/history/top_sites_database.h b/chrome/browser/history/top_sites_database.h new file mode 100644 index 0000000..cfb362c --- /dev/null +++ b/chrome/browser/history/top_sites_database.h @@ -0,0 +1,136 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CHROME_BROWSER_HISTORY_TOP_SITES_DATABASE_H_ +#define CHROME_BROWSER_HISTORY_TOP_SITES_DATABASE_H_ + +#include <map> +#include <string> +#include <vector> + +#include "app/sql/connection.h" +#include "base/ref_counted.h" +#include "chrome/browser/history/history_types.h" +#include "chrome/browser/history/url_database.h" // For DBCloseScoper. + +class FilePath; +class RefCountedMemory; +class SkBitmap; +class TopSites; + +namespace base { +class Time; +} + +namespace history { + +// Interface to be implemented by the real storage layer as well as +// the mockup database for testing. 
class TopSitesDatabase {
 public:
  virtual ~TopSitesDatabase() {}

  // Must be called before any other method.  The base implementation has no
  // backing store and always succeeds.
  virtual bool Init(const FilePath& filename) {
    return true;
  }

  // Returns a list of all URLs currently in the table.
  virtual void GetPageThumbnails(MostVisitedURLList* urls,
                                 std::map<GURL,
                                 TopSites::Images>* thumbnails) = 0;

  // Set a thumbnail for a URL. |url_rank| is the position of the URL
  // in the list of TopURLs, zero-based.
  // If the URL is not in the table, add it. If it is, replace its
  // thumbnail.
  virtual void SetPageThumbnail(const MostVisitedURL& url,
                                int url_rank,
                                const TopSites::Images& thumbnail) = 0;

  // Update rank of a URL that's already in the database.
  virtual void UpdatePageRank(const MostVisitedURL& url, int new_rank) = 0;

  // Convenience wrapper that forwards to the GURL overload below.
  // NOTE(review): non-virtual overload alongside a virtual one of the same
  // name -- derived classes redeclaring GetPageThumbnail hide this wrapper
  // unless they re-expose it; confirm call sites use the base pointer.
  bool GetPageThumbnail(const MostVisitedURL& url,
                        TopSites::Images* thumbnail) {
    return GetPageThumbnail(url.url, thumbnail);
  }

  // Get a thumbnail for a given page. Returns true iff we have the thumbnail.
  virtual bool GetPageThumbnail(const GURL& url,
                                TopSites::Images* thumbnail) = 0;

  // Remove the record for this URL. Returns true iff removed successfully.
  virtual bool RemoveURL(const MostVisitedURL& url) = 0;
};

// SQLite-backed implementation of TopSitesDatabase; rows live in the
// "thumbnails" table, ordered by a dense zero-based url_rank column.
class TopSitesDatabaseImpl : public TopSitesDatabase {
 public:
  TopSitesDatabaseImpl();
  ~TopSitesDatabaseImpl() {}

  // Must be called after creation but before any other methods are called.
  // Returns true on success. If false, no other functions should be called.
  virtual bool Init(const FilePath& db_name);

  // Thumbnails ----------------------------------------------------------------

  // Returns a list of all URLs currently in the table.
  // WARNING: clears both input arguments.
  virtual void GetPageThumbnails(MostVisitedURLList* urls,
                                 std::map<GURL, TopSites::Images>* thumbnails);

  // Set a thumbnail for a URL. |url_rank| is the position of the URL
  // in the list of TopURLs, zero-based.
  // If the URL is not in the table, add it. If it is, replace its
  // thumbnail and rank. Shift the ranks of other URLs if necessary.
  virtual void SetPageThumbnail(const MostVisitedURL& url,
                                int new_rank,
                                const TopSites::Images& thumbnail);

  // Sets the rank for a given URL. The URL must be in the database.
  // Use SetPageThumbnail if it's not.
  virtual void UpdatePageRank(const MostVisitedURL& url, int new_rank);

  // Get a thumbnail for a given page. Returns true iff we have the thumbnail.
  virtual bool GetPageThumbnail(const GURL& url,
                                TopSites::Images* thumbnail);

  // Remove the record for this URL. Returns true iff removed successfully.
  virtual bool RemoveURL(const MostVisitedURL& url);

 private:
  // Creates the thumbnail table, returning true if the table already exists
  // or was successfully created.
  bool InitThumbnailTable();

  // Adds a new URL to the database, appending it last and then moving it to
  // |new_rank|.
  void AddPageThumbnail(const MostVisitedURL& url,
                        int new_rank,
                        const TopSites::Images& thumbnail);

  // Sets the page rank. Should be called within an open transaction.
  void UpdatePageRankNoTransaction(const MostVisitedURL& url, int new_rank);

  // Updates thumbnail of a URL that's already in the database.
  void UpdatePageThumbnail(const MostVisitedURL& url,
                           const TopSites::Images& thumbnail);

  // Returns the URL's current rank or -1 if it is not present.
  int GetURLRank(const MostVisitedURL& url);

  // Returns the number of URLs (rows) in the database.
  int GetRowCount();

  // Encodes redirects into a single space-separated string.
  static std::string GetRedirects(const MostVisitedURL& url);

  // Decodes redirects from a string and sets them for the url.
  static void SetRedirects(const std::string& redirects, MostVisitedURL* url);

  sql::Connection db_;
};

}  // namespace history

#endif  // CHROME_BROWSER_HISTORY_TOP_SITES_DATABASE_H_
diff --git a/chrome/browser/history/top_sites_unittest.cc b/chrome/browser/history/top_sites_unittest.cc
new file mode 100644
index 0000000..a6b7e7b
--- /dev/null
+++ b/chrome/browser/history/top_sites_unittest.cc
@@ -0,0 +1,950 @@
// Copyright (c) 2009 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/scoped_temp_dir.h"
#include "base/string_util.h"
#include "chrome/browser/history/top_sites.h"
#include "chrome/common/chrome_paths.h"
#include "chrome/browser/history/history_marshaling.h"
#include "chrome/browser/history/top_sites_database.h"
#include "chrome/browser/history/history_notifications.h"
#include "chrome/test/testing_profile.h"
#include "chrome/tools/profiles/thumbnail-inl.h"
#include "gfx/codec/jpeg_codec.h"
#include "googleurl/src/gurl.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "third_party/skia/include/core/SkBitmap.h"


namespace history {

// Arbitrary opaque bytes used as fake thumbnail data in the tests below.
static const unsigned char kBlob[] =
    "12346102356120394751634516591348710478123649165419234519234512349134";

// Fixture that owns a TopSites instance backed by a TestingProfile and a
// temp-dir database file, plus canned thumbnail payloads for the tests.
class TopSitesTest : public testing::Test {
 public:
  TopSitesTest() : number_of_callbacks_(0) {
  }
  ~TopSitesTest() {
  }

  TopSites& top_sites() { return *top_sites_; }
  MostVisitedURLList& urls() { return urls_; }
  Profile& profile() {return *profile_;}
  FilePath& file_name() { return file_name_; }
  RefCountedBytes* google_thumbnail() { return google_thumbnail_; }
  RefCountedBytes* random_thumbnail() { return random_thumbnail_; }
  RefCountedBytes* weewar_thumbnail() { return weewar_thumbnail_; }
  CancelableRequestConsumer* consumer() { return &consumer_; }
  size_t number_of_callbacks() {return number_of_callbacks_; }

  virtual void SetUp() {
    profile_.reset(new TestingProfile);
    top_sites_ = new TopSites(profile_.get());

    ASSERT_TRUE(temp_dir_.CreateUniqueTempDir());
    file_name_ = temp_dir_.path().AppendASCII("TopSites.db");
    EXPECT_TRUE(file_util::Delete(file_name_, false));

    std::vector<unsigned char> random_data(kBlob, kBlob + sizeof(kBlob));
    std::vector<unsigned char> google_data(kGoogleThumbnail,
                                           kGoogleThumbnail +
                                           sizeof(kGoogleThumbnail));
    std::vector<unsigned char> weewar_data(kWeewarThumbnail,
                                           kWeewarThumbnail +
                                           sizeof(kWeewarThumbnail));
    random_thumbnail_ = new RefCountedBytes(random_data);
    google_thumbnail_ = new RefCountedBytes(google_data);
    weewar_thumbnail_ = new RefCountedBytes(weewar_data);
  }

  virtual void TearDown() {
    profile_.reset();
    top_sites_ = NULL;
    EXPECT_TRUE(file_util::Delete(file_name_, false));
  }

  // Callback for TopSites::GetMostVisitedURLs; records the result and counts
  // invocations.
  void OnTopSitesAvailable(const history::MostVisitedURLList& data) {
    urls_ = data;
    number_of_callbacks_++;
  }

  // Wrappers that allow private TopSites functions to be called from the
  // individual tests without making them all be friends.
  GURL GetCanonicalURL(const GURL& url) const {
    AutoLock lock(top_sites_->lock_);  // The function asserts it's in the lock.
    return top_sites_->GetCanonicalURL(url);
  }

  void StoreMostVisited(std::vector<MostVisitedURL>* urls) {
    AutoLock lock(top_sites_->lock_);  // The function asserts it's in the lock.
    top_sites_->StoreMostVisited(urls);
  }

  static void DiffMostVisited(const std::vector<MostVisitedURL>& old_list,
                              const std::vector<MostVisitedURL>& new_list,
                              std::vector<size_t>* added_urls,
                              std::vector<size_t>* deleted_urls,
                              std::vector<size_t>* moved_urls) {
    TopSites::DiffMostVisited(old_list, new_list,
                              added_urls, deleted_urls, moved_urls);
  }

 private:
  scoped_refptr<TopSites> top_sites_;
  MostVisitedURLList urls_;
  size_t number_of_callbacks_;
  scoped_ptr<TestingProfile> profile_;
  ScopedTempDir temp_dir_;
  FilePath file_name_;  // Database filename.
  scoped_refptr<RefCountedBytes> google_thumbnail_;
  scoped_refptr<RefCountedBytes> random_thumbnail_;
  scoped_refptr<RefCountedBytes> weewar_thumbnail_;
  MessageLoop message_loop_;
  CancelableRequestConsumer consumer_;

  DISALLOW_COPY_AND_ASSIGN(TopSitesTest);
};

// A mockup of a HistoryService used for testing TopSites.
class MockHistoryServiceImpl : public TopSites::MockHistoryService {
 public:
  MockHistoryServiceImpl() : num_thumbnail_requests_(0) {}

  // Calls the callback directly with the results.
  HistoryService::Handle QueryMostVisitedURLs(
      int result_count, int days_back,
      CancelableRequestConsumerBase* consumer,
      HistoryService::QueryMostVisitedURLsCallback* callback) {
    callback->Run(CancelableRequestProvider::Handle(0),  // Handle is unused.
                  most_visited_urls_);
    delete callback;
    return 0;
  }

  // Add a page to the end of the pages list.
  void AppendMockPage(const GURL& url,
                      const string16& title) {
    MostVisitedURL page;
    page.url = url;
    page.title = title;
    page.redirects = RedirectList();
    page.redirects.push_back(url);
    most_visited_urls_.push_back(page);
  }

  // Removes the last URL in the list.
  void RemoveMostVisitedURL() {
    most_visited_urls_.pop_back();
  }

  // Serves a thumbnail request synchronously; the URL must be one that was
  // added via AppendMockPage.  Always returns an empty thumbnail.
  virtual void GetPageThumbnail(
      const GURL& url,
      CancelableRequestConsumerTSimple<size_t>* consumer,
      HistoryService::ThumbnailDataCallback* callback,
      size_t index) {
    num_thumbnail_requests_++;
    MostVisitedURL mvu;
    mvu.url = url;
    MostVisitedURLList::iterator pos = std::find(most_visited_urls_.begin(),
                                                 most_visited_urls_.end(),
                                                 mvu);
    EXPECT_TRUE(pos != most_visited_urls_.end());
    scoped_refptr<RefCountedBytes> thumbnail;
    callback->Run(index, thumbnail);
    delete callback;
  }

  void ResetNumberOfThumbnailRequests() {
    num_thumbnail_requests_ = 0;
  }

  int GetNumberOfThumbnailRequests() {
    return num_thumbnail_requests_;
  }

 private:
  MostVisitedURLList most_visited_urls_;
  int num_thumbnail_requests_;  // Number of calls to GetPageThumbnail.
};


// A mockup of a TopSitesDatabase used for testing TopSites.  Stores
// everything in memory: a rank-ordered list plus a URL->thumbnail map.
class MockTopSitesDatabaseImpl : public TopSitesDatabase {
 public:
  virtual void GetPageThumbnails(MostVisitedURLList* urls,
                                 std::map<GURL, TopSites::Images>* thumbnails) {
    // Return a copy of the vector.
    *urls = top_sites_list_;
    *thumbnails = thumbnails_map_;
  }

  virtual void SetPageThumbnail(const MostVisitedURL& url, int url_rank,
                                const TopSites::Images& thumbnail) {
    SetPageRank(url, url_rank);
    // Update thumbnail.
    thumbnails_map_[url.url] = thumbnail;
  }

  virtual void UpdatePageRank(const MostVisitedURL& url, int new_rank) {
    MostVisitedURLList::iterator pos = std::find(top_sites_list_.begin(),
                                                 top_sites_list_.end(),
                                                 url);
    // Is it in the right position?
    int rank = pos - top_sites_list_.begin();
    if (rank != new_rank) {
      // Move the URL to a new position.
      top_sites_list_.erase(pos);
      top_sites_list_.insert(top_sites_list_.begin() + new_rank, url);
    }
  }

  virtual void SetPageRank(const MostVisitedURL& url, int url_rank) {
    // Check if this url is in the list, and at which position.
    MostVisitedURLList::iterator pos = std::find(top_sites_list_.begin(),
                                                 top_sites_list_.end(),
                                                 url);
    if (pos == top_sites_list_.end()) {
      // Add it to the list.
      top_sites_list_.insert(top_sites_list_.begin() + url_rank, url);
    } else {
      UpdatePageRank(url, url_rank);
    }
  }

  // Get a thumbnail for a given page. Returns true iff we have the thumbnail.
  virtual bool GetPageThumbnail(const GURL& url,
                                TopSites::Images* thumbnail) {
    std::map<GURL, TopSites::Images>::const_iterator found =
        thumbnails_map_.find(url);
    if (found == thumbnails_map_.end())
      return false;  // No thumbnail for this URL.

    thumbnail->thumbnail = found->second.thumbnail;
    thumbnail->thumbnail_score = found->second.thumbnail_score;
    return true;
  }

  virtual bool RemoveURL(const MostVisitedURL& url) {
    // Comparison by url.
    MostVisitedURLList::iterator pos = std::find(top_sites_list_.begin(),
                                                 top_sites_list_.end(),
                                                 url);
    if (pos == top_sites_list_.end()) {
      return false;
    }
    top_sites_list_.erase(pos);
    thumbnails_map_.erase(url.url);
    return true;
  }

 private:
  MostVisitedURLList top_sites_list_;  // Keeps the URLs sorted by score (rank).
  std::map<GURL, TopSites::Images> thumbnails_map_;
};


// Helper function for appending a URL to a vector of "most visited" URLs,
// using the default values for everything but the URL.
static void AppendMostVisitedURL(std::vector<MostVisitedURL>* list,
                                 const GURL& url) {
  MostVisitedURL mv;
  mv.url = url;
  mv.redirects.push_back(url);
  list->push_back(mv);
}

// Returns true if t1 and t2 contain the same data.
static bool ThumbnailsAreEqual(RefCountedBytes* t1,
                               RefCountedBytes* t2) {
  if (!t1 || !t2)
    return false;
  if (t1->data.size() != t2->data.size())
    return false;
  return std::equal(t1->data.begin(),
                    t1->data.end(),
                    t2->data.begin());
}

// Same as AppendMostVisitedURL except that it adds a redirect from the first
// URL to the second.
static void AppendMostVisitedURLWithRedirect(
    std::vector<MostVisitedURL>* list,
    const GURL& redirect_source, const GURL& redirect_dest) {
  MostVisitedURL mv;
  mv.url = redirect_dest;
  mv.redirects.push_back(redirect_source);
  mv.redirects.push_back(redirect_dest);
  list->push_back(mv);
}

// Verifies that GetCanonicalURL maps a URL to the destination of its stored
// redirect chain, and reports unknown URLs as empty.
TEST_F(TopSitesTest, GetCanonicalURL) {
  ChromeThread db_loop(ChromeThread::DB, MessageLoop::current());
  // Have two chains:
  //   google.com -> www.google.com
  //   news.google.com (no redirects)
  GURL news("http://news.google.com/");
  GURL source("http://google.com/");
  GURL dest("http://www.google.com/");

  std::vector<MostVisitedURL> most_visited;
  AppendMostVisitedURLWithRedirect(&most_visited, source, dest);
  AppendMostVisitedURL(&most_visited, news);
  StoreMostVisited(&most_visited);

  // Random URLs not in the database shouldn't be reported as being in there.
  GURL result = GetCanonicalURL(GURL("http://fark.com/"));
  EXPECT_TRUE(result.is_empty());

  // Easy case, there are no redirects and the exact URL is stored.
  result = GetCanonicalURL(news);
  EXPECT_EQ(news, result);

  // The URL in question is the source URL in a redirect list.
  result = GetCanonicalURL(source);
  EXPECT_EQ(dest, result);

  // The URL in question is the destination of a redirect.
  result = GetCanonicalURL(dest);
  EXPECT_EQ(dest, result);
}

// Verifies that DiffMostVisited reports added, deleted, and moved indices.
TEST_F(TopSitesTest, DiffMostVisited) {
  GURL stays_the_same("http://staysthesame/");
  GURL gets_added_1("http://getsadded1/");
  GURL gets_added_2("http://getsadded2/");
  GURL gets_deleted_1("http://getsdeleted2/");
  GURL gets_moved_1("http://getsmoved1/");

  std::vector<MostVisitedURL> old_list;
  AppendMostVisitedURL(&old_list, stays_the_same);  // 0  (unchanged)
  AppendMostVisitedURL(&old_list, gets_deleted_1);  // 1  (deleted)
  AppendMostVisitedURL(&old_list, gets_moved_1);    // 2  (moved to 3)

  std::vector<MostVisitedURL> new_list;
  AppendMostVisitedURL(&new_list, stays_the_same);  // 0  (unchanged)
  AppendMostVisitedURL(&new_list, gets_added_1);    // 1  (added)
  AppendMostVisitedURL(&new_list, gets_added_2);    // 2  (added)
  AppendMostVisitedURL(&new_list, gets_moved_1);    // 3  (moved from 2)

  std::vector<size_t> added;
  std::vector<size_t> deleted;
  std::vector<size_t> moved;
  DiffMostVisited(old_list, new_list, &added, &deleted, &moved);

  ASSERT_EQ(2u, added.size());
  ASSERT_EQ(1u, deleted.size());
  ASSERT_EQ(1u, moved.size());

  // There should be 2 URLs added, we don't assume what order they're in inside
  // the result vector.
  EXPECT_TRUE(added[0] == 1 || added[1] == 1);
  EXPECT_TRUE(added[0] == 2 || added[1] == 2);

  EXPECT_EQ(1u, deleted[0]);
  EXPECT_EQ(3u, moved[0]);
}

// Verifies the score-based accept/reject rules for SetPageThumbnail, and
// that redirect sources and destinations share one canonical thumbnail.
TEST_F(TopSitesTest, SetPageThumbnail) {
  ChromeThread db_loop(ChromeThread::DB, MessageLoop::current());
  GURL url1a("http://google.com/");
  GURL url1b("http://www.google.com/");
  GURL url2("http://images.google.com/");
  GURL invalid_url("chrome://favicon/http://google.com/");

  std::vector<MostVisitedURL> list;
  AppendMostVisitedURL(&list, url2);

  MostVisitedURL mv;
  mv.url = url1b;
  mv.redirects.push_back(url1a);
  mv.redirects.push_back(url1b);
  list.push_back(mv);

  // Save our most visited data containing that one site.
  StoreMostVisited(&list);

  // Create a dummy thumbnail.
  SkBitmap thumbnail;
  thumbnail.setConfig(SkBitmap::kARGB_8888_Config, 4, 4);
  thumbnail.allocPixels();
  thumbnail.eraseRGB(0x00, 0x00, 0x00);

  base::Time now = base::Time::Now();
  ThumbnailScore low_score(1.0, true, true, now);
  ThumbnailScore medium_score(0.5, true, true, now);
  ThumbnailScore high_score(0.0, true, true, now);

  // Setting the thumbnail for invalid pages should fail.
  EXPECT_FALSE(top_sites().SetPageThumbnail(invalid_url,
                                            thumbnail, medium_score));

  // Setting the thumbnail for url2 should succeed, lower scores shouldn't
  // replace it, higher scores should.
  EXPECT_TRUE(top_sites().SetPageThumbnail(url2, thumbnail, medium_score));
  EXPECT_FALSE(top_sites().SetPageThumbnail(url2, thumbnail, low_score));
  EXPECT_TRUE(top_sites().SetPageThumbnail(url2, thumbnail, high_score));

  // Set on the redirect source should succeed. It should be replacable by
  // the same score on the redirect destination, which in turn should not
  // be replaced by the source again.
  EXPECT_TRUE(top_sites().SetPageThumbnail(url1a, thumbnail, medium_score));
  EXPECT_TRUE(top_sites().SetPageThumbnail(url1b, thumbnail, medium_score));
  EXPECT_FALSE(top_sites().SetPageThumbnail(url1a, thumbnail, medium_score));
}

// Verifies that a query through the mock history service populates the
// most-visited list in order.
TEST_F(TopSitesTest, GetMostVisited) {
  ChromeThread db_loop(ChromeThread::DB, MessageLoop::current());
  GURL news("http://news.google.com/");
  GURL google("http://google.com/");

  MockHistoryServiceImpl hs;
  hs.AppendMockPage(news, ASCIIToUTF16("Google News"));
  hs.AppendMockPage(google, ASCIIToUTF16("Google"));
  top_sites().SetMockHistoryService(&hs);

  top_sites().StartQueryForMostVisited();
  MessageLoop::current()->RunAllPending();
  top_sites().GetMostVisitedURLs(
      consumer(),
      NewCallback(static_cast<TopSitesTest*>(this),
                  &TopSitesTest::OnTopSitesAvailable));
  ASSERT_EQ(2u, urls().size());
  EXPECT_EQ(news, urls()[0].url);
  EXPECT_EQ(google, urls()[1].url);
}

// Exercises TopSites against MockTopSitesDatabaseImpl: reads, rank shifts,
// and write-back after a history query.
TEST_F(TopSitesTest, MockDatabase) {
  ChromeThread db_loop(ChromeThread::DB, MessageLoop::current());
  MockTopSitesDatabaseImpl* db = new MockTopSitesDatabaseImpl;
  // |db| is destroyed when the top_sites is destroyed in TearDown.
  top_sites().db_.reset(db);
  MostVisitedURL url;
  GURL asdf_url("http://asdf.com");
  string16 asdf_title(ASCIIToUTF16("ASDF"));
  GURL google_url("http://google.com");
  string16 google_title(ASCIIToUTF16("Google"));
  GURL news_url("http://news.google.com");
  string16 news_title(ASCIIToUTF16("Google News"));

  url.url = asdf_url;
  url.title = asdf_title;
  url.redirects.push_back(url.url);
  TopSites::Images thumbnail;
  db->SetPageThumbnail(url, 0, thumbnail);

  top_sites().ReadDatabase();

  top_sites().GetMostVisitedURLs(
      consumer(),
      NewCallback(static_cast<TopSitesTest*>(this),
                  &TopSitesTest::OnTopSitesAvailable));
  ASSERT_EQ(1u, urls().size());
  EXPECT_EQ(asdf_url, urls()[0].url);
  EXPECT_EQ(asdf_title, urls()[0].title);

  MostVisitedURL url2;
  url2.url = google_url;
  url2.title = google_title;
  url2.redirects.push_back(url2.url);

  // Add new thumbnail at rank 0 and shift the other result to 1.
  db->SetPageThumbnail(url2, 0, thumbnail);

  top_sites().ReadDatabase();

  top_sites().GetMostVisitedURLs(
      consumer(),
      NewCallback(static_cast<TopSitesTest*>(this),
                  &TopSitesTest::OnTopSitesAvailable));
  ASSERT_EQ(2u, urls().size());
  EXPECT_EQ(google_url, urls()[0].url);
  EXPECT_EQ(google_title, urls()[0].title);
  EXPECT_EQ(asdf_url, urls()[1].url);
  EXPECT_EQ(asdf_title, urls()[1].title);

  MockHistoryServiceImpl hs;
  // Add one old, one new URL to the history.
  hs.AppendMockPage(google_url, google_title);
  hs.AppendMockPage(news_url, news_title);
  top_sites().SetMockHistoryService(&hs);

  // This writes the new data to the DB.
  top_sites().StartQueryForMostVisited();
  MessageLoop::current()->RunAllPending();

  std::map<GURL, TopSites::Images> thumbnails;
  MostVisitedURLList result;
  db->GetPageThumbnails(&result, &thumbnails);
  ASSERT_EQ(2u, result.size());
  EXPECT_EQ(google_title, result[0].title);
  EXPECT_EQ(news_title, result[1].title);
}

// Test TopSitesDatabaseImpl.
// Exercises TopSitesDatabaseImpl directly: insert, rank shifting in both
// directions, and removal.
TEST_F(TopSitesTest, TopSitesDB) {
  TopSitesDatabaseImpl db;

  ASSERT_TRUE(db.Init(file_name()));

  MostVisitedURL url;
  GURL asdf_url("http://asdf.com");
  string16 asdf_title(ASCIIToUTF16("ASDF"));
  GURL google_url("http://google.com");
  string16 google_title(ASCIIToUTF16("Google"));
  GURL news_url("http://news.google.com");
  string16 news_title(ASCIIToUTF16("Google News"));

  url.url = asdf_url;
  url.title = asdf_title;
  url.redirects.push_back(url.url);
  TopSites::Images thumbnail;
  thumbnail.thumbnail = random_thumbnail();
  // Add asdf at rank 0.
  db.SetPageThumbnail(url, 0, thumbnail);

  TopSites::Images result;
  EXPECT_TRUE(db.GetPageThumbnail(url.url, &result));
  EXPECT_EQ(thumbnail.thumbnail->data.size(), result.thumbnail->data.size());
  EXPECT_TRUE(ThumbnailsAreEqual(thumbnail.thumbnail, result.thumbnail));

  MostVisitedURLList urls;
  std::map<GURL, TopSites::Images> thumbnails;
  db.GetPageThumbnails(&urls, &thumbnails);
  ASSERT_EQ(1u, urls.size());
  EXPECT_EQ(asdf_url, urls[0].url);
  EXPECT_EQ(asdf_title, urls[0].title);

  url.url = google_url;
  url.title = google_title;

  // Add google at rank 1 - no rank shifting.
  db.SetPageThumbnail(url, 1, thumbnail);
  db.GetPageThumbnails(&urls, &thumbnails);
  ASSERT_EQ(2u, urls.size());
  EXPECT_EQ(asdf_url, urls[0].url);
  EXPECT_EQ(asdf_title, urls[0].title);
  EXPECT_EQ(google_url, urls[1].url);
  EXPECT_EQ(google_title, urls[1].title);

  url.url = news_url;
  url.title = news_title;

  // Add news at rank 1 - shift google to rank 2.
  db.SetPageThumbnail(url, 1, thumbnail);
  db.GetPageThumbnails(&urls, &thumbnails);
  ASSERT_EQ(3u, urls.size());
  EXPECT_EQ(asdf_url, urls[0].url);
  EXPECT_EQ(news_url, urls[1].url);
  EXPECT_EQ(google_url, urls[2].url);

  // Move news at rank 0 - shift the rest up.
  db.SetPageThumbnail(url, 0, thumbnail);
  db.GetPageThumbnails(&urls, &thumbnails);
  ASSERT_EQ(3u, urls.size());
  EXPECT_EQ(news_url, urls[0].url);
  EXPECT_EQ(asdf_url, urls[1].url);
  EXPECT_EQ(google_url, urls[2].url);

  // Move news at rank 2 - shift the rest down.
  db.SetPageThumbnail(url, 2, thumbnail);
  db.GetPageThumbnails(&urls, &thumbnails);
  ASSERT_EQ(3u, urls.size());
  EXPECT_EQ(asdf_url, urls[0].url);
  EXPECT_EQ(google_url, urls[1].url);
  EXPECT_EQ(news_url, urls[2].url);

  // Delete asdf.
  url.url = asdf_url;
  db.RemoveURL(url);

  db.GetPageThumbnails(&urls, &thumbnails);
  ASSERT_EQ(2u, urls.size());
  EXPECT_EQ(google_url, urls[0].url);
  EXPECT_EQ(news_url, urls[1].url);
}

// Test TopSites with a real database.
TEST_F(TopSitesTest, RealDatabase) {
  ChromeThread db_loop(ChromeThread::DB, MessageLoop::current());
  TopSitesDatabaseImpl* db = new TopSitesDatabaseImpl;

  ASSERT_TRUE(db->Init(file_name()));
  // |db| is destroyed when the top_sites is destroyed in TearDown.
  top_sites().db_.reset(db);
  MostVisitedURL url;
  GURL asdf_url("http://asdf.com");
  string16 asdf_title(ASCIIToUTF16("ASDF"));
  GURL google1_url("http://google.com");
  GURL google2_url("http://google.com/redirect");
  GURL google3_url("http://www.google.com");
  string16 google_title(ASCIIToUTF16("Google"));
  GURL news_url("http://news.google.com");
  string16 news_title(ASCIIToUTF16("Google News"));

  url.url = asdf_url;
  url.title = asdf_title;
  url.redirects.push_back(url.url);
  TopSites::Images thumbnail;
  thumbnail.thumbnail = random_thumbnail();
  db->SetPageThumbnail(url, 0, thumbnail);

  top_sites().ReadDatabase();

  top_sites().GetMostVisitedURLs(
      consumer(),
      NewCallback(static_cast<TopSitesTest*>(this),
                  &TopSitesTest::OnTopSitesAvailable));
  ASSERT_EQ(1u, urls().size());
  EXPECT_EQ(asdf_url, urls()[0].url);
  EXPECT_EQ(asdf_title, urls()[0].title);

  TopSites::Images img_result;
  db->GetPageThumbnail(asdf_url, &img_result);
  EXPECT_TRUE(img_result.thumbnail != NULL);
  EXPECT_TRUE(ThumbnailsAreEqual(random_thumbnail(), img_result.thumbnail));

  RefCountedBytes* thumbnail_result;
  EXPECT_TRUE(top_sites().GetPageThumbnail(asdf_url, &thumbnail_result));
  EXPECT_TRUE(thumbnail_result != NULL);
  EXPECT_TRUE(ThumbnailsAreEqual(random_thumbnail(), thumbnail_result));

  MostVisitedURL url2;
  url2.url = google1_url;
  url2.title = google_title;
  url2.redirects.push_back(google1_url);
  url2.redirects.push_back(google2_url);
  url2.redirects.push_back(google3_url);

  // Add new thumbnail at rank 0 and shift the other result to 1.
  TopSites::Images g_thumbnail;
  g_thumbnail.thumbnail = google_thumbnail();
  db->SetPageThumbnail(url2, 0, g_thumbnail);

  top_sites().ReadDatabase();

  top_sites().GetMostVisitedURLs(
      consumer(),
      NewCallback(static_cast<TopSitesTest*>(this),
                  &TopSitesTest::OnTopSitesAvailable));
  ASSERT_EQ(2u, urls().size());
  EXPECT_EQ(google1_url, urls()[0].url);
  EXPECT_EQ(google_title, urls()[0].title);
  EXPECT_TRUE(top_sites().GetPageThumbnail(google1_url, &thumbnail_result));
  EXPECT_TRUE(ThumbnailsAreEqual(google_thumbnail(), thumbnail_result));
  ASSERT_EQ(3u, urls()[0].redirects.size());
  EXPECT_EQ(google1_url, urls()[0].redirects[0]);
  EXPECT_EQ(google2_url, urls()[0].redirects[1]);
  EXPECT_EQ(google3_url, urls()[0].redirects[2]);

  EXPECT_EQ(asdf_url, urls()[1].url);
  EXPECT_EQ(asdf_title, urls()[1].title);

  MockHistoryServiceImpl hs;
  // Add one old, one new URL to the history.
  hs.AppendMockPage(google1_url, google_title);
  hs.AppendMockPage(news_url, news_title);
  top_sites().SetMockHistoryService(&hs);

  // This requests data from History Service and writes it to the DB.
  top_sites().StartQueryForMostVisited();
  MessageLoop::current()->RunAllPending();

  std::map<GURL, TopSites::Images> thumbnails;
  MostVisitedURLList results;
  db->GetPageThumbnails(&results, &thumbnails);
  ASSERT_EQ(2u, results.size());
  EXPECT_EQ(google_title, results[0].title);
  EXPECT_EQ(news_title, results[1].title);

  scoped_ptr<SkBitmap> weewar_bitmap(
      gfx::JPEGCodec::Decode(weewar_thumbnail()->front(),
                             weewar_thumbnail()->size()));

  base::Time now = base::Time::Now();
  ThumbnailScore low_score(1.0, true, true, now);
  ThumbnailScore medium_score(0.5, true, true, now);
  ThumbnailScore high_score(0.0, true, true, now);

  // 1. Set to weewar. (Writes the thumbnail to the DB.)
  EXPECT_TRUE(top_sites().SetPageThumbnail(google1_url,
                                           *weewar_bitmap,
                                           medium_score));
  RefCountedBytes* out_1;
  TopSites::Images out_2;
  EXPECT_TRUE(top_sites().GetPageThumbnail(google1_url, &out_1));

  MessageLoop::current()->RunAllPending();

  db->GetPageThumbnail(url2.url, &out_2);
  EXPECT_TRUE(ThumbnailsAreEqual(out_1, out_2.thumbnail));

  scoped_ptr<SkBitmap> google_bitmap(
      gfx::JPEGCodec::Decode(google_thumbnail()->front(),
                             google_thumbnail()->size()));

  // 2. Set to google - low score.
  EXPECT_FALSE(top_sites().SetPageThumbnail(google1_url,
                                            *google_bitmap,
                                            low_score));

  // 3. Set to google - high score.
  EXPECT_TRUE(top_sites().SetPageThumbnail(google1_url,
                                           *google_bitmap,
                                           high_score));
  // Check that the thumbnail was updated.
  EXPECT_TRUE(top_sites().GetPageThumbnail(google1_url, &out_1));
  EXPECT_FALSE(ThumbnailsAreEqual(out_1, out_2.thumbnail));
  MessageLoop::current()->RunAllPending();

  // Read the new thumbnail from the DB - should match what's in TopSites.
  db->GetPageThumbnail(url2.url, &out_2);
  EXPECT_TRUE(ThumbnailsAreEqual(out_1, out_2.thumbnail));
  EXPECT_TRUE(high_score.Equals(out_2.thumbnail_score));
}

// This test has been crashing unit_tests on Mac 10.6.
// See http://crbug.com/49799
TEST_F(TopSitesTest, DISABLED_DeleteNotifications) {
  ChromeThread db_loop(ChromeThread::DB, MessageLoop::current());
  GURL google1_url("http://google.com");
  GURL google2_url("http://google.com/redirect");
  GURL google3_url("http://www.google.com");
  string16 google_title(ASCIIToUTF16("Google"));
  GURL news_url("http://news.google.com");
  string16 news_title(ASCIIToUTF16("Google News"));

  MockHistoryServiceImpl hs;

  top_sites().Init(file_name());

  hs.AppendMockPage(google1_url, google_title);
  hs.AppendMockPage(news_url, news_title);
  top_sites().SetMockHistoryService(&hs);

  top_sites().StartQueryForMostVisited();
  MessageLoop::current()->RunAllPending();

  top_sites().GetMostVisitedURLs(
      consumer(),
      NewCallback(static_cast<TopSitesTest*>(this),
                  &TopSitesTest::OnTopSitesAvailable));
  ASSERT_EQ(2u, urls().size());

  hs.RemoveMostVisitedURL();

  // Simulate a partial history deletion, then a full one, checking that
  // TopSites re-queries and shrinks accordingly.
  history::URLsDeletedDetails details;
  details.all_history = false;
  top_sites().Observe(NotificationType::HISTORY_URLS_DELETED,
                      Source<Profile> (&profile()),
                      (const NotificationDetails&)details);
  MessageLoop::current()->RunAllPending();

  top_sites().GetMostVisitedURLs(
      consumer(),
      NewCallback(static_cast<TopSitesTest*>(this),
                  &TopSitesTest::OnTopSitesAvailable));
  ASSERT_EQ(1u, urls().size());
  EXPECT_EQ(google_title, urls()[0].title);

  hs.RemoveMostVisitedURL();
  details.all_history = true;
  top_sites().Observe(NotificationType::HISTORY_URLS_DELETED,
                      Source<Profile> (&profile()),
                      (const NotificationDetails&)details);
  MessageLoop::current()->RunAllPending();
  top_sites().GetMostVisitedURLs(
      consumer(),
      NewCallback(static_cast<TopSitesTest*>(this),
                  &TopSitesTest::OnTopSitesAvailable));
  ASSERT_EQ(0u, urls().size());
}

TEST_F(TopSitesTest, GetUpdateDelay) {
  top_sites().last_num_urls_changed_ = 0;
  EXPECT_EQ(30, top_sites().GetUpdateDelay().InSeconds());

  top_sites().top_sites_.resize(20);

top_sites().last_num_urls_changed_ = 0; + EXPECT_EQ(60, top_sites().GetUpdateDelay().InMinutes()); + + top_sites().last_num_urls_changed_ = 3; + EXPECT_EQ(52, top_sites().GetUpdateDelay().InMinutes()); + + top_sites().last_num_urls_changed_ = 20; + EXPECT_EQ(1, top_sites().GetUpdateDelay().InMinutes()); +} + +TEST_F(TopSitesTest, Migration) { + ChromeThread db_loop(ChromeThread::DB, MessageLoop::current()); + GURL google1_url("http://google.com"); + GURL google2_url("http://google.com/redirect"); + GURL google3_url("http://www.google.com"); + string16 google_title(ASCIIToUTF16("Google")); + GURL news_url("http://news.google.com"); + string16 news_title(ASCIIToUTF16("Google News")); + + MockHistoryServiceImpl hs; + + top_sites().Init(file_name()); + + hs.AppendMockPage(google1_url, google_title); + hs.AppendMockPage(news_url, news_title); + top_sites().SetMockHistoryService(&hs); + + top_sites().StartMigration(); + EXPECT_TRUE(top_sites().migration_in_progress_); + MessageLoop::current()->RunAllPending(); + EXPECT_EQ(2, hs.GetNumberOfThumbnailRequests()); + EXPECT_FALSE(top_sites().migration_in_progress_); +} + +TEST_F(TopSitesTest, QueueingRequestsForTopSites) { + ChromeThread db_loop(ChromeThread::DB, MessageLoop::current()); + CancelableRequestConsumer c1; + CancelableRequestConsumer c2; + CancelableRequestConsumer c3; + top_sites().GetMostVisitedURLs( + &c1, + NewCallback(static_cast<TopSitesTest*>(this), + &TopSitesTest::OnTopSitesAvailable)); + + top_sites().GetMostVisitedURLs( + &c2, + NewCallback(static_cast<TopSitesTest*>(this), + &TopSitesTest::OnTopSitesAvailable)); + + top_sites().GetMostVisitedURLs( + &c3, + NewCallback(static_cast<TopSitesTest*>(this), + &TopSitesTest::OnTopSitesAvailable)); + + EXPECT_EQ(0u, number_of_callbacks()); + EXPECT_EQ(0u, urls().size()); + + MostVisitedURLList pages; + MostVisitedURL url; + url.url = GURL("http://1.com/"); + url.redirects.push_back(url.url); + pages.push_back(url); + url.url = GURL("http://2.com/"); + 
url.redirects.push_back(url.url); + pages.push_back(url); + top_sites().OnTopSitesAvailable(0, pages); + MessageLoop::current()->RunAllPending(); + + EXPECT_EQ(3u, number_of_callbacks()); + + ASSERT_EQ(2u, urls().size()); + EXPECT_EQ("http://1.com/", urls()[0].url.spec()); + EXPECT_EQ("http://2.com/", urls()[1].url.spec()); + + url.url = GURL("http://3.com/"); + url.redirects.push_back(url.url); + pages.push_back(url); + top_sites().OnTopSitesAvailable(0, pages); + MessageLoop::current()->RunAllPending(); + + top_sites().GetMostVisitedURLs( + &c3, + NewCallback(static_cast<TopSitesTest*>(this), + &TopSitesTest::OnTopSitesAvailable)); + + EXPECT_EQ(4u, number_of_callbacks()); + + ASSERT_EQ(3u, urls().size()); + EXPECT_EQ("http://1.com/", urls()[0].url.spec()); + EXPECT_EQ("http://2.com/", urls()[1].url.spec()); + EXPECT_EQ("http://3.com/", urls()[2].url.spec()); +} + +TEST_F(TopSitesTest, CancelingRequestsForTopSites) { + CancelableRequestConsumer c1; + CancelableRequestConsumer c2; + top_sites().GetMostVisitedURLs( + &c1, + NewCallback(static_cast<TopSitesTest*>(this), + &TopSitesTest::OnTopSitesAvailable)); + + top_sites().GetMostVisitedURLs( + &c2, + NewCallback(static_cast<TopSitesTest*>(this), + &TopSitesTest::OnTopSitesAvailable)); + + { + CancelableRequestConsumer c3; + top_sites().GetMostVisitedURLs( + &c3, + NewCallback(static_cast<TopSitesTest*>(this), + &TopSitesTest::OnTopSitesAvailable)); + // c3 is out of scope, and the request should be cancelled. + } + + // No requests until OnTopSitesAvailable is called. + EXPECT_EQ(0u, number_of_callbacks()); + EXPECT_EQ(0u, urls().size()); + + MostVisitedURLList pages; + MostVisitedURL url; + url.url = GURL("http://1.com/"); + url.redirects.push_back(url.url); + pages.push_back(url); + url.url = GURL("http://2.com/"); + pages.push_back(url); + + top_sites().OnTopSitesAvailable(0, pages); + + // 1 request was canceled. 
+ EXPECT_EQ(2u, number_of_callbacks()); + + ASSERT_EQ(2u, urls().size()); + EXPECT_EQ("http://1.com/", urls()[0].url.spec()); + EXPECT_EQ("http://2.com/", urls()[1].url.spec()); +} + +TEST_F(TopSitesTest, AddTemporaryThumbnail) { + ChromeThread db_loop(ChromeThread::DB, MessageLoop::current()); + GURL unknown_url("http://news.google.com/"); + GURL invalid_url("chrome://thumb/http://google.com/"); + GURL url1a("http://google.com/"); + GURL url1b("http://www.google.com/"); + + // Create a dummy thumbnail. + SkBitmap thumbnail; + thumbnail.setConfig(SkBitmap::kARGB_8888_Config, 4, 4); + thumbnail.allocPixels(); + thumbnail.eraseRGB(0x00, 0x00, 0x00); + + ThumbnailScore medium_score(0.5, true, true, base::Time::Now()); + + // Don't store thumbnails for Javascript URLs. + EXPECT_FALSE(top_sites().SetPageThumbnail(invalid_url, + thumbnail, medium_score)); + // Store thumbnails for unknown (but valid) URLs temporarily - calls + // AddTemporaryThumbnail. + EXPECT_TRUE(top_sites().SetPageThumbnail(unknown_url, + thumbnail, medium_score)); + + std::vector<MostVisitedURL> list; + + MostVisitedURL mv; + mv.url = unknown_url; + mv.redirects.push_back(mv.url); + mv.redirects.push_back(url1a); + mv.redirects.push_back(url1b); + list.push_back(mv); + + // Update URLs - use temporary thumbnails. + top_sites().UpdateMostVisited(list); + + RefCountedBytes* out = NULL; + ASSERT_TRUE(top_sites().GetPageThumbnail(unknown_url, &out)); + scoped_ptr<SkBitmap> out_bitmap(gfx::JPEGCodec::Decode(out->front(), + out->size())); + EXPECT_EQ(0, memcmp(thumbnail.getPixels(), out_bitmap->getPixels(), + thumbnail.getSize())); +} + +} // namespace history diff --git a/chrome/browser/history/url_database.cc b/chrome/browser/history/url_database.cc new file mode 100644 index 0000000..07f8881 --- /dev/null +++ b/chrome/browser/history/url_database.cc @@ -0,0 +1,498 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/history/url_database.h" + +#include <algorithm> +#include <limits> +#include <string> +#include <vector> + +#include "app/l10n_util.h" +#include "app/sql/connection.h" +#include "app/sql/statement.h" +#include "base/utf_string_conversions.h" +#include "chrome/common/url_constants.h" +#include "googleurl/src/gurl.h" + +namespace history { + +const char URLDatabase::kURLRowFields[] = HISTORY_URL_ROW_FIELDS; +const int URLDatabase::kNumURLRowFields = 9; + +bool URLDatabase::URLEnumerator::GetNextURL(URLRow* r) { + if (statement_.Step()) { + FillURLRow(statement_, r); + return true; + } + return false; +} + +URLDatabase::URLDatabase() : has_keyword_search_terms_(false) { +} + +URLDatabase::~URLDatabase() { +} + +// static +std::string URLDatabase::GURLToDatabaseURL(const GURL& gurl) { + // TODO(brettw): do something fancy here with encoding, etc. + + // Strip username and password from URL before sending to DB. + GURL::Replacements replacements; + replacements.ClearUsername(); + replacements.ClearPassword(); + + return (gurl.ReplaceComponents(replacements)).spec(); +} + +// Convenience to fill a history::URLRow. Must be in sync with the fields in +// kURLRowFields. +void URLDatabase::FillURLRow(sql::Statement& s, history::URLRow* i) { + DCHECK(i); + i->id_ = s.ColumnInt64(0); + i->url_ = GURL(s.ColumnString(1)); + i->title_ = s.ColumnString16(2); + i->visit_count_ = s.ColumnInt(3); + i->typed_count_ = s.ColumnInt(4); + i->last_visit_ = base::Time::FromInternalValue(s.ColumnInt64(5)); + i->hidden_ = s.ColumnInt(6) != 0; + i->favicon_id_ = s.ColumnInt64(7); +} + +bool URLDatabase::GetURLRow(URLID url_id, URLRow* info) { + // TODO(brettw) We need check for empty URLs to handle the case where + // there are old URLs in the database that are empty that got in before + // we added any checks. 
We should eventually be able to remove it + // when all inputs are using GURL (which prohibit empty input). + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls WHERE id=?")); + if (!statement) + return false; + + statement.BindInt64(0, url_id); + if (statement.Step()) { + FillURLRow(statement, info); + return true; + } + return false; +} + +bool URLDatabase::GetAllTypedUrls(std::vector<history::URLRow>* urls) { + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls WHERE typed_count > 0")); + if (!statement) + return false; + + while (statement.Step()) { + URLRow info; + FillURLRow(statement, &info); + urls->push_back(info); + } + return true; +} + +URLID URLDatabase::GetRowForURL(const GURL& url, history::URLRow* info) { + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls WHERE url=?")); + if (!statement) + return 0; + + std::string url_string = GURLToDatabaseURL(url); + statement.BindString(0, url_string); + if (!statement.Step()) + return 0; // no data + + if (info) + FillURLRow(statement, info); + return statement.ColumnInt64(0); +} + +bool URLDatabase::UpdateURLRow(URLID url_id, + const history::URLRow& info) { + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "UPDATE urls SET title=?,visit_count=?,typed_count=?,last_visit_time=?," + "hidden=?,favicon_id=?" + "WHERE id=?")); + if (!statement) + return false; + + statement.BindString16(0, info.title()); + statement.BindInt(1, info.visit_count()); + statement.BindInt(2, info.typed_count()); + statement.BindInt64(3, info.last_visit().ToInternalValue()); + statement.BindInt(4, info.hidden() ? 
1 : 0); + statement.BindInt64(5, info.favicon_id()); + statement.BindInt64(6, url_id); + return statement.Run(); +} + +URLID URLDatabase::AddURLInternal(const history::URLRow& info, + bool is_temporary) { + // This function is used to insert into two different tables, so we have to + // do some shuffling. Unfortinately, we can't use the macro + // HISTORY_URL_ROW_FIELDS because that specifies the table name which is + // invalid in the insert syntax. + #define ADDURL_COMMON_SUFFIX \ + " (url, title, visit_count, typed_count, "\ + "last_visit_time, hidden, favicon_id) "\ + "VALUES (?,?,?,?,?,?,?)" + const char* statement_name; + const char* statement_sql; + if (is_temporary) { + statement_name = "AddURLTemporary"; + statement_sql = "INSERT INTO temp_urls" ADDURL_COMMON_SUFFIX; + } else { + statement_name = "AddURL"; + statement_sql = "INSERT INTO urls" ADDURL_COMMON_SUFFIX; + } + #undef ADDURL_COMMON_SUFFIX + + sql::Statement statement(GetDB().GetCachedStatement( + sql::StatementID(statement_name), statement_sql)); + if (!statement) { + NOTREACHED() << GetDB().GetErrorMessage(); + return 0; + } + + statement.BindString(0, GURLToDatabaseURL(info.url())); + statement.BindString16(1, info.title()); + statement.BindInt(2, info.visit_count()); + statement.BindInt(3, info.typed_count()); + statement.BindInt64(4, info.last_visit().ToInternalValue()); + statement.BindInt(5, info.hidden() ? 1 : 0); + statement.BindInt64(6, info.favicon_id()); + + if (!statement.Run()) + return 0; + return GetDB().GetLastInsertRowId(); +} + +bool URLDatabase::DeleteURLRow(URLID id) { + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "DELETE FROM urls WHERE id = ?")); + if (!statement) + return false; + + statement.BindInt64(0, id); + if (!statement.Run()) + return false; + + // And delete any keyword visits. 
+ if (!has_keyword_search_terms_) + return true; + + sql::Statement del_keyword_visit(GetDB().GetCachedStatement(SQL_FROM_HERE, + "DELETE FROM keyword_search_terms WHERE url_id=?")); + if (!del_keyword_visit) + return false; + del_keyword_visit.BindInt64(0, id); + return del_keyword_visit.Run(); +} + +bool URLDatabase::CreateTemporaryURLTable() { + return CreateURLTable(true); +} + +bool URLDatabase::CommitTemporaryURLTable() { + // See the comments in the header file as well as + // HistoryBackend::DeleteAllHistory() for more information on how this works + // and why it does what it does. + // + // Note that the main database overrides this to additionally create the + // supplimentary indices that the archived database doesn't need. + + // Swap the url table out and replace it with the temporary one. + if (!GetDB().Execute("DROP TABLE urls")) { + NOTREACHED() << GetDB().GetErrorMessage(); + return false; + } + if (!GetDB().Execute("ALTER TABLE temp_urls RENAME TO urls")) { + NOTREACHED() << GetDB().GetErrorMessage(); + return false; + } + + // Create the index over URLs. This is needed for the main, in-memory, and + // archived databases, so we always do it. The supplimentary indices used by + // the main database are not created here. When deleting all history, they + // are created by HistoryDatabase::RecreateAllButStarAndURLTables(). 
+ CreateMainURLIndex(); + + return true; +} + +bool URLDatabase::InitURLEnumeratorForEverything(URLEnumerator* enumerator) { + DCHECK(!enumerator->initialized_); + std::string sql("SELECT "); + sql.append(kURLRowFields); + sql.append(" FROM urls"); + enumerator->statement_.Assign(GetDB().GetUniqueStatement(sql.c_str())); + if (!enumerator->statement_) { + NOTREACHED() << GetDB().GetErrorMessage(); + return false; + } + enumerator->initialized_ = true; + return true; +} + +bool URLDatabase::IsFavIconUsed(FavIconID favicon_id) { + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT id FROM urls WHERE favicon_id=? LIMIT 1")); + if (!statement) + return false; + + statement.BindInt64(0, favicon_id); + return statement.Step(); +} + +void URLDatabase::AutocompleteForPrefix(const string16& prefix, + size_t max_results, + std::vector<history::URLRow>* results) { + // NOTE: this query originally sorted by starred as the second parameter. But + // as bookmarks is no longer part of the db we no longer include the order + // by clause. + results->clear(); + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls " + "WHERE url >= ? AND url < ? AND hidden = 0 " + "ORDER BY typed_count DESC, visit_count DESC, last_visit_time DESC " + "LIMIT ?")); + if (!statement) + return; + + // We will find all strings between "prefix" and this string, which is prefix + // followed by the maximum character size. Use 8-bit strings for everything + // so we can be sure sqlite is comparing everything in 8-bit mode. Otherwise, + // it will have to convert strings either to UTF-8 or UTF-16 for comparison. 
+ std::string prefix_utf8(UTF16ToUTF8(prefix)); + std::string end_query(prefix_utf8); + end_query.push_back(std::numeric_limits<unsigned char>::max()); + + statement.BindString(0, prefix_utf8); + statement.BindString(1, end_query); + statement.BindInt(2, static_cast<int>(max_results)); + + while (statement.Step()) { + history::URLRow info; + FillURLRow(statement, &info); + if (info.url().is_valid()) + results->push_back(info); + } +} + +bool URLDatabase::FindShortestURLFromBase(const std::string& base, + const std::string& url, + int min_visits, + int min_typed, + bool allow_base, + history::URLRow* info) { + // Select URLs that start with |base| and are prefixes of |url|. All parts + // of this query except the substr() call can be done using the index. We + // could do this query with a couple of LIKE or GLOB statements as well, but + // those wouldn't use the index, and would run into problems with "wildcard" + // characters that appear in URLs (% for LIKE, or *, ? for GLOB). + std::string sql("SELECT "); + sql.append(kURLRowFields); + sql.append(" FROM urls WHERE url "); + sql.append(allow_base ? ">=" : ">"); + sql.append(" ? AND url < :end AND url = substr(:end, 1, length(url)) " + "AND hidden = 0 AND visit_count >= ? AND typed_count >= ? " + "ORDER BY url LIMIT 1"); + sql::Statement statement(GetDB().GetUniqueStatement(sql.c_str())); + if (!statement) { + NOTREACHED() << GetDB().GetErrorMessage(); + return false; + } + + statement.BindString(0, base); + statement.BindString(1, url); // :end + statement.BindInt(2, min_visits); + statement.BindInt(3, min_typed); + + if (!statement.Step()) + return false; + + DCHECK(info); + FillURLRow(statement, info); + return true; +} + +bool URLDatabase::InitKeywordSearchTermsTable() { + has_keyword_search_terms_ = true; + if (!GetDB().DoesTableExist("keyword_search_terms")) { + if (!GetDB().Execute("CREATE TABLE keyword_search_terms (" + "keyword_id INTEGER NOT NULL," // ID of the TemplateURL. 
+ "url_id INTEGER NOT NULL," // ID of the url. + "lower_term LONGVARCHAR NOT NULL," // The search term, in lower case. + "term LONGVARCHAR NOT NULL)")) // The actual search term. + return false; + } + + // For searching. + GetDB().Execute("CREATE INDEX keyword_search_terms_index1 ON " + "keyword_search_terms (keyword_id, lower_term)"); + + // For deletion. + GetDB().Execute("CREATE INDEX keyword_search_terms_index2 ON " + "keyword_search_terms (url_id)"); + + return true; +} + +bool URLDatabase::DropKeywordSearchTermsTable() { + // This will implicitly delete the indices over the table. + return GetDB().Execute("DROP TABLE keyword_search_terms"); +} + +bool URLDatabase::SetKeywordSearchTermsForURL(URLID url_id, + TemplateURL::IDType keyword_id, + const string16& term) { + DCHECK(url_id && keyword_id && !term.empty()); + + sql::Statement exist_statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT term FROM keyword_search_terms " + "WHERE keyword_id = ? AND url_id = ?")); + if (!exist_statement) + return false; + exist_statement.BindInt64(0, keyword_id); + exist_statement.BindInt64(1, url_id); + if (exist_statement.Step()) + return true; // Term already exists, no need to add it. 
+ + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "INSERT INTO keyword_search_terms (keyword_id, url_id, lower_term, term) " + "VALUES (?,?,?,?)")); + if (!statement) + return false; + + statement.BindInt64(0, keyword_id); + statement.BindInt64(1, url_id); + statement.BindString16(2, l10n_util::ToLower(term)); + statement.BindString16(3, term); + return statement.Run(); +} + +void URLDatabase::DeleteAllSearchTermsForKeyword( + TemplateURL::IDType keyword_id) { + DCHECK(keyword_id); + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "DELETE FROM keyword_search_terms WHERE keyword_id=?")); + if (!statement) + return; + + statement.BindInt64(0, keyword_id); + statement.Run(); +} + +void URLDatabase::GetMostRecentKeywordSearchTerms( + TemplateURL::IDType keyword_id, + const string16& prefix, + int max_count, + std::vector<KeywordSearchTermVisit>* matches) { + // NOTE: the keyword_id can be zero if on first run the user does a query + // before the TemplateURLModel has finished loading. As the chances of this + // occurring are small, we ignore it. + if (!keyword_id) + return; + + DCHECK(!prefix.empty()); + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT DISTINCT kv.term, u.last_visit_time " + "FROM keyword_search_terms kv " + "JOIN urls u ON kv.url_id = u.id " + "WHERE kv.keyword_id = ? AND kv.lower_term >= ? AND kv.lower_term < ? " + "ORDER BY u.last_visit_time DESC LIMIT ?")); + if (!statement) + return; + + // NOTE: Keep this ToLower() call in sync with search_provider.cc. + string16 lower_prefix = l10n_util::ToLower(prefix); + // This magic gives us a prefix search. 
+ string16 next_prefix = lower_prefix; + next_prefix[next_prefix.size() - 1] = + next_prefix[next_prefix.size() - 1] + 1; + statement.BindInt64(0, keyword_id); + statement.BindString16(1, lower_prefix); + statement.BindString16(2, next_prefix); + statement.BindInt(3, max_count); + + KeywordSearchTermVisit visit; + while (statement.Step()) { + visit.term = statement.ColumnString16(0); + visit.time = base::Time::FromInternalValue(statement.ColumnInt64(1)); + matches->push_back(visit); + } +} + +bool URLDatabase::MigrateFromVersion11ToVersion12() { + URLRow about_row; + if (GetRowForURL(GURL(chrome::kAboutBlankURL), &about_row)) { + about_row.set_favicon_id(0); + return UpdateURLRow(about_row.id(), about_row); + } + return true; +} + +bool URLDatabase::DropStarredIDFromURLs() { + if (!GetDB().DoesColumnExist("urls", "starred_id")) + return true; // urls is already updated, no need to continue. + + // Create a temporary table to contain the new URLs table. + if (!CreateTemporaryURLTable()) { + NOTREACHED(); + return false; + } + + // Copy the contents. + if (!GetDB().Execute( + "INSERT INTO temp_urls (id, url, title, visit_count, typed_count, " + "last_visit_time, hidden, favicon_id) " + "SELECT id, url, title, visit_count, typed_count, last_visit_time, " + "hidden, favicon_id FROM urls")) { + NOTREACHED() << GetDB().GetErrorMessage(); + return false; + } + + // Rename/commit the tmp table. + CommitTemporaryURLTable(); + + // This isn't created by CommitTemporaryURLTable. + CreateSupplimentaryURLIndices(); + + return true; +} + +bool URLDatabase::CreateURLTable(bool is_temporary) { + const char* name = is_temporary ? 
"temp_urls" : "urls"; + if (GetDB().DoesTableExist(name)) + return true; + + std::string sql; + sql.append("CREATE TABLE "); + sql.append(name); + sql.append("(" + "id INTEGER PRIMARY KEY," + "url LONGVARCHAR," + "title LONGVARCHAR," + "visit_count INTEGER DEFAULT 0 NOT NULL," + "typed_count INTEGER DEFAULT 0 NOT NULL," + "last_visit_time INTEGER NOT NULL," + "hidden INTEGER DEFAULT 0 NOT NULL," + "favicon_id INTEGER DEFAULT 0 NOT NULL)"); + + return GetDB().Execute(sql.c_str()); +} + +void URLDatabase::CreateMainURLIndex() { + // Index over URLs so we can quickly look up based on URL. Ignore errors as + // this likely already exists (and the same below). + GetDB().Execute("CREATE INDEX urls_url_index ON urls (url)"); +} + +void URLDatabase::CreateSupplimentaryURLIndices() { + // Add a favicon index. This is useful when we delete urls. + GetDB().Execute("CREATE INDEX urls_favicon_id_INDEX ON urls (favicon_id)"); +} + +} // namespace history diff --git a/chrome/browser/history/url_database.h b/chrome/browser/history/url_database.h new file mode 100644 index 0000000..84c8dde --- /dev/null +++ b/chrome/browser/history/url_database.h @@ -0,0 +1,258 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CHROME_BROWSER_HISTORY_URL_DATABASE_H_ +#define CHROME_BROWSER_HISTORY_URL_DATABASE_H_ + +#include "app/sql/statement.h" +#include "base/basictypes.h" +#include "chrome/browser/history/history_types.h" +#include "chrome/browser/search_engines/template_url.h" + +class GURL; + +namespace sql { +class Connection; +} + +namespace history { + +class VisitDatabase; // For friend statement. + +// Encapsulates an SQL database that holds URL info. This is a subset of the +// full history data. 
We split this class' functionality out from the larger +// HistoryDatabase class to support maintaining separate databases of URLs with +// different capabilities (for example, in-memory, or archived). +// +// This is refcounted to support calling InvokeLater() with some of its methods +// (necessary to maintain ordering of DB operations). +class URLDatabase { + public: + // Must call CreateURLTable() and CreateURLIndexes() before using to make + // sure the database is initialized. + URLDatabase(); + + // This object must be destroyed on the thread where all accesses are + // happening to avoid thread-safety problems. + virtual ~URLDatabase(); + + // Converts a GURL to a string used in the history database. We plan to + // do more complex operations than just getting the spec out involving + // punycode, so this function should be used instead of url.spec() when + // interacting with the database. + // + // TODO(brettw) this should be moved out of the public section and the + // entire public HistoryDatabase interface should use GURL. This should + // also probably return a string instead since that is what the DB uses + // internally and we can avoid the extra conversion. + static std::string GURLToDatabaseURL(const GURL& url); + + // URL table functions ------------------------------------------------------- + + // Looks up a url given an id. Fills info with the data. Returns true on + // success and false otherwise. + bool GetURLRow(URLID url_id, URLRow* info); + + // Looks up all urls that were typed in manually. Fills info with the data. + // Returns true on success and false otherwise. + bool GetAllTypedUrls(std::vector<history::URLRow>* urls); + + // Looks up the given URL and if it exists, fills the given pointers with the + // associated info and returns the ID of that URL. If the info pointer is + // NULL, no information about the URL will be filled in, only the ID will be + // returned. Returns 0 if the URL was not found. 
+ URLID GetRowForURL(const GURL& url, URLRow* info); + + // Given an already-existing row in the URL table, updates that URL's stats. + // This can not change the URL. Returns true on success. + // + // This will NOT update the title used for full text indexing. If you are + // setting the title, call SetPageIndexedData with the new title. + bool UpdateURLRow(URLID url_id, const URLRow& info); + + // Adds a line to the URL database with the given information and returns the + // row ID. A row with the given URL must not exist. Returns 0 on error. + // + // This does NOT add a row to the full text search database. Use + // HistoryDatabase::SetPageIndexedData to do this. + URLID AddURL(const URLRow& info) { + return AddURLInternal(info, false); + } + + // Delete the row of the corresponding URL. Only the row in the URL table + // will be deleted, not any other data that may refer to it. Returns true if + // the row existed and was deleted. + bool DeleteURLRow(URLID id); + + // URL mass-deleting --------------------------------------------------------- + + // Begins the mass-deleting operation by creating a temporary URL table. + // The caller than adds the URLs it wants to preseve to the temporary table, + // and then deletes everything else by calling CommitTemporaryURLTable(). + // Returns true on success. + bool CreateTemporaryURLTable(); + + // Adds a row to the temporary URL table. This must be called between + // CreateTemporaryURLTable() and CommitTemporaryURLTable() (see those for more + // info). The ID of the URL will change in the temporary table, so the new ID + // is returned. Returns 0 on failure. + URLID AddTemporaryURL(const URLRow& row) { + return AddURLInternal(row, true); + } + + // Ends the mass-deleting by replacing the original URL table with the + // temporary one created in CreateTemporaryURLTable. Returns true on success. + // + // This function does not create the supplimentary indices. 
It is virtual so + // that the main history database can provide this additional behavior. + virtual bool CommitTemporaryURLTable(); + + // Enumeration --------------------------------------------------------------- + + // A basic enumerator to enumerate urls + class URLEnumerator { + public: + URLEnumerator() : initialized_(false) { + } + + // Retreives the next url. Returns false if no more urls are available + bool GetNextURL(history::URLRow* r); + + private: + friend class URLDatabase; + + bool initialized_; + sql::Statement statement_; + }; + + // Initializes the given enumerator to enumerator all URLs in the database + bool InitURLEnumeratorForEverything(URLEnumerator* enumerator); + + // Favicons ------------------------------------------------------------------ + + // Check whether a favicon is used by any URLs in the database. + bool IsFavIconUsed(FavIconID favicon_id); + + // Autocomplete -------------------------------------------------------------- + + // Fills the given array with URLs matching the given prefix. They will be + // sorted by typed count, then by visit count, then by visit date (most + // recent first) up to the given maximum number. Called by HistoryURLProvider. + void AutocompleteForPrefix(const string16& prefix, + size_t max_results, + std::vector<URLRow>* results); + + // Tries to find the shortest URL beginning with |base| that strictly + // prefixes |url|, and has minimum visit_ and typed_counts as specified. + // If found, fills in |info| and returns true; otherwise returns false, + // leaving |info| unchanged. + // We allow matches of exactly |base| iff |allow_base| is true. + bool FindShortestURLFromBase(const std::string& base, + const std::string& url, + int min_visits, + int min_typed, + bool allow_base, + history::URLRow* info); + + // Keyword Search Terms ------------------------------------------------------ + + // Sets the search terms for the specified url/keyword pair. 
+ bool SetKeywordSearchTermsForURL(URLID url_id, + TemplateURL::IDType keyword_id, + const string16& term); + + // Deletes all search terms for the specified keyword that have been added by + // way of SetKeywordSearchTermsForURL. + void DeleteAllSearchTermsForKeyword(TemplateURL::IDType keyword_id); + + // Returns up to max_count of the most recent search terms for the specified + // keyword. + void GetMostRecentKeywordSearchTerms( + TemplateURL::IDType keyword_id, + const string16& prefix, + int max_count, + std::vector<KeywordSearchTermVisit>* matches); + + // Migration ----------------------------------------------------------------- + + // Do to a bug we were setting the favicon of about:blank. This forces + // about:blank to have no icon or title. Returns true on success, false if + // the favicon couldn't be updated. + bool MigrateFromVersion11ToVersion12(); + + protected: + friend class VisitDatabase; + + // See HISTORY_URL_ROW_FIELDS below. + static const char kURLRowFields[]; + + // The number of fiends in kURLRowFields. If callers need additional + // fields, they can add their 0-based index to this value to get the index of + // fields following kURLRowFields. + static const int kNumURLRowFields; + + // Drops the starred_id column from urls, returning true on success. This does + // nothing (and returns true) if the urls doesn't contain the starred_id + // column. + bool DropStarredIDFromURLs(); + + // Initialization functions. The indexing functions are separate from the + // table creation functions so the in-memory database and the temporary tables + // used when clearing history can populate the table and then create the + // index, which is faster than the reverse. + // + // is_temporary is false when generating the "regular" URLs table. The expirer + // sets this to true to generate the temporary table, which will have a + // different name but the same schema. 
+ bool CreateURLTable(bool is_temporary); + // We have two tiers of indices for the URL table. The main tier is used by + // all URL databases, and is an index over the URL itself. The main history + // DB also creates indices over the favicons and bookmark IDs. The archived + // and in-memory databases don't need these supplimentary indices so we can + // save space by not creating them. + void CreateMainURLIndex(); + void CreateSupplimentaryURLIndices(); + + // Ensures the keyword search terms table exists. + bool InitKeywordSearchTermsTable(); + + // Deletes the keyword search terms table. + bool DropKeywordSearchTermsTable(); + + // Inserts the given URL row into the URLs table, using the regular table + // if is_temporary is false, or the temporary URL table if is temporary is + // true. The temporary table may only be used in between + // CreateTemporaryURLTable() and CommitTemporaryURLTable(). + URLID AddURLInternal(const URLRow& info, bool is_temporary); + + // Convenience to fill a history::URLRow. Must be in sync with the fields in + // kHistoryURLRowFields. + static void FillURLRow(sql::Statement& s, URLRow* i); + + // Returns the database for the functions in this interface. The decendent of + // this class implements these functions to return its objects. + virtual sql::Connection& GetDB() = 0; + + private: + // True if InitKeywordSearchTermsTable() has been invoked. Not all subclasses + // have keyword search terms. + bool has_keyword_search_terms_; + + DISALLOW_COPY_AND_ASSIGN(URLDatabase); +}; + +// The fields and order expected by FillURLRow(). ID is guaranteed to be first +// so that DISTINCT can be prepended to get distinct URLs. +// +// This is available BOTH as a macro and a static string (kURLRowFields). Use +// the macro if you want to put this in the middle of an otherwise constant +// string, it will save time doing string appends. If you have to build a SQL +// string dynamically anyway, use the constant, it will save space. 
+#define HISTORY_URL_ROW_FIELDS \ + " urls.id, urls.url, urls.title, urls.visit_count, urls.typed_count, " \ + "urls.last_visit_time, urls.hidden, urls.favicon_id " + +} // history + +#endif // CHROME_BROWSER_HISTORY_URL_DATABASE_H_ diff --git a/chrome/browser/history/url_database_unittest.cc b/chrome/browser/history/url_database_unittest.cc new file mode 100644 index 0000000..32ded0c --- /dev/null +++ b/chrome/browser/history/url_database_unittest.cc @@ -0,0 +1,179 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "app/sql/connection.h" +#include "base/file_path.h" +#include "base/file_util.h" +#include "base/path_service.h" +#include "base/string_util.h" +#include "base/utf_string_conversions.h" +#include "chrome/browser/history/url_database.h" +#include "testing/gtest/include/gtest/gtest.h" + +using base::Time; +using base::TimeDelta; + +namespace history { + +namespace { + +bool IsURLRowEqual(const URLRow& a, + const URLRow& b) { + // TODO(brettw) when the database stores an actual Time value rather than + // a time_t, do a reaul comparison. Instead, we have to do a more rough + // comparison since the conversion reduces the precision. + return a.title() == b.title() && + a.visit_count() == b.visit_count() && + a.typed_count() == b.typed_count() && + a.last_visit() - b.last_visit() <= TimeDelta::FromSeconds(1) && + a.hidden() == b.hidden(); +} + +} // namespace + +class URLDatabaseTest : public testing::Test, + public URLDatabase { + public: + URLDatabaseTest() { + } + + private: + // Test setup. + void SetUp() { + FilePath temp_dir; + PathService::Get(base::DIR_TEMP, &temp_dir); + db_file_ = temp_dir.AppendASCII("URLTest.db"); + + EXPECT_TRUE(db_.Open(db_file_)); + + // Initialize the tables for this test. 
+ CreateURLTable(false); + CreateMainURLIndex(); + CreateSupplimentaryURLIndices(); + InitKeywordSearchTermsTable(); + } + void TearDown() { + db_.Close(); + file_util::Delete(db_file_, false); + } + + // Provided for URL/VisitDatabase. + virtual sql::Connection& GetDB() { + return db_; + } + + FilePath db_file_; + sql::Connection db_; +}; + +// Test add and query for the URL table in the HistoryDatabase +TEST_F(URLDatabaseTest, AddURL) { + // first, add two URLs + const GURL url1("http://www.google.com/"); + URLRow url_info1(url1); + url_info1.set_title(UTF8ToUTF16("Google")); + url_info1.set_visit_count(4); + url_info1.set_typed_count(2); + url_info1.set_last_visit(Time::Now() - TimeDelta::FromDays(1)); + url_info1.set_hidden(false); + EXPECT_TRUE(AddURL(url_info1)); + + const GURL url2("http://mail.google.com/"); + URLRow url_info2(url2); + url_info2.set_title(UTF8ToUTF16("Google Mail")); + url_info2.set_visit_count(3); + url_info2.set_typed_count(0); + url_info2.set_last_visit(Time::Now() - TimeDelta::FromDays(2)); + url_info2.set_hidden(true); + EXPECT_TRUE(AddURL(url_info2)); + + // query both of them + URLRow info; + EXPECT_TRUE(GetRowForURL(url1, &info)); + EXPECT_TRUE(IsURLRowEqual(url_info1, info)); + URLID id2 = GetRowForURL(url2, &info); + EXPECT_TRUE(id2); + EXPECT_TRUE(IsURLRowEqual(url_info2, info)); + + // update the second + url_info2.set_title(UTF8ToUTF16("Google Mail Too")); + url_info2.set_visit_count(4); + url_info2.set_typed_count(1); + url_info2.set_typed_count(91011); + url_info2.set_hidden(false); + EXPECT_TRUE(UpdateURLRow(id2, url_info2)); + + // make sure it got updated + URLRow info2; + EXPECT_TRUE(GetRowForURL(url2, &info2)); + EXPECT_TRUE(IsURLRowEqual(url_info2, info2)); + + // query a nonexistant URL + EXPECT_EQ(0, GetRowForURL(GURL("http://news.google.com/"), &info)); + + // Delete all urls in the domain + // TODO(acw): test the new url based delete domain + // EXPECT_TRUE(db.DeleteDomain(kDomainID)); + + // Make sure the urls have 
been properly removed + // TODO(acw): commented out because remove no longer works. + // EXPECT_TRUE(db.GetURLInfo(url1, NULL) == NULL); + // EXPECT_TRUE(db.GetURLInfo(url2, NULL) == NULL); +} + +// Tests adding, querying and deleting keyword visits. +TEST_F(URLDatabaseTest, KeywordSearchTermVisit) { + const GURL url1("http://www.google.com/"); + URLRow url_info1(url1); + url_info1.set_title(UTF8ToUTF16("Google")); + url_info1.set_visit_count(4); + url_info1.set_typed_count(2); + url_info1.set_last_visit(Time::Now() - TimeDelta::FromDays(1)); + url_info1.set_hidden(false); + URLID url_id = AddURL(url_info1); + ASSERT_TRUE(url_id != 0); + + // Add a keyword visit. + ASSERT_TRUE(SetKeywordSearchTermsForURL(url_id, 1, UTF8ToUTF16("visit"))); + + // Make sure we get it back. + std::vector<KeywordSearchTermVisit> matches; + GetMostRecentKeywordSearchTerms(1, UTF8ToUTF16("visit"), 10, &matches); + ASSERT_EQ(1U, matches.size()); + ASSERT_EQ(UTF8ToUTF16("visit"), matches[0].term); + + // Delete the keyword visit. + DeleteAllSearchTermsForKeyword(1); + + // Make sure we don't get it back when querying. + matches.clear(); + GetMostRecentKeywordSearchTerms(1, UTF8ToUTF16("visit"), 10, &matches); + ASSERT_EQ(0U, matches.size()); +} + +// Make sure deleting a URL also deletes a keyword visit. +TEST_F(URLDatabaseTest, DeleteURLDeletesKeywordSearchTermVisit) { + const GURL url1("http://www.google.com/"); + URLRow url_info1(url1); + url_info1.set_title(UTF8ToUTF16("Google")); + url_info1.set_visit_count(4); + url_info1.set_typed_count(2); + url_info1.set_last_visit(Time::Now() - TimeDelta::FromDays(1)); + url_info1.set_hidden(false); + URLID url_id = AddURL(url_info1); + ASSERT_TRUE(url_id != 0); + + // Add a keyword visit. + ASSERT_TRUE(SetKeywordSearchTermsForURL(url_id, 1, UTF8ToUTF16("visit"))); + + // Delete the url. + ASSERT_TRUE(DeleteURLRow(url_id)); + + // Make sure the keyword visit was deleted. 
+ std::vector<KeywordSearchTermVisit> matches; + GetMostRecentKeywordSearchTerms(1, UTF8ToUTF16("visit"), 10, &matches); + ASSERT_EQ(0U, matches.size()); +} + +} // namespace history diff --git a/chrome/browser/history/visit_database.cc b/chrome/browser/history/visit_database.cc new file mode 100644 index 0000000..80fa8c8 --- /dev/null +++ b/chrome/browser/history/visit_database.cc @@ -0,0 +1,440 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/history/visit_database.h" + +#include <algorithm> +#include <limits> +#include <map> +#include <set> + +#include "app/sql/connection.h" +#include "app/sql/statement.h" +#include "base/logging.h" +#include "chrome/browser/history/url_database.h" +#include "chrome/common/page_transition_types.h" +#include "chrome/common/url_constants.h" + +// Rows, in order, of the visit table. +#define HISTORY_VISIT_ROW_FIELDS \ + " id,url,visit_time,from_visit,transition,segment_id,is_indexed " + +namespace history { + +VisitDatabase::VisitDatabase() { +} + +VisitDatabase::~VisitDatabase() { +} + +bool VisitDatabase::InitVisitTable() { + if (!GetDB().DoesTableExist("visits")) { + if (!GetDB().Execute("CREATE TABLE visits(" + "id INTEGER PRIMARY KEY," + "url INTEGER NOT NULL," // key of the URL this corresponds to + "visit_time INTEGER NOT NULL," + "from_visit INTEGER," + "transition INTEGER DEFAULT 0 NOT NULL," + "segment_id INTEGER," + // True when we have indexed data for this visit. + "is_indexed BOOLEAN)")) + return false; + } else if (!GetDB().DoesColumnExist("visits", "is_indexed")) { + // Old versions don't have the is_indexed column, we can just add that and + // not worry about different database revisions, since old ones will + // continue to work. + // + // TODO(brettw) this should be removed once we think everybody has been + // updated (added early Mar 2008). 
+ if (!GetDB().Execute("ALTER TABLE visits ADD COLUMN is_indexed BOOLEAN")) + return false; + } + + // Index over url so we can quickly find visits for a page. This will just + // fail if it already exists and we'll ignore it. + GetDB().Execute("CREATE INDEX visits_url_index ON visits (url)"); + + // Create an index over from visits so that we can efficiently find + // referrers and redirects. Ignore failures because it likely already exists. + GetDB().Execute("CREATE INDEX visits_from_index ON visits (from_visit)"); + + // Create an index over time so that we can efficiently find the visits in a + // given time range (most history views are time-based). Ignore failures + // because it likely already exists. + GetDB().Execute("CREATE INDEX visits_time_index ON visits (visit_time)"); + + return true; +} + +bool VisitDatabase::DropVisitTable() { + // This will also drop the indices over the table. + return GetDB().Execute("DROP TABLE visits"); +} + +// Must be in sync with HISTORY_VISIT_ROW_FIELDS. 
+// static +void VisitDatabase::FillVisitRow(sql::Statement& statement, VisitRow* visit) { + visit->visit_id = statement.ColumnInt64(0); + visit->url_id = statement.ColumnInt64(1); + visit->visit_time = base::Time::FromInternalValue(statement.ColumnInt64(2)); + visit->referring_visit = statement.ColumnInt64(3); + visit->transition = PageTransition::FromInt(statement.ColumnInt(4)); + visit->segment_id = statement.ColumnInt64(5); + visit->is_indexed = !!statement.ColumnInt(6); +} + +// static +void VisitDatabase::FillVisitVector(sql::Statement& statement, + VisitVector* visits) { + while (statement.Step()) { + history::VisitRow visit; + FillVisitRow(statement, &visit); + visits->push_back(visit); + } +} + +VisitID VisitDatabase::AddVisit(VisitRow* visit) { + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "INSERT INTO visits " + "(url, visit_time, from_visit, transition, segment_id, is_indexed) " + "VALUES (?,?,?,?,?,?)")); + if (!statement) + return 0; + + statement.BindInt64(0, visit->url_id); + statement.BindInt64(1, visit->visit_time.ToInternalValue()); + statement.BindInt64(2, visit->referring_visit); + statement.BindInt64(3, visit->transition); + statement.BindInt64(4, visit->segment_id); + statement.BindInt64(5, visit->is_indexed); + if (!statement.Run()) + return 0; + + visit->visit_id = GetDB().GetLastInsertRowId(); + return visit->visit_id; +} + +void VisitDatabase::DeleteVisit(const VisitRow& visit) { + // Patch around this visit. Any visits that this went to will now have their + // "source" be the deleted visit's source. + sql::Statement update_chain(GetDB().GetCachedStatement(SQL_FROM_HERE, + "UPDATE visits SET from_visit=? WHERE from_visit=?")); + if (!update_chain) + return; + update_chain.BindInt64(0, visit.referring_visit); + update_chain.BindInt64(1, visit.visit_id); + update_chain.Run(); + + // Now delete the actual visit. 
+ sql::Statement del(GetDB().GetCachedStatement(SQL_FROM_HERE, + "DELETE FROM visits WHERE id=?")); + if (!del) + return; + del.BindInt64(0, visit.visit_id); + del.Run(); +} + +bool VisitDatabase::GetRowForVisit(VisitID visit_id, VisitRow* out_visit) { + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits WHERE id=?")); + if (!statement) + return false; + + statement.BindInt64(0, visit_id); + if (!statement.Step()) + return false; + + FillVisitRow(statement, out_visit); + + // We got a different visit than we asked for, something is wrong. + DCHECK_EQ(visit_id, out_visit->visit_id); + if (visit_id != out_visit->visit_id) + return false; + + return true; +} + +bool VisitDatabase::UpdateVisitRow(const VisitRow& visit) { + // Don't store inconsistent data to the database. + DCHECK_NE(visit.visit_id, visit.referring_visit); + if (visit.visit_id == visit.referring_visit) + return false; + + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "UPDATE visits SET " + "url=?,visit_time=?,from_visit=?,transition=?,segment_id=?,is_indexed=? " + "WHERE id=?")); + if (!statement) + return false; + + statement.BindInt64(0, visit.url_id); + statement.BindInt64(1, visit.visit_time.ToInternalValue()); + statement.BindInt64(2, visit.referring_visit); + statement.BindInt64(3, visit.transition); + statement.BindInt64(4, visit.segment_id); + statement.BindInt64(5, visit.is_indexed); + statement.BindInt64(6, visit.visit_id); + return statement.Run(); +} + +bool VisitDatabase::GetVisitsForURL(URLID url_id, VisitVector* visits) { + visits->clear(); + + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT" HISTORY_VISIT_ROW_FIELDS + "FROM visits " + "WHERE url=? 
" + "ORDER BY visit_time ASC")); + if (!statement) + return false; + + statement.BindInt64(0, url_id); + FillVisitVector(statement, visits); + return true; +} + +void VisitDatabase::GetAllVisitsInRange(base::Time begin_time, + base::Time end_time, + int max_results, + VisitVector* visits) { + visits->clear(); + + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits " + "WHERE visit_time >= ? AND visit_time < ?" + "ORDER BY visit_time LIMIT ?")); + if (!statement) + return; + + // See GetVisibleVisitsInRange for more info on how these times are bound. + int64 end = end_time.ToInternalValue(); + statement.BindInt64(0, begin_time.ToInternalValue()); + statement.BindInt64(1, end ? end : std::numeric_limits<int64>::max()); + statement.BindInt64(2, + max_results ? max_results : std::numeric_limits<int64>::max()); + + FillVisitVector(statement, visits); +} + +void VisitDatabase::GetVisitsInRangeForTransition( + base::Time begin_time, + base::Time end_time, + int max_results, + PageTransition::Type transition, + VisitVector* visits) { + DCHECK(visits); + visits->clear(); + + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits " + "WHERE visit_time >= ? AND visit_time < ? " + "AND (transition & ?) == ?" + "ORDER BY visit_time LIMIT ?")); + if (!statement) + return; + + // See GetVisibleVisitsInRange for more info on how these times are bound. + int64 end = end_time.ToInternalValue(); + statement.BindInt64(0, begin_time.ToInternalValue()); + statement.BindInt64(1, end ? end : std::numeric_limits<int64>::max()); + statement.BindInt(2, PageTransition::CORE_MASK); + statement.BindInt(3, transition); + statement.BindInt64(4, + max_results ? 
max_results : std::numeric_limits<int64>::max()); + + FillVisitVector(statement, visits); +} + +void VisitDatabase::GetVisibleVisitsInRange(base::Time begin_time, + base::Time end_time, + int max_count, + VisitVector* visits) { + visits->clear(); + // The visit_time values can be duplicated in a redirect chain, so we sort + // by id too, to ensure a consistent ordering just in case. + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits " + "WHERE visit_time >= ? AND visit_time < ? " + "AND (transition & ?) != 0 " // CHAIN_END + "AND (transition & ?) NOT IN (?, ?, ?) " // NO SUBFRAME or + // KEYWORD_GENERATED + "ORDER BY visit_time DESC, id DESC")); + if (!statement) + return; + + // Note that we use min/max values for querying unlimited ranges of time using + // the same statement. Since the time has an index, this will be about the + // same amount of work as just doing a query for everything with no qualifier. + int64 end = end_time.ToInternalValue(); + statement.BindInt64(0, begin_time.ToInternalValue()); + statement.BindInt64(1, end ? end : std::numeric_limits<int64>::max()); + statement.BindInt(2, PageTransition::CHAIN_END); + statement.BindInt(3, PageTransition::CORE_MASK); + statement.BindInt(4, PageTransition::AUTO_SUBFRAME); + statement.BindInt(5, PageTransition::MANUAL_SUBFRAME); + statement.BindInt(6, PageTransition::KEYWORD_GENERATED); + + std::set<URLID> found_urls; + while (statement.Step()) { + VisitRow visit; + FillVisitRow(statement, &visit); + // Make sure the URL this visit corresponds to is unique. 
+ if (found_urls.find(visit.url_id) != found_urls.end()) + continue; + found_urls.insert(visit.url_id); + visits->push_back(visit); + + if (max_count > 0 && static_cast<int>(visits->size()) >= max_count) + break; + } +} + +VisitID VisitDatabase::GetMostRecentVisitForURL(URLID url_id, + VisitRow* visit_row) { + // The visit_time values can be duplicated in a redirect chain, so we sort + // by id too, to ensure a consistent ordering just in case. + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits " + "WHERE url=? " + "ORDER BY visit_time DESC, id DESC " + "LIMIT 1")); + if (!statement) + return 0; + + statement.BindInt64(0, url_id); + if (!statement.Step()) + return 0; // No visits for this URL. + + if (visit_row) { + FillVisitRow(statement, visit_row); + return visit_row->visit_id; + } + return statement.ColumnInt64(0); +} + +bool VisitDatabase::GetMostRecentVisitsForURL(URLID url_id, + int max_results, + VisitVector* visits) { + visits->clear(); + + // The visit_time values can be duplicated in a redirect chain, so we sort + // by id too, to ensure a consistent ordering just in case. + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT" HISTORY_VISIT_ROW_FIELDS + "FROM visits " + "WHERE url=? " + "ORDER BY visit_time DESC, id DESC " + "LIMIT ?")); + if (!statement) + return false; + + statement.BindInt64(0, url_id); + statement.BindInt(1, max_results); + FillVisitVector(statement, visits); + return true; +} + +bool VisitDatabase::GetRedirectFromVisit(VisitID from_visit, + VisitID* to_visit, + GURL* to_url) { + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT v.id,u.url " + "FROM visits v JOIN urls u ON v.url = u.id " + "WHERE v.from_visit = ? " + "AND (v.transition & ?) 
!= 0")); // IS_REDIRECT_MASK + if (!statement) + return false; + + statement.BindInt64(0, from_visit); + statement.BindInt(1, PageTransition::IS_REDIRECT_MASK); + + if (!statement.Step()) + return false; // No redirect from this visit. + if (to_visit) + *to_visit = statement.ColumnInt64(0); + if (to_url) + *to_url = GURL(statement.ColumnString(1)); + return true; +} + +bool VisitDatabase::GetRedirectToVisit(VisitID to_visit, + VisitID* from_visit, + GURL* from_url) { + VisitRow row; + if (!GetRowForVisit(to_visit, &row)) + return false; + + if (from_visit) + *from_visit = row.referring_visit; + + if (from_url) { + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT u.url " + "FROM visits v JOIN urls u ON v.url = u.id " + "WHERE v.id = ?")); + statement.BindInt64(0, row.referring_visit); + + if (!statement.Step()) + return false; + + *from_url = GURL(statement.ColumnString(0)); + } + return true; +} + +bool VisitDatabase::GetVisitCountToHost(const GURL& url, + int* count, + base::Time* first_visit) { + if (!url.SchemeIs(chrome::kHttpScheme) && !url.SchemeIs(chrome::kHttpsScheme)) + return false; + + // We need to search for URLs with a matching host/port. One way to query for + // this is to use the LIKE operator, eg 'url LIKE http://google.com/%'. This + // is inefficient though in that it doesn't use the index and each entry must + // be visited. The same query can be executed by using >= and < operator. + // The query becomes: + // 'url >= http://google.com/' and url < http://google.com0'. + // 0 is used as it is one character greater than '/'. 
+ GURL search_url(url); + const std::string host_query_min = search_url.GetOrigin().spec(); + + if (host_query_min.empty()) + return false; + + std::string host_query_max = host_query_min; + host_query_max[host_query_max.size() - 1] = '0'; + + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT MIN(v.visit_time), COUNT(*) " + "FROM visits v INNER JOIN urls u ON v.url = u.id " + "WHERE (u.url >= ? AND u.url < ?)")); + if (!statement) + return false; + + statement.BindString(0, host_query_min); + statement.BindString(1, host_query_max); + + if (!statement.Step()) { + // We've never been to this page before. + *count = 0; + return true; + } + + *first_visit = base::Time::FromInternalValue(statement.ColumnInt64(0)); + *count = statement.ColumnInt(1); + return true; +} + +bool VisitDatabase::GetStartDate(base::Time* first_visit) { + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT MIN(visit_time) FROM visits WHERE visit_time != 0")); + if (!statement || !statement.Step() || statement.ColumnInt64(0) == 0) { + *first_visit = base::Time::Now(); + return false; + } + *first_visit = base::Time::FromInternalValue(statement.ColumnInt64(0)); + return true; +} + +} // namespace history diff --git a/chrome/browser/history/visit_database.h b/chrome/browser/history/visit_database.h new file mode 100644 index 0000000..a6dbf3c --- /dev/null +++ b/chrome/browser/history/visit_database.h @@ -0,0 +1,168 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CHROME_BROWSER_HISTORY_VISIT_DATABASE_H_ +#define CHROME_BROWSER_HISTORY_VISIT_DATABASE_H_ + +#include "chrome/browser/history/history_types.h" + +namespace sql { +class Connection; +class Statement; +} + +namespace history { + +// A visit database is one which stores visits for URLs, that is, times and +// linking information. 
A visit database must also be a URLDatabase, as this +// modifies tables used by URLs directly and could be thought of as inheriting +// from URLDatabase. However, this inheritance is not explicit as things would +// get too complicated and have multiple inheritance. +class VisitDatabase { + public: + // Must call InitVisitTable() before using to make sure the database is + // initialized. + VisitDatabase(); + virtual ~VisitDatabase(); + + // Deletes the visit table. Used for rapidly clearing all visits. In this + // case, InitVisitTable would be called immediately afterward to re-create it. + // Returns true on success. + bool DropVisitTable(); + + // Adds a line to the visit database with the given information, returning + // the added row ID on success, 0 on failure. The given visit is updated with + // the new row ID on success. + VisitID AddVisit(VisitRow* visit); + + // Deletes the given visit from the database. If a visit with the given ID + // doesn't exist, it will not do anything. + void DeleteVisit(const VisitRow& visit); + + // Query a VisitInfo giving an visit id, filling the given VisitRow. + // Returns true on success. + bool GetRowForVisit(VisitID visit_id, VisitRow* out_visit); + + // Updates an existing row. The new information is set on the row, using the + // VisitID as the key. The visit must exist. Returns true on success. + bool UpdateVisitRow(const VisitRow& visit); + + // Fills in the given vector with all of the visits for the given page ID, + // sorted in ascending order of date. Returns true on success (although there + // may still be no matches). + bool GetVisitsForURL(URLID url_id, VisitVector* visits); + + // Fills all visits in the time range [begin, end) to the given vector. Either + // time can be is_null(), in which case the times in that direction are + // unbounded. + // + // If |max_results| is non-zero, up to that many results will be returned. If + // there are more results than that, the oldest ones will be returned. 
(This + // is used for history expiration.) + // + // The results will be in increasing order of date. + void GetAllVisitsInRange(base::Time begin_time, base::Time end_time, + int max_results, VisitVector* visits); + + // Fills all visits with specified transition in the time range [begin, end) + // to the given vector. Either time can be is_null(), in which case the times + // in that direction are unbounded. + // + // If |max_results| is non-zero, up to that many results will be returned. If + // there are more results than that, the oldest ones will be returned. (This + // is used for history expiration.) + // + // The results will be in increasing order of date. + void GetVisitsInRangeForTransition(base::Time begin_time, + base::Time end_time, + int max_results, + PageTransition::Type transition, + VisitVector* visits); + + // Fills all visits in the given time range into the given vector that should + // be user-visible, which excludes things like redirects and subframes. The + // begin time is inclusive, the end time is exclusive. Either time can be + // is_null(), in which case the times in that direction are unbounded. + // + // Up to |max_count| visits will be returned. If there are more visits than + // that, the most recent |max_count| will be returned. If 0, all visits in the + // range will be computed. + // + // Only one visit for each URL will be returned, and it will be the most + // recent one in the time range. + void GetVisibleVisitsInRange(base::Time begin_time, base::Time end_time, + int max_count, + VisitVector* visits); + + // Returns the visit ID for the most recent visit of the given URL ID, or 0 + // if there is no visit for the URL. + // + // If non-NULL, the given visit row will be filled with the information of + // the found visit. When no visit is found, the row will be unchanged. + VisitID GetMostRecentVisitForURL(URLID url_id, + VisitRow* visit_row); + + // Returns the |max_results| most recent visit sessions for |url_id|. 
+ // + // Returns false if there's a failure preparing the statement. True + // otherwise. (No results are indicated with an empty |visits| + // vector.) + bool GetMostRecentVisitsForURL(URLID url_id, + int max_results, + VisitVector* visits); + + // Finds a redirect coming from the given |from_visit|. If a redirect is + // found, it fills the visit ID and URL into the out variables and returns + // true. If there is no redirect from the given visit, returns false. + // + // If there is more than one redirect, this will compute a random one. But + // duplicates should be very rare, and we don't actually care which one we + // get in most cases. These will occur when the user goes back and gets + // redirected again. + // + // to_visit and to_url can be NULL in which case they are ignored. + bool GetRedirectFromVisit(VisitID from_visit, + VisitID* to_visit, + GURL* to_url); + + // Similar to the above function except finds a redirect going to a given + // |to_visit|. + bool GetRedirectToVisit(VisitID to_visit, + VisitID* from_visit, + GURL* from_url); + + // Returns the number of visits to all urls on the scheme/host/post + // identified by url. This is only valid for http and https urls (all other + // schemes are ignored and false is returned). + // count is set to the number of visits, first_visit is set to the first time + // the host was visited. Returns true on success. + bool GetVisitCountToHost(const GURL& url, int* count, + base::Time* first_visit); + + // Get the time of the first item in our database. + bool GetStartDate(base::Time* first_visit); + + protected: + // Returns the database for the functions in this interface. + virtual sql::Connection& GetDB() = 0; + + // Called by the derived classes on initialization to make sure the tables + // and indices are properly set up. Must be called before anything else. + bool InitVisitTable(); + + // Convenience to fill a VisitRow. Assumes the visit values are bound starting + // at index 0. 
+ static void FillVisitRow(sql::Statement& statement, VisitRow* visit); + + // Convenience to fill a VisitVector. Assumes that statement.step() + // hasn't happened yet. + static void FillVisitVector(sql::Statement& statement, VisitVector* visits); + + private: + DISALLOW_COPY_AND_ASSIGN(VisitDatabase); +}; + +} // history + +#endif // CHROME_BROWSER_HISTORY_VISIT_DATABASE_H_ diff --git a/chrome/browser/history/visit_database_unittest.cc b/chrome/browser/history/visit_database_unittest.cc new file mode 100644 index 0000000..ebc2e1b --- /dev/null +++ b/chrome/browser/history/visit_database_unittest.cc @@ -0,0 +1,230 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "app/sql/connection.h" +#include "base/file_path.h" +#include "base/file_util.h" +#include "base/path_service.h" +#include "base/string_util.h" +#include "chrome/browser/history/url_database.h" +#include "chrome/browser/history/visit_database.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "testing/platform_test.h" + +using base::Time; +using base::TimeDelta; + +namespace history { + +namespace { + +bool IsVisitInfoEqual(const VisitRow& a, + const VisitRow& b) { + return a.visit_id == b.visit_id && + a.url_id == b.url_id && + a.visit_time == b.visit_time && + a.referring_visit == b.referring_visit && + a.transition == b.transition && + a.is_indexed == b.is_indexed; +} + +} // namespace + +class VisitDatabaseTest : public PlatformTest, + public URLDatabase, + public VisitDatabase { + public: + VisitDatabaseTest() { + } + + private: + // Test setup. + void SetUp() { + PlatformTest::SetUp(); + FilePath temp_dir; + PathService::Get(base::DIR_TEMP, &temp_dir); + db_file_ = temp_dir.AppendASCII("VisitTest.db"); + file_util::Delete(db_file_, false); + + EXPECT_TRUE(db_.Open(db_file_)); + + // Initialize the tables for this test. 
+ CreateURLTable(false); + CreateMainURLIndex(); + CreateSupplimentaryURLIndices(); + InitVisitTable(); + } + void TearDown() { + db_.Close(); + file_util::Delete(db_file_, false); + PlatformTest::TearDown(); + } + + // Provided for URL/VisitDatabase. + virtual sql::Connection& GetDB() { + return db_; + } + + FilePath db_file_; + sql::Connection db_; +}; + +TEST_F(VisitDatabaseTest, Add) { + // Add one visit. + VisitRow visit_info1(1, Time::Now(), 0, PageTransition::LINK, 0); + EXPECT_TRUE(AddVisit(&visit_info1)); + + // Add second visit for the same page. + VisitRow visit_info2(visit_info1.url_id, + visit_info1.visit_time + TimeDelta::FromSeconds(1), 1, + PageTransition::TYPED, 0); + EXPECT_TRUE(AddVisit(&visit_info2)); + + // Add third visit for a different page. + VisitRow visit_info3(2, + visit_info1.visit_time + TimeDelta::FromSeconds(2), 0, + PageTransition::LINK, 0); + EXPECT_TRUE(AddVisit(&visit_info3)); + + // Query the first two. + std::vector<VisitRow> matches; + EXPECT_TRUE(GetVisitsForURL(visit_info1.url_id, &matches)); + EXPECT_EQ(static_cast<size_t>(2), matches.size()); + + // Make sure we got both (order in result set is visit time). + EXPECT_TRUE(IsVisitInfoEqual(matches[0], visit_info1) && + IsVisitInfoEqual(matches[1], visit_info2)); +} + +TEST_F(VisitDatabaseTest, Delete) { + // Add three visits that form a chain of navigation, and then delete the + // middle one. We should be left with the outer two visits, and the chain + // should link them. 
+ static const int kTime1 = 1000; + VisitRow visit_info1(1, Time::FromInternalValue(kTime1), 0, + PageTransition::LINK, 0); + EXPECT_TRUE(AddVisit(&visit_info1)); + + static const int kTime2 = kTime1 + 1; + VisitRow visit_info2(1, Time::FromInternalValue(kTime2), + visit_info1.visit_id, PageTransition::LINK, 0); + EXPECT_TRUE(AddVisit(&visit_info2)); + + static const int kTime3 = kTime2 + 1; + VisitRow visit_info3(1, Time::FromInternalValue(kTime3), + visit_info2.visit_id, PageTransition::LINK, 0); + EXPECT_TRUE(AddVisit(&visit_info3)); + + // First make sure all the visits are there. + std::vector<VisitRow> matches; + EXPECT_TRUE(GetVisitsForURL(visit_info1.url_id, &matches)); + EXPECT_EQ(static_cast<size_t>(3), matches.size()); + EXPECT_TRUE(IsVisitInfoEqual(matches[0], visit_info1) && + IsVisitInfoEqual(matches[1], visit_info2) && + IsVisitInfoEqual(matches[2], visit_info3)); + + // Delete the middle one. + DeleteVisit(visit_info2); + + // The outer two should be left, and the last one should have the first as + // the referrer. + visit_info3.referring_visit = visit_info1.visit_id; + matches.clear(); + EXPECT_TRUE(GetVisitsForURL(visit_info1.url_id, &matches)); + EXPECT_EQ(static_cast<size_t>(2), matches.size()); + EXPECT_TRUE(IsVisitInfoEqual(matches[0], visit_info1) && + IsVisitInfoEqual(matches[1], visit_info3)); +} + +TEST_F(VisitDatabaseTest, Update) { + // Make something in the database. + VisitRow original(1, Time::Now(), 23, 22, 19); + AddVisit(&original); + + // Mutate that row. + VisitRow modification(original); + modification.url_id = 2; + modification.transition = PageTransition::TYPED; + modification.visit_time = Time::Now() + TimeDelta::FromDays(1); + modification.referring_visit = 9292; + modification.is_indexed = true; + UpdateVisitRow(modification); + + // Check that the mutated version was written. 
+ VisitRow final; + GetRowForVisit(original.visit_id, &final); + EXPECT_TRUE(IsVisitInfoEqual(modification, final)); +} + +// TODO(brettw) write test for GetMostRecentVisitForURL! + +TEST_F(VisitDatabaseTest, GetVisibleVisitsInRange) { + // Add one visit. + VisitRow visit_info1(1, Time::Now(), 0, + static_cast<PageTransition::Type>(PageTransition::LINK | + PageTransition::CHAIN_START | + PageTransition::CHAIN_END), + 0); + visit_info1.visit_id = 1; + EXPECT_TRUE(AddVisit(&visit_info1)); + + // Add second visit for the same page. + VisitRow visit_info2(visit_info1.url_id, + visit_info1.visit_time + TimeDelta::FromSeconds(1), 1, + static_cast<PageTransition::Type>(PageTransition::TYPED | + PageTransition::CHAIN_START | + PageTransition::CHAIN_END), + 0); + visit_info2.visit_id = 2; + EXPECT_TRUE(AddVisit(&visit_info2)); + + // Add third visit for a different page. + VisitRow visit_info3(2, + visit_info1.visit_time + TimeDelta::FromSeconds(2), 0, + static_cast<PageTransition::Type>(PageTransition::LINK | + PageTransition::CHAIN_START), + 0); + visit_info3.visit_id = 3; + EXPECT_TRUE(AddVisit(&visit_info3)); + + // Add a redirect visit from the last page. + VisitRow visit_info4(3, + visit_info1.visit_time + TimeDelta::FromSeconds(3), visit_info3.visit_id, + static_cast<PageTransition::Type>(PageTransition::SERVER_REDIRECT | + PageTransition::CHAIN_END), + 0); + visit_info4.visit_id = 4; + EXPECT_TRUE(AddVisit(&visit_info4)); + + // Add a subframe visit. + VisitRow visit_info5(4, + visit_info1.visit_time + TimeDelta::FromSeconds(4), visit_info4.visit_id, + static_cast<PageTransition::Type>(PageTransition::AUTO_SUBFRAME | + PageTransition::CHAIN_START | + PageTransition::CHAIN_END), + 0); + visit_info5.visit_id = 5; + EXPECT_TRUE(AddVisit(&visit_info5)); + + // Query the visits for all time, we should not get the first (duplicate of + // the second) or the redirect or subframe visits. 
+ VisitVector results; + GetVisibleVisitsInRange(Time(), Time(), 0, &results); + ASSERT_EQ(static_cast<size_t>(2), results.size()); + EXPECT_TRUE(IsVisitInfoEqual(results[0], visit_info4) && + IsVisitInfoEqual(results[1], visit_info2)); + + // Query a time range and make sure beginning is inclusive and ending is + // exclusive. + GetVisibleVisitsInRange(visit_info2.visit_time, visit_info4.visit_time, 0, + &results); + ASSERT_EQ(static_cast<size_t>(1), results.size()); + EXPECT_TRUE(IsVisitInfoEqual(results[0], visit_info2)); + + // Query for a max count and make sure we get only that number. + GetVisibleVisitsInRange(Time(), Time(), 1, &results); + ASSERT_EQ(static_cast<size_t>(1), results.size()); + EXPECT_TRUE(IsVisitInfoEqual(results[0], visit_info4)); +} +} // namespace history diff --git a/chrome/browser/history/visit_tracker.cc b/chrome/browser/history/visit_tracker.cc new file mode 100644 index 0000000..7bb0011 --- /dev/null +++ b/chrome/browser/history/visit_tracker.cc @@ -0,0 +1,106 @@ +// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/history/visit_tracker.h" + +#include "base/stl_util-inl.h" + +namespace history { + +// When the list gets longer than 'MaxItems', CleanupTransitionList will resize +// the list down to 'ResizeTo' size. This is so we only do few block moves of +// the data rather than constantly shuffle stuff around in the vector. 
+static const size_t kMaxItemsInTransitionList = 96;
+static const size_t kResizeBigTransitionListTo = 64;
+COMPILE_ASSERT(kResizeBigTransitionListTo < kMaxItemsInTransitionList,
+ max_items_must_be_larger_than_resize_to);
+
+VisitTracker::VisitTracker() {
+}
+
+VisitTracker::~VisitTracker() {
+ STLDeleteContainerPairSecondPointers(hosts_.begin(), hosts_.end());
+}
+
+// This function is potentially slow because it may do up to two brute-force
+// searches of the transitions list. This transitions list is kept to a
+// relatively small number by CleanupTransitionList so it shouldn't be a big
+// deal. However, if this ends up being noticeable for performance, we may want
+// to optimize lookup.
+VisitID VisitTracker::GetLastVisit(const void* host,
+ int32 page_id,
+ const GURL& referrer) {
+ if (referrer.is_empty() || !host)
+ return 0;
+
+ HostList::iterator i = hosts_.find(host);
+ if (i == hosts_.end())
+ return 0; // We don't have any entries for this host.
+ TransitionList& transitions = *i->second;
+
+ // Recall that a page ID is associated with a single session history entry.
+ // In the case of automatically loaded iframes, many visits/URLs can have the
+ // same page ID.
+ //
+ // We search backwards, starting at the current page ID, for the referring
+ // URL. This won't always be correct. For example, if a render process has
+ // the same page open in two different tabs, or even in two different frames,
+ // we can get confused about which was which. We can have the renderer
+ // report more precise referrer information in the future, but this is a
+ // hard problem and doesn't affect much in terms of real-world issues.
+ //
+ // We assume that the page IDs are increasing over time, so larger IDs than
+ the current input ID happened in the future (this will occur if the user
+ goes back).
We can ignore future transitions because if you navigate, go + // back, and navigate some more, we'd like to have one node with two out + // edges in our visit graph. + for (int i = static_cast<int>(transitions.size()) - 1; i >= 0; i--) { + if (transitions[i].page_id <= page_id && transitions[i].url == referrer) { + // Found it. + return transitions[i].visit_id; + } + } + + // We can't find the referrer. + return 0; +} + +void VisitTracker::AddVisit(const void* host, + int32 page_id, + const GURL& url, + VisitID visit_id) { + TransitionList* transitions = hosts_[host]; + if (!transitions) { + transitions = new TransitionList; + hosts_[host] = transitions; + } + + Transition t; + t.url = url; + t.page_id = page_id; + t.visit_id = visit_id; + transitions->push_back(t); + + CleanupTransitionList(transitions); +} + +void VisitTracker::NotifyRenderProcessHostDestruction(const void* host) { + HostList::iterator i = hosts_.find(host); + if (i == hosts_.end()) + return; // We don't have any entries for this host. + + delete i->second; + hosts_.erase(i); +} + + +void VisitTracker::CleanupTransitionList(TransitionList* transitions) { + if (transitions->size() <= kMaxItemsInTransitionList) + return; // Nothing to do. + + transitions->erase(transitions->begin(), + transitions->begin() + kResizeBigTransitionListTo); +} + +} // namespace history diff --git a/chrome/browser/history/visit_tracker.h b/chrome/browser/history/visit_tracker.h new file mode 100644 index 0000000..43de7a0 --- /dev/null +++ b/chrome/browser/history/visit_tracker.h @@ -0,0 +1,66 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef CHROME_BROWSER_HISTORY_VISIT_TRACKER_H__ +#define CHROME_BROWSER_HISTORY_VISIT_TRACKER_H__ + +#include <map> +#include <vector> + +#include "base/basictypes.h" +#include "chrome/browser/history/history_types.h" + +namespace history { + +// Tracks history transitions between pages. The history backend uses this to +// link up page transitions to form a chain of page visits, and to set the +// transition type properly. +// +// This class is not thread safe. +class VisitTracker { + public: + VisitTracker(); + ~VisitTracker(); + + // Notifications ------------------------------------------------------------- + + void AddVisit(const void* host, + int32 page_id, + const GURL& url, + VisitID visit_id); + + // When a RenderProcessHost is destroyed, we want to clear out our saved + // transitions/visit IDs for it. + void NotifyRenderProcessHostDestruction(const void* host); + + // Querying ------------------------------------------------------------------ + + // Returns the visit ID for the transition given information about the visit + // supplied by the renderer. We will return 0 if there is no appropriate + // referring visit. + VisitID GetLastVisit(const void* host, int32 page_id, const GURL& url); + + private: + struct Transition { + GURL url; // URL that the event happened to. + int32 page_id; // ID generated by the render process host. + VisitID visit_id; // Visit ID generated by history. + }; + typedef std::vector<Transition> TransitionList; + typedef std::map<const void*, TransitionList*> HostList; + + // Expires oldish items in the given transition list. This keeps the list + // size small by removing items that are unlikely to be needed, which is + // important for GetReferrer which does brute-force searches of this list. + void CleanupTransitionList(TransitionList* transitions); + + // Maps render view hosts to lists of recent transitions. 
+ HostList hosts_; + + DISALLOW_COPY_AND_ASSIGN(VisitTracker); +}; + +} // namespace history + +#endif // CHROME_BROWSER_HISTORY_VISIT_TRACKER_H__ diff --git a/chrome/browser/history/visit_tracker_unittest.cc b/chrome/browser/history/visit_tracker_unittest.cc new file mode 100644 index 0000000..5c84d28 --- /dev/null +++ b/chrome/browser/history/visit_tracker_unittest.cc @@ -0,0 +1,138 @@ +// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/history/visit_tracker.h" +#include "base/basictypes.h" +#include "testing/gtest/include/gtest/gtest.h" + +using history::VisitTracker; + +namespace { + +struct VisitToTest { + // Identifies the host, we'll cast this to a pointer when querying (the + // tracker isn't allowed to dereference this pointer). + int host; + int32 page_id; + + // Used when adding this to the tracker + const char* url; + const history::VisitID visit_id; + + // Used when finding the referrer + const char* referrer; + + // the correct referring visit ID to compare to the computed one + history::VisitID referring_visit_id; +}; + +// The tracker uses RenderProcessHost pointers for scoping but never +// dereferences them. We use ints because it's easier. This function converts +// between the two. +void* MakeFakeHost(int id) { + void* host = 0; + memcpy(&host, &id, sizeof(int)); + return host; +} + +void RunTest(VisitTracker* tracker, VisitToTest* test, int test_count) { + for (int i = 0; i < test_count; i++) { + // Our host pointer is actually just an int, convert it (it will not get + // dereferenced). + void* host = MakeFakeHost(test[i].host); + + // Check the referrer for this visit. + history::VisitID ref_visit = tracker->GetLastVisit( + host, test[i].page_id, GURL(test[i].referrer)); + EXPECT_EQ(test[i].referring_visit_id, ref_visit); + + // Now add this visit. 
+ tracker->AddVisit(host, test[i].page_id, GURL(test[i].url),
+ test[i].visit_id);
+ }
+}
+
+} // namespace
+
+// A simple test that makes sure we transition between main pages in the
+// presence of back/forward.
+TEST(VisitTracker, SimpleTransitions) {
+ VisitToTest test_simple[] = {
+ // Started here:
+ {1, 1, "http://www.google.com/", 1, "", 0},
+ // Clicked a link:
+ {1, 2, "http://images.google.com/", 2, "http://www.google.com/", 1},
+ // Went back, then clicked a link:
+ {1, 3, "http://video.google.com/", 3, "http://www.google.com/", 1},
+ };
+
+ VisitTracker tracker;
+ RunTest(&tracker, test_simple, arraysize(test_simple));
+}
+
+// Test that referrer is properly computed when there are different frame
+// navigations happening.
+TEST(VisitTracker, Frames) {
+ VisitToTest test_frames[] = {
+ // Started here:
+ {1, 1, "http://foo.com/", 1, "", 0},
+ // Which had an auto-loaded subframe:
+ {1, 1, "http://foo.com/ad.html", 2, "http://foo.com/", 1},
+ // ...and another auto-loaded subframe:
+ {1, 1, "http://foo.com/ad2.html", 3, "http://foo.com/", 1},
+ // ...and the user navigated the first subframe to somewhere else
+ {1, 2, "http://bar.com/", 4, "http://foo.com/ad.html", 2},
+ // ...and then the second subframe somewhere else
+ {1, 3, "http://fud.com/", 5, "http://foo.com/ad2.html", 3},
+ // ...and then the main frame somewhere else.
+ {1, 4, "http://www.google.com/", 6, "http://foo.com/", 1},
+ };
+
+ VisitTracker tracker;
+ RunTest(&tracker, test_frames, arraysize(test_frames));
+}
+
+// Test frame navigation to make sure that the referrer is properly computed
+// when there are multiple processes navigating the same pages.
+TEST(VisitTracker, MultiProcess) { + VisitToTest test_processes[] = { + // Process 1 and 2 start here: + {1, 1, "http://foo.com/", 1, "", 0}, + {2, 1, "http://foo.com/", 2, "", 0}, + // They have some subframes: + {1, 1, "http://foo.com/ad.html", 3, "http://foo.com/", 1}, + {2, 1, "http://foo.com/ad.html", 4, "http://foo.com/", 2}, + // Subframes are navigated: + {1, 2, "http://bar.com/", 5, "http://foo.com/ad.html", 3}, + {2, 2, "http://bar.com/", 6, "http://foo.com/ad.html", 4}, + // Main frame is navigated: + {1, 3, "http://www.google.com/", 7, "http://foo.com/", 1}, + {2, 3, "http://www.google.com/", 8, "http://foo.com/", 2}, + }; + + VisitTracker tracker; + RunTest(&tracker, test_processes, arraysize(test_processes)); +} + +// Test that processes get removed properly. +TEST(VisitTracker, ProcessRemove) { + // Simple navigation from one process. + VisitToTest part1[] = { + {1, 1, "http://www.google.com/", 1, "", 0}, + {1, 2, "http://images.google.com/", 2, "http://www.google.com/", 1}, + }; + + VisitTracker tracker; + RunTest(&tracker, part1, arraysize(part1)); + + // Say that process has been destroyed. + tracker.NotifyRenderProcessHostDestruction(MakeFakeHost(1)); + + // Simple navigation from a new process with the same ID, it should not find + // a referrer. + VisitToTest part2[] = { + {1, 1, "http://images.google.com/", 2, "http://www.google.com/", 0}, + }; + RunTest(&tracker, part2, arraysize(part2)); +} diff --git a/chrome/browser/history/visitsegment_database.cc b/chrome/browser/history/visitsegment_database.cc new file mode 100644 index 0000000..f94d713 --- /dev/null +++ b/chrome/browser/history/visitsegment_database.cc @@ -0,0 +1,386 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "chrome/browser/history/visitsegment_database.h" + +#include <math.h> + +#include <algorithm> +#include <string> +#include <vector> + +#include "app/sql/connection.h" +#include "app/sql/statement.h" +#include "base/logging.h" +#include "base/stl_util-inl.h" +#include "base/string_util.h" +#include "base/utf_string_conversions.h" +#include "chrome/browser/history/page_usage_data.h" + +// The following tables are used to store url segment information. +// +// segments +// id Primary key +// name A unique string to represent that segment. (URL derived) +// url_id ID of the url currently used to represent this segment. +// pres_index index used to store a fixed presentation position. +// +// segment_usage +// id Primary key +// segment_id Corresponding segment id +// time_slot time stamp identifying for what day this entry is about +// visit_count Number of visit in the segment +// + +namespace history { + +VisitSegmentDatabase::VisitSegmentDatabase() { +} + +VisitSegmentDatabase::~VisitSegmentDatabase() { +} + +bool VisitSegmentDatabase::InitSegmentTables() { + // Segments table. + if (!GetDB().DoesTableExist("segments")) { + if (!GetDB().Execute("CREATE TABLE segments (" + "id INTEGER PRIMARY KEY," + "name VARCHAR," + "url_id INTEGER NON NULL," + "pres_index INTEGER DEFAULT -1 NOT NULL)")) { + NOTREACHED(); + return false; + } + + if (!GetDB().Execute("CREATE INDEX segments_name ON segments(name)")) { + NOTREACHED(); + return false; + } + } + + // This was added later, so we need to try to create it even if the table + // already exists. + GetDB().Execute("CREATE INDEX segments_url_id ON segments(url_id)"); + + // Segment usage table. 
+ if (!GetDB().DoesTableExist("segment_usage")) {
+ if (!GetDB().Execute("CREATE TABLE segment_usage ("
+ "id INTEGER PRIMARY KEY,"
+ "segment_id INTEGER NOT NULL,"
+ "time_slot INTEGER NOT NULL,"
+ "visit_count INTEGER DEFAULT 0 NOT NULL)")) {
+ NOTREACHED();
+ return false;
+ }
+ if (!GetDB().Execute(
+ "CREATE INDEX segment_usage_time_slot_segment_id ON "
+ "segment_usage(time_slot, segment_id)")) {
+ NOTREACHED();
+ return false;
+ }
+ }
+
+ // Added in a later version, so we always need to try to create this index.
+ GetDB().Execute("CREATE INDEX segments_usage_seg_id "
+ "ON segment_usage(segment_id)");
+
+ // Presentation index table.
+ //
+ // Important note:
+ // Right now, this table is only used to store the presentation index.
+ // If you need to add more columns, keep in mind that rows are currently
+ // deleted when the presentation index is changed to -1.
+ // See SetSegmentPresentationIndex() in this file
+ if (!GetDB().DoesTableExist("presentation")) {
+ if (!GetDB().Execute("CREATE TABLE presentation("
+ "url_id INTEGER PRIMARY KEY,"
+ "pres_index INTEGER NOT NULL)"))
+ return false;
+ }
+ return true;
+}
+
+bool VisitSegmentDatabase::DropSegmentTables() {
+ // Dropping the tables will implicitly delete the indices.
+ return GetDB().Execute("DROP TABLE segments") &&
+ GetDB().Execute("DROP TABLE segment_usage");
+}
+
+// Note: the segment name is derived from the URL but is not a URL. It is
+// a string that can be easily recreated from various URLs. Maybe this should
+// be an MD5 to limit the length.
+//
+// static
+std::string VisitSegmentDatabase::ComputeSegmentName(const GURL& url) {
+ // TODO(brettw) this should probably use the registry controlled
+ // domains service.
+ GURL::Replacements r;
+ const char kWWWDot[] = "www.";
+ const int kWWWDotLen = arraysize(kWWWDot) - 1;
+
+ std::string host = url.host();
+ const char* host_c = host.c_str();
+ // Remove www. to avoid some dups.
+ if (static_cast<int>(host.size()) > kWWWDotLen && + LowerCaseEqualsASCII(host_c, host_c + kWWWDotLen, kWWWDot)) { + r.SetHost(host.c_str(), + url_parse::Component(kWWWDotLen, + static_cast<int>(host.size()) - kWWWDotLen)); + } + // Remove other stuff we don't want. + r.ClearUsername(); + r.ClearPassword(); + r.ClearQuery(); + r.ClearRef(); + r.ClearPort(); + + return url.ReplaceComponents(r).spec(); +} + +SegmentID VisitSegmentDatabase::GetSegmentNamed( + const std::string& segment_name) { + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT id FROM segments WHERE name = ?")); + if (!statement) + return 0; + + statement.BindString(0, segment_name); + if (statement.Step()) + return statement.ColumnInt64(0); + return 0; +} + +bool VisitSegmentDatabase::UpdateSegmentRepresentationURL(SegmentID segment_id, + URLID url_id) { + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "UPDATE segments SET url_id = ? WHERE id = ?")); + if (!statement) + return false; + + statement.BindInt64(0, url_id); + statement.BindInt64(1, segment_id); + return statement.Run(); +} + +URLID VisitSegmentDatabase::GetSegmentRepresentationURL(SegmentID segment_id) { + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT url_id FROM segments WHERE id = ?")); + if (!statement) + return 0; + + statement.BindInt64(0, segment_id); + if (statement.Step()) + return statement.ColumnInt64(0); + return 0; +} + +SegmentID VisitSegmentDatabase::CreateSegment(URLID url_id, + const std::string& segment_name) { + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "INSERT INTO segments (name, url_id) VALUES (?,?)")); + if (!statement) + return false; + + statement.BindString(0, segment_name); + statement.BindInt64(1, url_id); + if (statement.Run()) + return GetDB().GetLastInsertRowId(); + return false; +} + +bool VisitSegmentDatabase::IncreaseSegmentVisitCount(SegmentID segment_id, + base::Time ts, + int amount) { + 
base::Time t = ts.LocalMidnight(); + + sql::Statement select(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT id, visit_count FROM segment_usage " + "WHERE time_slot = ? AND segment_id = ?")); + if (!select) + return false; + + select.BindInt64(0, t.ToInternalValue()); + select.BindInt64(1, segment_id); + if (select.Step()) { + sql::Statement update(GetDB().GetCachedStatement(SQL_FROM_HERE, + "UPDATE segment_usage SET visit_count = ? WHERE id = ?")); + if (!update) + return false; + + update.BindInt64(0, select.ColumnInt64(1) + static_cast<int64>(amount)); + update.BindInt64(1, select.ColumnInt64(0)); + return update.Run(); + + } else { + sql::Statement insert(GetDB().GetCachedStatement(SQL_FROM_HERE, + "INSERT INTO segment_usage " + "(segment_id, time_slot, visit_count) VALUES (?, ?, ?)")); + if (!insert) + return false; + + insert.BindInt64(0, segment_id); + insert.BindInt64(1, t.ToInternalValue()); + insert.BindInt64(2, static_cast<int64>(amount)); + return insert.Run(); + } +} + +void VisitSegmentDatabase::QuerySegmentUsage( + base::Time from_time, + int max_result_count, + std::vector<PageUsageData*>* results) { + // This function gathers the highest-ranked segments in two queries. + // The first gathers scores for all segments. + // The second gathers segment data (url, title, etc.) for the highest-ranked + // segments. + // TODO(evanm): this disregards the "presentation index", which was what was + // used to lock results into position. But the rest of our code currently + // does as well. + + // Gather all the segment scores. + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT segment_id, time_slot, visit_count " + "FROM segment_usage WHERE time_slot >= ? 
" + "ORDER BY segment_id")); + if (!statement) { + NOTREACHED() << GetDB().GetErrorMessage(); + return; + } + + base::Time ts = from_time.LocalMidnight(); + statement.BindInt64(0, ts.ToInternalValue()); + + base::Time now = base::Time::Now(); + SegmentID last_segment_id = 0; + PageUsageData* pud = NULL; + float score = 0; + while (statement.Step()) { + SegmentID segment_id = statement.ColumnInt64(0); + if (segment_id != last_segment_id) { + if (pud) { + pud->SetScore(score); + results->push_back(pud); + } + + pud = new PageUsageData(segment_id); + score = 0; + last_segment_id = segment_id; + } + + base::Time timeslot = + base::Time::FromInternalValue(statement.ColumnInt64(1)); + int visit_count = statement.ColumnInt(2); + int days_ago = (now - timeslot).InDays(); + + // Score for this day in isolation. + float day_visits_score = 1.0f + log(static_cast<float>(visit_count)); + // Recent visits count more than historical ones, so we multiply in a boost + // related to how long ago this day was. + // This boost is a curve that smoothly goes through these values: + // Today gets 3x, a week ago 2x, three weeks ago 1.5x, falling off to 1x + // at the limit of how far we reach into the past. + float recency_boost = 1.0f + (2.0f * (1.0f / (1.0f + days_ago/7.0f))); + score += recency_boost * day_visits_score; + } + + if (pud) { + pud->SetScore(score); + results->push_back(pud); + } + + // Limit to the top kResultCount results. + sort(results->begin(), results->end(), PageUsageData::Predicate); + if (static_cast<int>(results->size()) > max_result_count) { + STLDeleteContainerPointers(results->begin() + max_result_count, + results->end()); + results->resize(max_result_count); + } + + // Now fetch the details about the entries we care about. 
+ sql::Statement statement2(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT urls.url, urls.title FROM urls " + "JOIN segments ON segments.url_id = urls.id " + "WHERE segments.id = ?")); + if (!statement2) { + NOTREACHED() << GetDB().GetErrorMessage(); + return; + } + for (size_t i = 0; i < results->size(); ++i) { + PageUsageData* pud = (*results)[i]; + statement2.BindInt64(0, pud->GetID()); + if (statement2.Step()) { + pud->SetURL(GURL(statement2.ColumnString(0))); + pud->SetTitle(UTF8ToUTF16(statement2.ColumnString(1))); + } + statement2.Reset(); + } +} + +void VisitSegmentDatabase::DeleteSegmentData(base::Time older_than) { + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "DELETE FROM segment_usage WHERE time_slot < ?")); + if (!statement) + return; + + statement.BindInt64(0, older_than.LocalMidnight().ToInternalValue()); + if (!statement.Run()) + NOTREACHED(); +} + +void VisitSegmentDatabase::SetSegmentPresentationIndex(SegmentID segment_id, + int index) { + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "UPDATE segments SET pres_index = ? 
WHERE id = ?")); + if (!statement) + return; + + statement.BindInt(0, index); + statement.BindInt64(1, segment_id); + if (!statement.Run()) + NOTREACHED(); + else + DCHECK_EQ(1, GetDB().GetLastChangeCount()); +} + +bool VisitSegmentDatabase::DeleteSegmentForURL(URLID url_id) { + sql::Statement select(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT id FROM segments WHERE url_id = ?")); + if (!select) + return false; + + sql::Statement delete_seg(GetDB().GetCachedStatement(SQL_FROM_HERE, + "DELETE FROM segments WHERE id = ?")); + if (!delete_seg) + return false; + + sql::Statement delete_usage(GetDB().GetCachedStatement(SQL_FROM_HERE, + "DELETE FROM segment_usage WHERE segment_id = ?")); + if (!delete_usage) + return false; + + bool r = true; + select.BindInt64(0, url_id); + // In theory there could not be more than one segment using that URL but we + // loop anyway to cleanup any inconsistency. + while (select.Step()) { + SegmentID segment_id = select.ColumnInt64(0); + + delete_usage.BindInt64(0, segment_id); + if (!delete_usage.Run()) { + NOTREACHED(); + r = false; + } + + delete_seg.BindInt64(0, segment_id); + if (!delete_seg.Run()) { + NOTREACHED(); + r = false; + } + delete_usage.Reset(); + delete_seg.Reset(); + } + return r; +} + +} // namespace history diff --git a/chrome/browser/history/visitsegment_database.h b/chrome/browser/history/visitsegment_database.h new file mode 100644 index 0000000..16f0417 --- /dev/null +++ b/chrome/browser/history/visitsegment_database.h @@ -0,0 +1,88 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+
+#ifndef CHROME_BROWSER_HISTORY_VISITSEGMENT_DATABASE_H_
+#define CHROME_BROWSER_HISTORY_VISITSEGMENT_DATABASE_H_
+
+#include "base/basictypes.h"
+#include "chrome/browser/history/history_types.h"
+
+class PageUsageData;
+
+namespace sql {
+class Connection;
+}
+
+namespace history {
+
+// Tracks pages used for the most visited view.
+class VisitSegmentDatabase {
+ public:
+ // Must call InitSegmentTables before using any other part of this class.
+ VisitSegmentDatabase();
+ virtual ~VisitSegmentDatabase();
+
+ // Compute a segment name given a URL. The segment name is currently the
+ // source url spec less some information such as query strings.
+ static std::string ComputeSegmentName(const GURL& url);
+
+ // Returns the ID of the segment with the corresponding name, or 0 if there
+ // is no segment with that name.
+ SegmentID GetSegmentNamed(const std::string& segment_name);
+
+ // Update the segment identified by |segment_id| with the provided URL ID.
+ // The URL identifies the page that will now represent the segment. If url_id
+ // is non zero, it is assumed to be the row id of |url|.
+ bool UpdateSegmentRepresentationURL(SegmentID segment_id,
+ URLID url_id);
+
+ // Return the ID of the URL currently used to represent this segment or 0 if
+ // an error occurred.
+ URLID GetSegmentRepresentationURL(SegmentID segment_id);
+
+ // Create a segment for the provided URL ID with the given name. Returns the
+ // ID of the newly created segment, or 0 on failure.
+ SegmentID CreateSegment(URLID url_id, const std::string& segment_name);
+
+ // Increase the segment visit count by the provided amount. Return true on
+ // success.
+ bool IncreaseSegmentVisitCount(SegmentID segment_id, base::Time ts,
+ int amount);
+
+ // Compute the segment usage since |from_time| using the provided aggregator.
+ // A PageUsageData is added in |result| for the highest-scored segments up to
+ // |max_result_count|.
+ void QuerySegmentUsage(base::Time from_time, + int max_result_count, + std::vector<PageUsageData*>* result); + + // Delete all the segment usage data which is older than the provided time + // stamp. + void DeleteSegmentData(base::Time older_than); + + // Change the presentation id for the segment identified by |segment_id| + void SetSegmentPresentationIndex(SegmentID segment_id, int index); + + // Delete the segment currently using the provided url for representation. + // This will also delete any associated segment usage data. + bool DeleteSegmentForURL(URLID url_id); + + protected: + // Returns the database for the functions in this interface. + virtual sql::Connection& GetDB() = 0; + + // Creates the tables used by this class if necessary. Returns true on + // success. + bool InitSegmentTables(); + + // Deletes all the segment tables, returning true on success. + bool DropSegmentTables(); + + private: + DISALLOW_COPY_AND_ASSIGN(VisitSegmentDatabase); +}; + +} // namespace history + +#endif // CHROME_BROWSER_HISTORY_VISITSEGMENT_DATABASE_H_ |