// Copyright (c) 2012 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "chrome/browser/history/history_database.h" #include #include #include #include "base/command_line.h" #include "base/file_util.h" #include "base/metrics/histogram.h" #include "base/rand_util.h" #include "base/strings/string_util.h" #include "base/time/time.h" #include "sql/transaction.h" #if defined(OS_MACOSX) #include "base/mac/mac_util.h" #endif namespace history { namespace { // Current version number. We write databases at the "current" version number, // but any previous version that can read the "compatible" one can make do with // or database without *too* many bad effects. const int kCurrentVersionNumber = 28; const int kCompatibleVersionNumber = 16; const char kEarlyExpirationThresholdKey[] = "early_expiration_threshold"; } // namespace HistoryDatabase::HistoryDatabase() : needs_version_17_migration_(false) { } HistoryDatabase::~HistoryDatabase() { } sql::InitStatus HistoryDatabase::Init(const base::FilePath& history_name) { db_.set_histogram_tag("History"); // Set the exceptional sqlite error handler. db_.set_error_callback(error_callback_); // Set the database page size to something a little larger to give us // better performance (we're typically seek rather than bandwidth limited). // This only has an effect before any tables have been created, otherwise // this is a NOP. Must be a power of 2 and a max of 8192. db_.set_page_size(4096); // Set the cache size. The page size, plus a little extra, times this // value, tells us how much memory the cache will use maximum. // 1000 * 4kB = 4MB // TODO(brettw) scale this value to the amount of available memory. db_.set_cache_size(1000); // Note that we don't set exclusive locking here. That's done by // BeginExclusiveMode below which is called later (we have to be in shared // mode to start out for the in-memory backend to read the data). if (!db_.Open(history_name)) return sql::INIT_FAILURE; // Wrap the rest of init in a tranaction. This will prevent the database from // getting corrupted if we crash in the middle of initialization or migration. sql::Transaction committer(&db_); if (!committer.Begin()) return sql::INIT_FAILURE; #if defined(OS_MACOSX) // Exclude the history file from backups. base::mac::SetFileBackupExclusion(history_name); #endif // Prime the cache. db_.Preload(); // Create the tables and indices. // NOTE: If you add something here, also add it to // RecreateAllButStarAndURLTables. if (!meta_table_.Init(&db_, GetCurrentVersion(), kCompatibleVersionNumber)) return sql::INIT_FAILURE; if (!CreateURLTable(false) || !InitVisitTable() || !InitKeywordSearchTermsTable() || !InitDownloadTable() || !InitSegmentTables()) return sql::INIT_FAILURE; CreateMainURLIndex(); CreateKeywordSearchTermsIndices(); // TODO(benjhayden) Remove at some point. meta_table_.DeleteKey("next_download_id"); // Version check. sql::InitStatus version_status = EnsureCurrentVersion(); if (version_status != sql::INIT_OK) return version_status; return committer.Commit() ? sql::INIT_OK : sql::INIT_FAILURE; } void HistoryDatabase::ComputeDatabaseMetrics( const base::FilePath& history_name) { base::TimeTicks start_time = base::TimeTicks::Now(); int64 file_size = 0; if (!base::GetFileSize(history_name, &file_size)) return; int file_mb = static_cast(file_size / (1024 * 1024)); UMA_HISTOGRAM_MEMORY_MB("History.DatabaseFileMB", file_mb); sql::Statement url_count(db_.GetUniqueStatement("SELECT count(*) FROM urls")); if (!url_count.Step()) return; UMA_HISTOGRAM_COUNTS("History.URLTableCount", url_count.ColumnInt(0)); sql::Statement visit_count(db_.GetUniqueStatement( "SELECT count(*) FROM visits")); if (!visit_count.Step()) return; UMA_HISTOGRAM_COUNTS("History.VisitTableCount", visit_count.ColumnInt(0)); base::Time one_week_ago = base::Time::Now() - base::TimeDelta::FromDays(7); sql::Statement weekly_visit_sql(db_.GetUniqueStatement( "SELECT count(*) FROM visits WHERE visit_time > ?")); weekly_visit_sql.BindInt64(0, one_week_ago.ToInternalValue()); int weekly_visit_count = 0; if (weekly_visit_sql.Step()) weekly_visit_count = weekly_visit_sql.ColumnInt(0); UMA_HISTOGRAM_COUNTS("History.WeeklyVisitCount", weekly_visit_count); base::Time one_month_ago = base::Time::Now() - base::TimeDelta::FromDays(30); sql::Statement monthly_visit_sql(db_.GetUniqueStatement( "SELECT count(*) FROM visits WHERE visit_time > ? AND visit_time <= ?")); monthly_visit_sql.BindInt64(0, one_month_ago.ToInternalValue()); monthly_visit_sql.BindInt64(1, one_week_ago.ToInternalValue()); int older_visit_count = 0; if (monthly_visit_sql.Step()) older_visit_count = monthly_visit_sql.ColumnInt(0); UMA_HISTOGRAM_COUNTS("History.MonthlyVisitCount", older_visit_count + weekly_visit_count); UMA_HISTOGRAM_TIMES("History.DatabaseBasicMetricsTime", base::TimeTicks::Now() - start_time); // Compute the advanced metrics even less often, pending timing data showing // that's not necessary. if (base::RandInt(1, 3) == 3) { start_time = base::TimeTicks::Now(); // Collect all URLs visited within the last month. sql::Statement url_sql(db_.GetUniqueStatement( "SELECT url, last_visit_time FROM urls WHERE last_visit_time > ?")); url_sql.BindInt64(0, one_month_ago.ToInternalValue()); // Count URLs (which will always be unique) and unique hosts within the last // week and last month. int week_url_count = 0; int month_url_count = 0; std::set week_hosts; std::set month_hosts; while (url_sql.Step()) { GURL url(url_sql.ColumnString(0)); base::Time visit_time = base::Time::FromInternalValue(url_sql.ColumnInt64(1)); ++month_url_count; month_hosts.insert(url.host()); if (visit_time > one_week_ago) { ++week_url_count; week_hosts.insert(url.host()); } } UMA_HISTOGRAM_COUNTS("History.WeeklyURLCount", week_url_count); UMA_HISTOGRAM_COUNTS_10000("History.WeeklyHostCount", week_hosts.size()); UMA_HISTOGRAM_COUNTS("History.MonthlyURLCount", month_url_count); UMA_HISTOGRAM_COUNTS_10000("History.MonthlyHostCount", month_hosts.size()); UMA_HISTOGRAM_TIMES("History.DatabaseAdvancedMetricsTime", base::TimeTicks::Now() - start_time); } } void HistoryDatabase::BeginExclusiveMode() { // We can't use set_exclusive_locking() since that only has an effect before // the DB is opened. ignore_result(db_.Execute("PRAGMA locking_mode=EXCLUSIVE")); } // static int HistoryDatabase::GetCurrentVersion() { return kCurrentVersionNumber; } void HistoryDatabase::BeginTransaction() { db_.BeginTransaction(); } void HistoryDatabase::CommitTransaction() { db_.CommitTransaction(); } void HistoryDatabase::RollbackTransaction() { db_.RollbackTransaction(); } bool HistoryDatabase::RecreateAllTablesButURL() { if (!DropVisitTable()) return false; if (!InitVisitTable()) return false; if (!DropKeywordSearchTermsTable()) return false; if (!InitKeywordSearchTermsTable()) return false; if (!DropSegmentTables()) return false; if (!InitSegmentTables()) return false; // We also add the supplementary URL indices at this point. This index is // over parts of the URL table that weren't automatically created when the // temporary URL table was CreateKeywordSearchTermsIndices(); return true; } void HistoryDatabase::Vacuum() { DCHECK_EQ(0, db_.transaction_nesting()) << "Can not have a transaction when vacuuming."; ignore_result(db_.Execute("VACUUM")); } void HistoryDatabase::TrimMemory(bool aggressively) { db_.TrimMemory(aggressively); } bool HistoryDatabase::Raze() { return db_.Raze(); } bool HistoryDatabase::SetSegmentID(VisitID visit_id, SegmentID segment_id) { sql::Statement s(db_.GetCachedStatement(SQL_FROM_HERE, "UPDATE visits SET segment_id = ? WHERE id = ?")); s.BindInt64(0, segment_id); s.BindInt64(1, visit_id); DCHECK(db_.GetLastChangeCount() == 1); return s.Run(); } SegmentID HistoryDatabase::GetSegmentID(VisitID visit_id) { sql::Statement s(db_.GetCachedStatement(SQL_FROM_HERE, "SELECT segment_id FROM visits WHERE id = ?")); s.BindInt64(0, visit_id); if (s.Step()) { if (s.ColumnType(0) == sql::COLUMN_TYPE_NULL) return 0; else return s.ColumnInt64(0); } return 0; } base::Time HistoryDatabase::GetEarlyExpirationThreshold() { if (!cached_early_expiration_threshold_.is_null()) return cached_early_expiration_threshold_; int64 threshold; if (!meta_table_.GetValue(kEarlyExpirationThresholdKey, &threshold)) { // Set to a very early non-zero time, so it's before all history, but not // zero to avoid re-retrieval. threshold = 1L; } cached_early_expiration_threshold_ = base::Time::FromInternalValue(threshold); return cached_early_expiration_threshold_; } void HistoryDatabase::UpdateEarlyExpirationThreshold(base::Time threshold) { meta_table_.SetValue(kEarlyExpirationThresholdKey, threshold.ToInternalValue()); cached_early_expiration_threshold_ = threshold; } sql::Connection& HistoryDatabase::GetDB() { return db_; } // Migration ------------------------------------------------------------------- sql::InitStatus HistoryDatabase::EnsureCurrentVersion() { // We can't read databases newer than we were designed for. if (meta_table_.GetCompatibleVersionNumber() > kCurrentVersionNumber) { LOG(WARNING) << "History database is too new."; return sql::INIT_TOO_NEW; } // NOTICE: If you are changing structures for things shared with the archived // history file like URLs, visits, or downloads, that will need migration as // well. Instead of putting such migration code in this class, it should be // in the corresponding file (url_database.cc, etc.) and called from here and // from the archived_database.cc. int cur_version = meta_table_.GetVersionNumber(); // Put migration code here if (cur_version == 15) { if (!db_.Execute("DROP TABLE starred") || !DropStarredIDFromURLs()) { LOG(WARNING) << "Unable to update history database to version 16."; return sql::INIT_FAILURE; } ++cur_version; meta_table_.SetVersionNumber(cur_version); meta_table_.SetCompatibleVersionNumber( std::min(cur_version, kCompatibleVersionNumber)); } if (cur_version == 16) { #if !defined(OS_WIN) // In this version we bring the time format on Mac & Linux in sync with the // Windows version so that profiles can be moved between computers. MigrateTimeEpoch(); #endif // On all platforms we bump the version number, so on Windows this // migration is a NOP. We keep the compatible version at 16 since things // will basically still work, just history will be in the future if an // old version reads it. ++cur_version; meta_table_.SetVersionNumber(cur_version); } if (cur_version == 17) { // Version 17 was for thumbnails to top sites migration. We ended up // disabling it though, so 17->18 does nothing. ++cur_version; meta_table_.SetVersionNumber(cur_version); } if (cur_version == 18) { // This is the version prior to adding url_source column. We need to // migrate the database. cur_version = 19; meta_table_.SetVersionNumber(cur_version); } if (cur_version == 19) { cur_version++; meta_table_.SetVersionNumber(cur_version); // This was the thumbnail migration. Obsolete. } if (cur_version == 20) { // This is the version prior to adding the visit_duration field in visits // database. We need to migrate the database. if (!MigrateVisitsWithoutDuration()) { LOG(WARNING) << "Unable to update history database to version 21."; return sql::INIT_FAILURE; } ++cur_version; meta_table_.SetVersionNumber(cur_version); } if (cur_version == 21) { // The android_urls table's data schemal was changed in version 21. #if defined(OS_ANDROID) if (!MigrateToVersion22()) { LOG(WARNING) << "Unable to migrate the android_urls table to version 22"; } #endif ++cur_version; meta_table_.SetVersionNumber(cur_version); } if (cur_version == 22) { if (!MigrateDownloadsState()) { LOG(WARNING) << "Unable to fix invalid downloads state values"; // Invalid state values may cause crashes. return sql::INIT_FAILURE; } cur_version++; meta_table_.SetVersionNumber(cur_version); } if (cur_version == 23) { if (!MigrateDownloadsReasonPathsAndDangerType()) { LOG(WARNING) << "Unable to upgrade download interrupt reason and paths"; // Invalid state values may cause crashes. return sql::INIT_FAILURE; } cur_version++; meta_table_.SetVersionNumber(cur_version); } if (cur_version == 24) { if (!MigratePresentationIndex()) { LOG(WARNING) << "Unable to migrate history to version 25"; return sql::INIT_FAILURE; } cur_version++; meta_table_.SetVersionNumber(cur_version); } if (cur_version == 25) { if (!MigrateReferrer()) { LOG(WARNING) << "Unable to migrate history to version 26"; return sql::INIT_FAILURE; } cur_version++; meta_table_.SetVersionNumber(cur_version); } if (cur_version == 26) { if (!MigrateDownloadedByExtension()) { LOG(WARNING) << "Unable to migrate history to version 27"; return sql::INIT_FAILURE; } cur_version++; meta_table_.SetVersionNumber(cur_version); } if (cur_version == 27) { if (!MigrateDownloadValidators()) { LOG(WARNING) << "Unable to migrate history to version 28"; return sql::INIT_FAILURE; } cur_version++; meta_table_.SetVersionNumber(cur_version); } // When the version is too old, we just try to continue anyway, there should // not be a released product that makes a database too old for us to handle. LOG_IF(WARNING, cur_version < GetCurrentVersion()) << "History database version " << cur_version << " is too old to handle."; return sql::INIT_OK; } #if !defined(OS_WIN) void HistoryDatabase::MigrateTimeEpoch() { // Update all the times in the URLs and visits table in the main database. ignore_result(db_.Execute( "UPDATE urls " "SET last_visit_time = last_visit_time + 11644473600000000 " "WHERE id IN (SELECT id FROM urls WHERE last_visit_time > 0);")); ignore_result(db_.Execute( "UPDATE visits " "SET visit_time = visit_time + 11644473600000000 " "WHERE id IN (SELECT id FROM visits WHERE visit_time > 0);")); ignore_result(db_.Execute( "UPDATE segment_usage " "SET time_slot = time_slot + 11644473600000000 " "WHERE id IN (SELECT id FROM segment_usage WHERE time_slot > 0);")); // Erase all the full text index files. These will take a while to update and // are less important, so we just blow them away. Same with the archived // database. needs_version_17_migration_ = true; } #endif } // namespace history