summaryrefslogtreecommitdiffstats
path: root/chrome/browser/history/text_database_manager.cc
diff options
context:
space:
mode:
authorinitial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98>2008-07-26 23:55:29 +0000
committerinitial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98>2008-07-26 23:55:29 +0000
commit09911bf300f1a419907a9412154760efd0b7abc3 (patch)
treef131325fb4e2ad12c6d3504ab75b16dd92facfed /chrome/browser/history/text_database_manager.cc
parent586acc5fe142f498261f52c66862fa417c3d52d2 (diff)
downloadchromium_src-09911bf300f1a419907a9412154760efd0b7abc3.zip
chromium_src-09911bf300f1a419907a9412154760efd0b7abc3.tar.gz
chromium_src-09911bf300f1a419907a9412154760efd0b7abc3.tar.bz2
Add chrome to the repository.
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@15 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/history/text_database_manager.cc')
-rw-r--r--chrome/browser/history/text_database_manager.cc510
1 files changed, 510 insertions, 0 deletions
diff --git a/chrome/browser/history/text_database_manager.cc b/chrome/browser/history/text_database_manager.cc
new file mode 100644
index 0000000..e9588f6
--- /dev/null
+++ b/chrome/browser/history/text_database_manager.cc
@@ -0,0 +1,510 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "chrome/browser/history/text_database_manager.h"
+
+#include "base/file_util.h"
+#include "base/histogram.h"
+#include "base/logging.h"
+#include "base/message_loop.h"
+#include "base/string_util.h"
+#include "chrome/common/mru_cache.h"
+
+namespace history {
+
+namespace {
+
+// Upper bound on how many database files stay attached simultaneously.
+const int kCacheDBSize = 5;
+
+// Age, in seconds, after which pending page data is committed to the full
+// text index even when its title and/or body never arrived.
+const int kExpirationSec = 20;
+
+// Prepares |input| for the indexer by running it through CollapseWhitespace()
+// and then converting the wide string to UTF-8.
+std::string ConvertStringForIndexer(const std::wstring& input) {
+  // TODO(evanm): other transformations here?
+  const std::wstring collapsed = CollapseWhitespace(input, false);
+  return WideToUTF8(collapsed);
+}
+
+}  // namespace
+
+// TextDatabaseManager::PageInfo -----------------------------------------------
+
+// Captures the identifiers and visit time of a page whose text has not been
+// indexed yet. The entry is also stamped with the current tick count, which
+// Expired() later compares against.
+TextDatabaseManager::PageInfo::PageInfo(URLID url_id,
+                                        VisitID visit_id,
+                                        Time visit_time)
+    : url_id_(url_id),
+      visit_id_(visit_id),
+      visit_time_(visit_time) {
+  const TimeTicks creation_ticks = TimeTicks::Now();
+  added_time_ = creation_ticks;
+}
+
+// Stores |ttl| as the title. An empty title is replaced by a single space so
+// that the stored value is nonempty once it has been set.
+void TextDatabaseManager::PageInfo::set_title(const std::wstring& ttl) {
+  title_ = ttl.empty() ? std::wstring(L" ") : ttl;
+}
+
+// Stores |bdy| as the body. An empty body is replaced by a single space so
+// that the stored value is nonempty once it has been set.
+void TextDatabaseManager::PageInfo::set_body(const std::wstring& bdy) {
+  body_ = bdy.empty() ? std::wstring(L" ") : bdy;
+}
+
+// Returns true when more than kExpirationSec seconds separate |now| from the
+// moment this entry was created.
+bool TextDatabaseManager::PageInfo::Expired(TimeTicks now) const {
+  const TimeDelta max_age = TimeDelta::FromSeconds(kExpirationSec);
+  return (now - added_time_) > max_age;
+}
+
+// TextDatabaseManager ---------------------------------------------------------
+
+// Remembers the directory holding the database files and the visit database
+// to keep in sync. No files are opened or enumerated here.
+TextDatabaseManager::TextDatabaseManager(const std::wstring& dir,
+                                         VisitDatabase* visit_database)
+    : dir_(dir),
+      db_(NULL),
+      visit_database_(visit_database),
+      recent_changes_(RecentChangeList::NO_AUTO_EVICT),  // Trimmed manually.
+      transaction_nesting_(0),                           // No open transaction.
+      db_cache_(DBCache::NO_AUTO_EVICT),                 // Trimmed manually.
+      present_databases_loaded_(false),                  // See InitDBList().
+#pragma warning(suppress: 4355)  // Okay to pass "this" here.
+      factory_(this) {
+}
+
+// Commits once if a transaction is still open at destruction time. Note that
+// a single CommitTransaction() call unwinds only one nesting level.
+TextDatabaseManager::~TextDatabaseManager() {
+  const bool in_transaction = (transaction_nesting_ != 0);
+  if (in_transaction)
+    CommitTransaction();
+}
+
+// static
+// Maps |time| to the identifier of the database covering it. The identifier
+// is the UTC year and 1-based month packed into one number, e.g. 200801 for
+// January, 2008.
+TextDatabase::DBIdent TextDatabaseManager::TimeToID(Time time) {
+  Time::Exploded parts;
+  time.UTCExplode(&parts);
+  return (parts.year * 100) + parts.month;
+}
+
+// static
+// Inverse of TimeToID(): unpacks the year and month from |id| and builds a UTC
+// time from them. Every other exploded field is left zeroed.
+// NOTE(review): this leaves the day-of-month field at 0 as well -- confirm
+// Time::FromUTCExploded() accepts that.
+Time TextDatabaseManager::IDToTime(TextDatabase::DBIdent id) {
+  Time::Exploded parts;
+  memset(&parts, 0, sizeof(parts));
+  parts.month = id % 100;
+  parts.year = id / 100;
+  return Time::FromUTCExploded(parts);
+}
+
+// Kicks off the recurring task that commits stale entries from the recent
+// changes list. Always reports success.
+bool TextDatabaseManager::Init() {
+  ScheduleFlushOldChanges();
+  return true;
+}
+
+// Opens one more level of transaction nesting. Individual databases join the
+// transaction later, when GetDB() hands them out for writing.
+void TextDatabaseManager::BeginTransaction() {
+  ++transaction_nesting_;
+}
+
+// Closes one level of transaction nesting. Only when the outermost level is
+// closed are the per-database transactions committed and the database cache
+// trimmed back to kCacheDBSize.
+void TextDatabaseManager::CommitTransaction() {
+  DCHECK(transaction_nesting_);
+  --transaction_nesting_;
+  if (transaction_nesting_ != 0)
+    return;  // An enclosing transaction is still open.
+
+  // Every database that joined the transaction gets committed now.
+  for (DBIdentSet::const_iterator ident = open_transactions_.begin();
+       ident != open_transactions_.end(); ++ident) {
+    DBCache::iterator cached = db_cache_.Get(*ident);
+    if (cached != db_cache_.end()) {
+      cached->second->CommitTransaction();
+    } else {
+      NOTREACHED() << "All open transactions should be cached.";
+    }
+  }
+  open_transactions_.clear();
+
+  // With nothing pinned by a transaction any more, old connections can go.
+  db_cache_.ShrinkToSize(kCacheDBSize);
+}
+
+// Populates |present_databases_| from the files in |dir_| whose names start
+// with TextDatabase::file_base(). Runs its body at most once per manager.
+void TextDatabaseManager::InitDBList() {
+  if (present_databases_loaded_)
+    return;
+  present_databases_loaded_ = true;
+
+  // Enumerate the on-disk files matching our naming pattern so later lookups
+  // can test for a database without touching the file system.
+  const std::wstring pattern = std::wstring(TextDatabase::file_base()) + L"*";
+  file_util::FileEnumerator enumerator(dir_, false,
+                                       file_util::FileEnumerator::FILES,
+                                       pattern);
+  for (std::wstring path = enumerator.Next(); !path.empty();
+       path = enumerator.Next()) {
+    // FileNameToID() yields 0 when the name cannot be converted.
+    const TextDatabase::DBIdent ident = TextDatabase::FileNameToID(path);
+    if (ident != 0)
+      present_databases_.insert(ident);
+  }
+}
+
+// Starts tracking a freshly visited page. Nothing is written to a database
+// here; the entry waits in |recent_changes_| until its data is complete or it
+// expires.
+void TextDatabaseManager::AddPageURL(const GURL& url,
+                                     URLID url_id,
+                                     VisitID visit_id,
+                                     Time time) {
+  // A newer visit supersedes whatever was pending for the same URL.
+  RecentChangeList::iterator existing = recent_changes_.Peek(url);
+  if (existing != recent_changes_.end())
+    recent_changes_.Erase(existing);
+
+  recent_changes_.Put(url, PageInfo(url_id, visit_id, time));
+}
+
+// Supplies the title for a pending page. If the body is already known the
+// page is written to the index and dropped from the pending list; otherwise
+// the title is stored until the body arrives or the entry expires.
+void TextDatabaseManager::AddPageTitle(const GURL& url,
+                                       const std::wstring& title) {
+  RecentChangeList::iterator pending = recent_changes_.Peek(url);
+  if (pending == recent_changes_.end())
+    return;  // Not a page we are tracking.
+
+  PageInfo& page = pending->second;
+  if (!page.has_body()) {
+    // Still waiting for the body; just remember the title.
+    page.set_title(title);
+    return;
+  }
+
+  // Title and body are both available, so commit the page now.
+  AddPageData(url, page.url_id(), page.visit_id(), page.visit_time(),
+              title, page.body());
+  recent_changes_.Erase(pending);
+}
+
+// Supplies the body text for a pending page. If the title is already known
+// the page is written to the index and dropped from the pending list;
+// otherwise the body is stored until the title arrives or the entry expires.
+void TextDatabaseManager::AddPageContents(const GURL& url,
+                                          const std::wstring& body) {
+  RecentChangeList::iterator pending = recent_changes_.Peek(url);
+  if (pending == recent_changes_.end())
+    return;  // Not a page we are tracking.
+
+  PageInfo& page = pending->second;
+  if (!page.has_title()) {
+    // Still waiting for the title; just remember the body.
+    page.set_body(body);
+    return;
+  }
+
+  // Title and body are both available, so commit the page now.
+  AddPageData(url, page.url_id(), page.visit_id(), page.visit_time(),
+              page.title(), body);
+  recent_changes_.Erase(pending);
+}
+
+// Writes |title| and |body| for |url| into the full text database covering
+// |visit_time|, creating that database if needed.
+//
+// Any other visit of |url_id| currently marked as indexed is un-marked and has
+// its text removed first. When |visit_id| is nonzero the matching visit row is
+// marked as indexed; the row must exist.
+//
+// Returns false if the database cannot be opened, the visit row for a nonzero
+// |visit_id| is missing or cannot be updated, or the index write itself fails.
+bool TextDatabaseManager::AddPageData(const GURL& url,
+                                      URLID url_id,
+                                      VisitID visit_id,
+                                      Time visit_time,
+                                      const std::wstring& title,
+                                      const std::wstring& body) {
+  // Open (creating if necessary) the destination database up front so that we
+  // give up before modifying the visit database when it is unavailable.
+  if (!GetDBForTime(visit_time, true))
+    return false;
+
+  TimeTicks beginning_time = TimeTicks::Now();
+
+  // First delete any recently-indexed data for this page. This will delete
+  // anything in the main database, but we don't bother looking through the
+  // archived database.
+  VisitVector visits;
+  visit_database_->GetVisitsForURL(url_id, &visits);
+  size_t our_visit_row_index = visits.size();
+  for (size_t i = 0; i < visits.size(); i++) {
+    // While we're going through all the visits, also find our row so we can
+    // avoid another DB query.
+    if (visits[i].visit_id == visit_id) {
+      our_visit_row_index = i;
+    } else if (visits[i].is_indexed) {
+      visits[i].is_indexed = false;
+      visit_database_->UpdateVisitRow(visits[i]);
+      DeletePageData(visits[i].visit_time, url, NULL);
+    }
+  }
+
+  // DeletePageData() goes through GetDB(), which trims the database cache
+  // whenever no transaction is open. A pointer obtained before the loop could
+  // therefore refer to a database that has since been evicted and closed, so
+  // fetch the destination database (again) only now, after the last call
+  // that can touch the cache.
+  TextDatabase* db = GetDBForTime(visit_time, true);
+  if (!db)
+    return false;
+
+  if (visit_id) {
+    // We're supposed to update the visit database.
+    if (our_visit_row_index >= visits.size()) {
+      NOTREACHED() << "We should always have found a visit when given an ID.";
+      return false;
+    }
+
+    DCHECK(visit_time == visits[our_visit_row_index].visit_time);
+
+    // Update the visit database to reference our addition.
+    visits[our_visit_row_index].is_indexed = true;
+    if (!visit_database_->UpdateVisitRow(visits[our_visit_row_index]))
+      return false;
+  }
+
+  // Now index the data.
+  std::string url_str = URLDatabase::GURLToDatabaseURL(url);
+  bool success = db->AddPageData(visit_time, url_str,
+                                 ConvertStringForIndexer(title),
+                                 ConvertStringForIndexer(body));
+
+  HISTOGRAM_TIMES(L"History.AddFTSData",
+                  TimeTicks::Now() - beginning_time);
+  return success;
+}
+
+// Removes the full text data stored for |url| at |time| from the database
+// covering that time. Does nothing when that database cannot be opened (it is
+// never created here). When |change_set| is non-NULL, the identifier of the
+// affected database is recorded in it.
+void TextDatabaseManager::DeletePageData(Time time, const GURL& url,
+                                         ChangeSet* change_set) {
+  TextDatabase::DBIdent db_ident = TimeToID(time);
+
+  // We want to open the database for writing, but only if it exists. To
+  // achieve this, we check whether it exists by saying we're not going to
+  // write to it (avoiding the autocreation code normally called when writing)
+  // and then access it for writing only if it succeeds.
+  TextDatabase* db = GetDB(db_ident, false);
+  if (!db)
+    return;
+  db = GetDB(db_ident, true);
+  if (!db)
+    return;  // The file may have changed or something.
+
+  if (change_set)
+    change_set->Add(db_ident);
+
+  db->DeletePageData(time, URLDatabase::GURLToDatabaseURL(url));
+}
+
+// Drops pending (not yet indexed) entries whose visit time lies in
+// [begin, end). A null |end| means there is no upper bound. The walk relies on
+// the list being ordered from most recent to oldest; it holds few items, so a
+// linear scan is fine.
+void TextDatabaseManager::DeleteFromUncommitted(Time begin, Time end) {
+  RecentChangeList::iterator pos = recent_changes_.begin();
+
+  // Skip the entries at the front that are too new to be deleted.
+  if (!end.is_null()) {
+    for (; pos != recent_changes_.end(); ++pos) {
+      if (!(pos->second.visit_time() >= end))
+        break;
+    }
+  }
+
+  // Erase everything from here down to the oldest entry in range. A null
+  // |begin| compares less than or equal to any other time.
+  while (pos != recent_changes_.end() && pos->second.visit_time() >= begin)
+    pos = recent_changes_.Erase(pos);
+}
+
+// Forgets the pending (not yet indexed) entry for |url|, if there is one.
+void TextDatabaseManager::DeleteURLFromUncommitted(const GURL& url) {
+  RecentChangeList::iterator pending = recent_changes_.Peek(url);
+  if (pending != recent_changes_.end())
+    recent_changes_.Erase(pending);
+}
+
+// Closes every open database and deletes all full text database files found
+// in |dir_|. Must not be called while a transaction is open.
+void TextDatabaseManager::DeleteAll() {
+  DCHECK(transaction_nesting_ == 0) << "Calling deleteAll in a transaction.";
+
+  InitDBList();
+
+  // Close all open databases.
+  db_cache_.ShrinkToSize(0);
+
+  // Now go through and delete all the files.
+  for (DBIdentSet::iterator i = present_databases_.begin();
+       i != present_databases_.end(); ++i) {
+    std::wstring file_name(dir_);
+    file_util::AppendToPath(&file_name, TextDatabase::IDToFileName(*i));
+    file_util::Delete(file_name, false);
+  }
+
+  // The list of on-disk databases is only loaded once, so forget the entries
+  // we just removed; otherwise later searches would keep trying to open files
+  // that no longer exist. GetDB() re-registers a database when it opens one.
+  present_databases_.clear();
+}
+
+// Runs Optimize() on every database named in |change_set| that can still be
+// opened. Databases are never created here.
+void TextDatabaseManager::OptimizeChangedDatabases(
+    const ChangeSet& change_set) {
+  ChangeSet::DBSet::const_iterator ident =
+      change_set.changed_databases_.begin();
+  for (; ident != change_set.changed_databases_.end(); ++ident) {
+    // Probe with a read-only request first: that path skips the autocreation
+    // done for writers, so a missing database simply fails to open.
+    if (!GetDB(*ident, false))
+      continue;
+
+    // It exists, so now ask for it again with write access.
+    TextDatabase* writable = GetDB(*ident, true);
+    if (writable)  // NULL if the file changed or something in between.
+      writable->Optimize();
+  }
+}
+
+// Searches the monthly databases overlapping the time range in |options| for
+// |query|, newest database first, appending matches to |results| (which is
+// cleared first). Stops early once options.max_count results were collected
+// (0 means no limit). |first_time_searched| is written by the databases that
+// get searched; if none is searched it is set to options.begin_time.
+void TextDatabaseManager::GetTextMatches(
+    const std::wstring& query,
+    const QueryOptions& options,
+    std::vector<TextDatabase::Match>* results,
+    Time* first_time_searched) {
+  results->clear();
+
+  InitDBList();
+  if (present_databases_.empty()) {
+    // No database files at all, so there is nothing to search.
+    *first_time_searched = options.begin_time;
+    return;
+  }
+
+  // Translate the user query into the form the individual databases expect.
+  std::wstring parsed_query;
+  query_parser_.ParseQuery(query, &parsed_query);
+  std::string fts_query = WideToUTF8(parsed_query);
+
+  // Work on a copy of the options: the max count is lowered for each
+  // database according to how many results were already found.
+  QueryOptions per_db_options(options);
+
+  // Work out the smallest and largest identifiers that can overlap the
+  // requested time range. A null bound falls back to the oldest / newest
+  // database present.
+  TextDatabase::DBIdent min_ident;
+  if (options.begin_time.is_null())
+    min_ident = *present_databases_.begin();
+  else
+    min_ident = TimeToID(options.begin_time);
+
+  TextDatabase::DBIdent max_ident;
+  if (options.end_time.is_null())
+    max_ident = *present_databases_.rbegin();
+  else
+    max_ident = TimeToID(options.end_time);
+
+  // Visit the databases from newest to oldest.
+  bool searched_any = false;
+  TextDatabase::URLSet seen_urls;
+  DBIdentSet::reverse_iterator ident = present_databases_.rbegin();
+  for (; ident != present_databases_.rend(); ++ident) {
+    // TODO(brettw) allow canceling the query in the middle.
+    // if (canceled_or_something)
+    //   break;
+
+    // A plain linear scan locates the start of the range. Users will have a
+    // few dozen files at most, so nothing smarter is needed.
+    if (*ident > max_ident)
+      continue;  // Newer than the requested range.
+    if (*ident < min_ident)
+      break;  // Older than the requested range; we are done.
+
+    TextDatabase* database = GetDB(*ident, false);
+    if (!database)
+      continue;
+
+    // Only ask for as many results as are still missing.
+    if (options.max_count) {
+      per_db_options.max_count = options.max_count -
+                                 static_cast<int>(results->size());
+    }
+
+    // Because we move backwards in time, handing the same
+    // first_time_searched to every database is fine: each one can only make
+    // it smaller than what a previous one set.
+    database->GetTextMatches(fts_query, per_db_options,
+                             results, &seen_urls, first_time_searched);
+    searched_any = true;
+
+    const int found = static_cast<int>(results->size());
+    DCHECK(options.max_count == 0 || found <= options.max_count);
+    if (options.max_count != 0 && found >= options.max_count)
+      break;  // Collected as many results as were asked for.
+  }
+
+  // If no database was searched, nothing set the min time; fix it up.
+  if (!searched_any)
+    *first_time_searched = options.begin_time;
+}
+
+// Returns the database identified by |id|, taking it from the cache or
+// opening it. Returns NULL when the database fails to initialize. A database
+// handed out with |for_writing| while a transaction is open is made part of
+// that transaction.
+TextDatabase* TextDatabaseManager::GetDB(TextDatabase::DBIdent id,
+                                         bool for_writing) {
+  DBCache::iterator cached = db_cache_.Get(id);
+  if (cached != db_cache_.end()) {
+    TextDatabase* existing = cached->second;
+
+    // A cached database joins the open transaction the first time it is
+    // requested for writing within that transaction.
+    const bool must_join_transaction =
+        transaction_nesting_ && for_writing &&
+        open_transactions_.find(id) == open_transactions_.end();
+    if (must_join_transaction) {
+      existing->BeginTransaction();
+      open_transactions_.insert(id);
+    }
+    return existing;
+  }
+
+  // Not cached: open it now.
+  TextDatabase* created = new TextDatabase(dir_, id, for_writing);
+  if (!created->Init()) {
+    delete created;
+    return NULL;
+  }
+  db_cache_.Put(id, created);
+  present_databases_.insert(id);
+
+  if (transaction_nesting_) {
+    // Inside a transaction nothing may be evicted. A database that is going
+    // to be written must additionally become part of the transaction.
+    if (for_writing) {
+      created->BeginTransaction();
+      open_transactions_.insert(id);
+    }
+  } else {
+    // Outside of a transaction the newcomer may push out an old connection.
+    db_cache_.ShrinkToSize(kCacheDBSize);
+  }
+
+  return created;
+}
+
+// Convenience wrapper: resolves |time| to its database identifier and forwards
+// to GetDB(), passing |create_if_necessary| as the for-writing flag.
+TextDatabase* TextDatabaseManager::GetDBForTime(Time time,
+                                                bool create_if_necessary) {
+  const TextDatabase::DBIdent ident = TimeToID(time);
+  return GetDB(ident, create_if_necessary);
+}
+
+// Cancels whatever the factory had outstanding and posts a new
+// FlushOldChanges() call on the current message loop, delayed by
+// kExpirationSec seconds.
+void TextDatabaseManager::ScheduleFlushOldChanges() {
+  factory_.RevokeAll();
+
+  MessageLoop* loop = MessageLoop::current();
+  loop->PostDelayedTask(
+      FROM_HERE,
+      factory_.NewRunnableMethod(&TextDatabaseManager::FlushOldChanges),
+      kExpirationSec * Time::kMillisecondsPerSecond);
+}
+
+// Timer entry point: flushes the entries that have expired as of right now.
+void TextDatabaseManager::FlushOldChanges() {
+  const TimeTicks now = TimeTicks::Now();
+  FlushOldChangesForTime(now);
+}
+
+// Commits every pending entry that has expired relative to |now|, using
+// whatever title/body it has collected so far, then schedules the next flush.
+void TextDatabaseManager::FlushOldChangesForTime(TimeTicks now) {
+  // The oldest entries sit at the back of the list, so consume from there and
+  // stop at the first entry that is still too new.
+  RecentChangeList::reverse_iterator oldest = recent_changes_.rbegin();
+  for (;;) {
+    if (oldest == recent_changes_.rend())
+      break;
+    if (!oldest->second.Expired(now))
+      break;
+
+    const PageInfo& page = oldest->second;
+    AddPageData(oldest->first, page.url_id(), page.visit_id(),
+                page.visit_time(), page.title(), page.body());
+    oldest = recent_changes_.Erase(oldest);
+  }
+
+  ScheduleFlushOldChanges();
+}
+
+} // namespace history