diff options
author | initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-07-26 23:55:29 +0000 |
---|---|---|
committer | initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-07-26 23:55:29 +0000 |
commit | 09911bf300f1a419907a9412154760efd0b7abc3 (patch) | |
tree | f131325fb4e2ad12c6d3504ab75b16dd92facfed /chrome/browser/history/text_database_manager.cc | |
parent | 586acc5fe142f498261f52c66862fa417c3d52d2 (diff) | |
download | chromium_src-09911bf300f1a419907a9412154760efd0b7abc3.zip chromium_src-09911bf300f1a419907a9412154760efd0b7abc3.tar.gz chromium_src-09911bf300f1a419907a9412154760efd0b7abc3.tar.bz2 |
Add chrome to the repository.
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@15 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/history/text_database_manager.cc')
-rw-r--r-- | chrome/browser/history/text_database_manager.cc | 510 |
1 files changed, 510 insertions, 0 deletions
diff --git a/chrome/browser/history/text_database_manager.cc b/chrome/browser/history/text_database_manager.cc new file mode 100644 index 0000000..e9588f6 --- /dev/null +++ b/chrome/browser/history/text_database_manager.cc @@ -0,0 +1,510 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "chrome/browser/history/text_database_manager.h" + +#include "base/file_util.h" +#include "base/histogram.h" +#include "base/logging.h" +#include "base/message_loop.h" +#include "base/string_util.h" +#include "chrome/common/mru_cache.h" + +namespace history { + +namespace { + +// The number of database files we will be attached to at once. +const int kCacheDBSize = 5; + +std::string ConvertStringForIndexer( + const std::wstring& input) { + // TODO(evanm): other transformations here? + return WideToUTF8(CollapseWhitespace(input, false)); +} + +// Data older than this will be committed to the full text index even if we +// haven't gotten a title and/or body. +const int kExpirationSec = 20; + +} // namespace + +// TextDatabaseManager::PageInfo ----------------------------------------------- + +TextDatabaseManager::PageInfo::PageInfo(URLID url_id, + VisitID visit_id, + Time visit_time) + : url_id_(url_id), + visit_id_(visit_id), + visit_time_(visit_time) { + added_time_ = TimeTicks::Now(); +} + +void TextDatabaseManager::PageInfo::set_title(const std::wstring& ttl) { + if (ttl.empty()) // Make the title nonempty when we set it for EverybodySet. + title_ = L" "; + else + title_ = ttl; +} + +void TextDatabaseManager::PageInfo::set_body(const std::wstring& bdy) { + if (bdy.empty()) // Make the body nonempty when we set it for EverybodySet. + body_ = L" "; + else + body_ = bdy; +} + +bool TextDatabaseManager::PageInfo::Expired(TimeTicks now) const { + return now - added_time_ > TimeDelta::FromSeconds(kExpirationSec); +} + +// TextDatabaseManager --------------------------------------------------------- + +TextDatabaseManager::TextDatabaseManager(const std::wstring& dir, + VisitDatabase* visit_database) + : dir_(dir), + db_(NULL), + visit_database_(visit_database), + recent_changes_(RecentChangeList::NO_AUTO_EVICT), + transaction_nesting_(0), + db_cache_(DBCache::NO_AUTO_EVICT), + present_databases_loaded_(false), +#pragma warning(suppress: 4355) // Okay to pass "this" here. + factory_(this) { +} + +TextDatabaseManager::~TextDatabaseManager() { + if (transaction_nesting_) + CommitTransaction(); +} + +// static +TextDatabase::DBIdent TextDatabaseManager::TimeToID(Time time) { + Time::Exploded exploded; + time.UTCExplode(&exploded); + + // We combine the month and year into a 6-digit number (200801 for + // January, 2008). The month is 1-based. + return exploded.year * 100 + exploded.month; +} + +// static +Time TextDatabaseManager::IDToTime(TextDatabase::DBIdent id) { + Time::Exploded exploded; + memset(&exploded, 0, sizeof(Time::Exploded)); + exploded.year = id / 100; + exploded.month = id % 100; + return Time::FromUTCExploded(exploded); +} + +bool TextDatabaseManager::Init() { + // Start checking recent changes and committing them. + ScheduleFlushOldChanges(); + return true; +} + +void TextDatabaseManager::BeginTransaction() { + transaction_nesting_++; +} + +void TextDatabaseManager::CommitTransaction() { + DCHECK(transaction_nesting_); + transaction_nesting_--; + if (transaction_nesting_) + return; // Still more nesting of transactions before committing. + + // Commit all databases with open transactions on them. + for (DBIdentSet::const_iterator i = open_transactions_.begin(); + i != open_transactions_.end(); ++i) { + DBCache::iterator iter = db_cache_.Get(*i); + if (iter == db_cache_.end()) { + NOTREACHED() << "All open transactions should be cached."; + continue; + } + iter->second->CommitTransaction(); + } + open_transactions_.clear(); + + // Now that the transaction is over, we can expire old connections. + db_cache_.ShrinkToSize(kCacheDBSize); +} + +void TextDatabaseManager::InitDBList() { + if (present_databases_loaded_) + return; + + present_databases_loaded_ = true; + + // Find files on disk matching our pattern so we can quickly test for them. + file_util::FileEnumerator enumerator(dir_, false, + file_util::FileEnumerator::FILES, + std::wstring(TextDatabase::file_base()) + L"*"); + std::wstring cur_file; + while (!(cur_file = enumerator.Next()).empty()) { + // Convert to the number representing this file. + TextDatabase::DBIdent id = TextDatabase::FileNameToID(cur_file); + if (id) // Will be 0 on error. + present_databases_.insert(id); + } +} + +void TextDatabaseManager::AddPageURL(const GURL& url, + URLID url_id, + VisitID visit_id, + Time time) { + // Delete any existing page info. + RecentChangeList::iterator found = recent_changes_.Peek(url); + if (found != recent_changes_.end()) + recent_changes_.Erase(found); + + // Just save this info for later. We will save it when it expires or when all + // the data is complete. + recent_changes_.Put(url, PageInfo(url_id, visit_id, time)); +} + +void TextDatabaseManager::AddPageTitle(const GURL& url, + const std::wstring& title) { + RecentChangeList::iterator found = recent_changes_.Peek(url); + if (found == recent_changes_.end()) + return; // We don't know about this page, give up. + + PageInfo& info = found->second; + if (info.has_body()) { + // This info is complete, write to the database. + AddPageData(url, info.url_id(), info.visit_id(), info.visit_time(), + title, info.body()); + recent_changes_.Erase(found); + return; + } + + info.set_title(title); +} + +void TextDatabaseManager::AddPageContents(const GURL& url, + const std::wstring& body) { + RecentChangeList::iterator found = recent_changes_.Peek(url); + if (found == recent_changes_.end()) + return; // We don't know about this page, give up. + + PageInfo& info = found->second; + if (info.has_title()) { + // This info is complete, write to the database. + AddPageData(url, info.url_id(), info.visit_id(), info.visit_time(), + info.title(), body); + recent_changes_.Erase(found); + return; + } + + info.set_body(body); +} + +bool TextDatabaseManager::AddPageData(const GURL& url, + URLID url_id, + VisitID visit_id, + Time visit_time, + const std::wstring& title, + const std::wstring& body) { + TextDatabase* db = GetDBForTime(visit_time, true); + if (!db) + return false; + + TimeTicks beginning_time = TimeTicks::Now(); + + // First delete any recently-indexed data for this page. This will delete + // anything in the main database, but we don't bother looking through the + // archived database. + VisitVector visits; + visit_database_->GetVisitsForURL(url_id, &visits); + size_t our_visit_row_index = visits.size(); + for (size_t i = 0; i < visits.size(); i++) { + // While we're going trough all the visits, also find our row so we can + // avoid another DB query. + if (visits[i].visit_id == visit_id) { + our_visit_row_index = i; + } else if (visits[i].is_indexed) { + visits[i].is_indexed = false; + visit_database_->UpdateVisitRow(visits[i]); + DeletePageData(visits[i].visit_time, url, NULL); + } + } + + if (visit_id) { + // We're supposed to update the visit database. + if (our_visit_row_index >= visits.size()) { + NOTREACHED() << "We should always have found a visit when given an ID."; + return false; + } + + DCHECK(visit_time == visits[our_visit_row_index].visit_time); + + // Update the visit database to reference our addition. + visits[our_visit_row_index].is_indexed = true; + if (!visit_database_->UpdateVisitRow(visits[our_visit_row_index])) + return false; + } + + // Now index the data. + std::string url_str = URLDatabase::GURLToDatabaseURL(url); + bool success = db->AddPageData(visit_time, url_str, + ConvertStringForIndexer(title), + ConvertStringForIndexer(body)); + + HISTOGRAM_TIMES(L"History.AddFTSData", + TimeTicks::Now() - beginning_time); + return success; +} + +void TextDatabaseManager::DeletePageData(Time time, const GURL& url, + ChangeSet* change_set) { + TextDatabase::DBIdent db_ident = TimeToID(time); + + // We want to open the database for writing, but only if it exists. To + // achieve this, we check whether it exists by saying we're not going to + // write to it (avoiding the autocreation code normally called when writing) + // and then access it for writing only if it succeeds. + TextDatabase* db = GetDB(db_ident, false); + if (!db) + return; + db = GetDB(db_ident, true); + + if (change_set) + change_set->Add(db_ident); + + db->DeletePageData(time, URLDatabase::GURLToDatabaseURL(url)); +} + +void TextDatabaseManager::DeleteFromUncommitted(Time begin, Time end) { + // First find the beginning of the range to delete. Recall that the list + // has the most recent item at the beginning. There won't normally be very + // many items, so a brute-force search is fine. + RecentChangeList::iterator cur = recent_changes_.begin(); + if (!end.is_null()) { + // Walk from the beginning of the list backwards in time to find the newest + // entry that should be deleted. + while (cur != recent_changes_.end() && cur->second.visit_time() >= end) + ++cur; + } + + // Now delete all visits up to the oldest one we were supposed to delete. + // Note that if begin is_null, it will be less than or equal to any other + // time. + while (cur != recent_changes_.end() && cur->second.visit_time() >= begin) + cur = recent_changes_.Erase(cur); +} + +void TextDatabaseManager::DeleteURLFromUncommitted(const GURL& url) { + RecentChangeList::iterator found = recent_changes_.Peek(url); + if (found == recent_changes_.end()) + return; // We don't know about this page, give up. + recent_changes_.Erase(found); +} + +void TextDatabaseManager::DeleteAll() { + DCHECK(transaction_nesting_ == 0) << "Calling deleteAll in a transaction."; + + InitDBList(); + + // Close all open databases. + db_cache_.ShrinkToSize(0); + + // Now go through and delete all the files. + for (DBIdentSet::iterator i = present_databases_.begin(); + i != present_databases_.end(); ++i) { + std::wstring file_name(dir_); + file_util::AppendToPath(&file_name, TextDatabase::IDToFileName(*i)); + file_util::Delete(file_name, false); + } +} + +void TextDatabaseManager::OptimizeChangedDatabases( + const ChangeSet& change_set) { + for (ChangeSet::DBSet::const_iterator i = + change_set.changed_databases_.begin(); + i != change_set.changed_databases_.end(); ++i) { + // We want to open the database for writing, but only if it exists. To + // achieve this, we check whether it exists by saying we're not going to + // write to it (avoiding the autocreation code normally called when writing) + // and then access it for writing only if it succeeds. + TextDatabase* db = GetDB(*i, false); + if (!db) + continue; + db = GetDB(*i, true); + if (!db) + continue; // The file may have changed or something. + db->Optimize(); + } +} + +void TextDatabaseManager::GetTextMatches( + const std::wstring& query, + const QueryOptions& options, + std::vector<TextDatabase::Match>* results, + Time* first_time_searched) { + results->clear(); + + InitDBList(); + if (present_databases_.empty()) { + // Nothing to search. + *first_time_searched = options.begin_time; + return; + } + + // Get the query into the proper format for the individual DBs. + std::wstring fts_query_wide; + query_parser_.ParseQuery(query, &fts_query_wide); + std::string fts_query = WideToUTF8(fts_query_wide); + + // Need a copy of the options so we can modify the max count for each call + // to the individual databases. + QueryOptions cur_options(options); + + // Compute the minimum and maximum values for the identifiers that could + // encompass the input time range. + TextDatabase::DBIdent min_ident = options.begin_time.is_null() ? + *present_databases_.begin() : + TimeToID(options.begin_time); + TextDatabase::DBIdent max_ident = options.end_time.is_null() ? + *present_databases_.rbegin() : + TimeToID(options.end_time); + + // Iterate over the databases from the most recent backwards. + bool checked_one = false; + TextDatabase::URLSet found_urls; + for (DBIdentSet::reverse_iterator i = present_databases_.rbegin(); + i != present_databases_.rend(); + ++i) { + // TODO(brettw) allow canceling the query in the middle. + // if (canceled_or_something) + // break; + + // This code is stupid, we just loop until we find the correct starting + // time range rather than search in an intelligent way. Users will have a + // few dozen files at most, so this should not be an issue. + if (*i > max_ident) + continue; // Haven't gotten to the time range yet. + if (*i < min_ident) + break; // Covered all the time range. + + TextDatabase* cur_db = GetDB(*i, false); + if (!cur_db) + continue; + + // Adjust the max count according to how many results we've already got. + if (options.max_count) { + cur_options.max_count = options.max_count - + static_cast<int>(results->size()); + } + + // Since we are going backwards in time, it is always OK to pass the + // current first_time_searched, since it will always be smaller than + // any previous set. + cur_db->GetTextMatches(fts_query, cur_options, + results, &found_urls, first_time_searched); + checked_one = true; + + DCHECK(options.max_count == 0 || + static_cast<int>(results->size()) <= options.max_count); + if (options.max_count && + static_cast<int>(results->size()) >= options.max_count) + break; // Got the max number of results. + } + + // When there were no databases in the range, we need to fix up the min time. + if (!checked_one) + *first_time_searched = options.begin_time; +} + +TextDatabase* TextDatabaseManager::GetDB(TextDatabase::DBIdent id, + bool for_writing) { + DBCache::iterator found_db = db_cache_.Get(id); + if (found_db != db_cache_.end()) { + if (transaction_nesting_ && for_writing && + open_transactions_.find(id) == open_transactions_.end()) { + // If we currently have an open transaction, that database is not yet + // part of the transaction, and the database will be written to, it needs + // to be part of our transaction. + found_db->second->BeginTransaction(); + open_transactions_.insert(id); + } + return found_db->second; + } + + // Need to make the database. + TextDatabase* new_db = new TextDatabase(dir_, id, for_writing); + if (!new_db->Init()) { + delete new_db; + return NULL; + } + db_cache_.Put(id, new_db); + present_databases_.insert(id); + + if (transaction_nesting_ && for_writing) { + // If we currently have an open transaction and the new database will be + // written to, it needs to be part of our transaction. + new_db->BeginTransaction(); + open_transactions_.insert(id); + } + + // When no transaction is open, allow this new one to kick out an old one. + if (!transaction_nesting_) + db_cache_.ShrinkToSize(kCacheDBSize); + + return new_db; +} + +TextDatabase* TextDatabaseManager::GetDBForTime(Time time, + bool create_if_necessary) { + return GetDB(TimeToID(time), create_if_necessary); +} + +void TextDatabaseManager::ScheduleFlushOldChanges() { + factory_.RevokeAll(); + MessageLoop::current()->PostDelayedTask(FROM_HERE, factory_.NewRunnableMethod( + &TextDatabaseManager::FlushOldChanges), + kExpirationSec * Time::kMillisecondsPerSecond); +} + +void TextDatabaseManager::FlushOldChanges() { + FlushOldChangesForTime(TimeTicks::Now()); +} + +void TextDatabaseManager::FlushOldChangesForTime(TimeTicks now) { + // The end of the list is the oldest, so we just start from there committing + // things until we get something too new. + RecentChangeList::reverse_iterator i = recent_changes_.rbegin(); + while (i != recent_changes_.rend() && i->second.Expired(now)) { + AddPageData(i->first, i->second.url_id(), i->second.visit_id(), + i->second.visit_time(), i->second.title(), i->second.body()); + i = recent_changes_.Erase(i); + } + + ScheduleFlushOldChanges(); +} + +} // namespace history |