summaryrefslogtreecommitdiffstats
path: root/chrome/browser/history/text_database.cc
diff options
context:
space:
mode:
authorinitial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98>2008-07-26 23:55:29 +0000
committerinitial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98>2008-07-26 23:55:29 +0000
commit09911bf300f1a419907a9412154760efd0b7abc3 (patch)
treef131325fb4e2ad12c6d3504ab75b16dd92facfed /chrome/browser/history/text_database.cc
parent586acc5fe142f498261f52c66862fa417c3d52d2 (diff)
downloadchromium_src-09911bf300f1a419907a9412154760efd0b7abc3.zip
chromium_src-09911bf300f1a419907a9412154760efd0b7abc3.tar.gz
chromium_src-09911bf300f1a419907a9412154760efd0b7abc3.tar.bz2
Add chrome to the repository.
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@15 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/history/text_database.cc')
-rw-r--r--chrome/browser/history/text_database.cc412
1 files changed, 412 insertions, 0 deletions
diff --git a/chrome/browser/history/text_database.cc b/chrome/browser/history/text_database.cc
new file mode 100644
index 0000000..0c1242f
--- /dev/null
+++ b/chrome/browser/history/text_database.cc
@@ -0,0 +1,412 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <limits>
+#include <set>
+
+#include "chrome/browser/history/text_database.h"
+
+#include "base/file_util.h"
+#include "base/logging.h"
+#include "base/string_util.h"
+#include "chrome/common/sqlite_utils.h"
+
+// There are two tables in each database, one full-text search (FTS) table which
+// indexes the contents and title of the pages. The other is a regular SQLite
+// table which contains non-indexed information about the page. All columns of
+// a FTS table are indexed using the text search algorithm, which isn't what we
+// want for things like times. If this were in the FTS table, there would be
+// different words in the index for each time number.
+//
+// "pages" FTS table:
+// url URL of the page so searches will match the URL.
+// title Title of the page.
+// body Body of the page.
+//
+// "info" regular table:
+// time Time the corresponding FTS entry was visited.
+//
+// We do joins across these two tables by using their internal rowids, which we
+// keep in sync between the two tables. The internal rowid is the only part of
+// an FTS table that is indexed like a normal table, and the index over it is
+// free since sqlite always indexes the internal rowid.
+
+namespace history {
+
+namespace {
+
// Version of the on-disk schema; compared against the meta table in Init().
const int kCurrentVersionNumber = 1;

// Snippet computation relies on the index of the columns in the original
// create statement. These are the 0-based indices (as strings) of the
// corresponding columns.
const char kTitleColumnIndex[] = "1";
const char kBodyColumnIndex[] = "2";

// The string prepended to the database identifier to generate the filename.
const wchar_t kFilePrefix[] = L"History Index ";
const size_t kFilePrefixLen = arraysize(kFilePrefix) - 1;  // Don't count the
                                                           // terminating NUL.
+
+// We do not allow rollback, but this simple scoper makes it easy to always
+// remember to commit a begun transaction. This protects against some errors
+// caused by a crash in the middle of a transaction, although doesn't give us
+// the full protection of a transaction's rollback abilities.
+class ScopedTransactionCommitter {
+ public:
+ ScopedTransactionCommitter(TextDatabase* db) : db_(db) {
+ db_->BeginTransaction();
+ }
+ ~ScopedTransactionCommitter() {
+ db_->CommitTransaction();
+ }
+ private:
+ TextDatabase* db_;
+};
+
+
+} // namespace
+
// Constructs a (not yet opened) database object for the index file living in
// |path| and named after |id| (see IDToFileName()). No file I/O happens here;
// call Init() to actually open or create the file.
TextDatabase::TextDatabase(const std::wstring& path,
                           DBIdent id,
                           bool allow_create)
    : db_(NULL),
      statement_cache_(NULL),
      path_(path),
      ident_(id),
      allow_create_(allow_create),
      transaction_nesting_(0) {
  // Compute the file name.
  file_name_ = path_;
  file_util::AppendToPath(&file_name_, IDToFileName(ident_));
}
+
// Tears down the database connection. Order matters: the statement cache
// holds prepared statements that must be finalized before sqlite3_close()
// can succeed.
TextDatabase::~TextDatabase() {
  if (statement_cache_) {
    // Must release these statements before closing the DB.
    delete statement_cache_;
    statement_cache_ = NULL;
  }
  if (db_) {
    sqlite3_close(db_);
    db_ = NULL;
  }
}
+
// static
// Returns the common file name prefix ("History Index ") shared by all
// text database files; callers use it to enumerate index files on disk.
const wchar_t* TextDatabase::file_base() {
  return kFilePrefix;
}
+
// static
std::wstring TextDatabase::IDToFileName(DBIdent id) {
  // Identifiers are intended to be a combination of the year and month, for
  // example, 200801 for January 2008. We convert this to
  // "History Index 2008-01". However, we don't make assumptions about this
  // scheme: the caller should assign IDs as it feels fit with the knowledge
  // that they will appear on disk in this form.
  return StringPrintf(L"%s%d-%02d", file_base(), id / 100, id % 100);
}
+
+// static
+TextDatabase::DBIdent TextDatabase::FileNameToID(const std::wstring& file_path){
+ std::wstring file_name = file_util::GetFilenameFromPath(file_path);
+
+ // We don't actually check the prefix here. Since the file system could
+ // be case insensitive in ways we can't predict (NTFS), checking could
+ // potentially be the wrong thing to do. Instead, we just look for a suffix.
+ static const int kIDStringLength = 7; // Room for "xxxx-xx".
+ if (file_name.length() < kIDStringLength)
+ return 0;
+ const wchar_t* number_begin =
+ &file_name[file_name.length() - kIDStringLength];
+
+ int year, month;
+ if (swscanf_s(number_begin, L"%d-%d", &year, &month) != 2)
+ return 0; // Unable to get both numbers.
+
+ return year * 100 + month;
+}
+
// Opens (creating if allowed) the index file, tunes sqlite, checks schema
// version compatibility, and creates any missing tables. Returns false on
// any failure; the destructor still cleans up a partially-opened database.
bool TextDatabase::Init() {
  // Make sure, if we're not allowed to create the file, that it exists.
  if (!allow_create_) {
    if (!file_util::PathExists(file_name_))
      return false;
  }

  // Attach the database to our index file.
  if (sqlite3_open(WideToUTF8(file_name_).c_str(), &db_) != SQLITE_OK)
    return false;
  statement_cache_ = new SqliteStatementCache(db_);

  // Set the database page size to something a little larger to give us
  // better performance (we're typically seek rather than bandwidth limited).
  // This only has an effect before any tables have been created, otherwise
  // this is a NOP. Must be a power of 2 and a max of 8192.
  sqlite3_exec(db_, "PRAGMA page_size=4096", NULL, NULL, NULL);

  // The default cache size is 2000 which gives >8MB of data. Since we will
  // often have 2-3 of these objects, each with their own 8MB, this adds up
  // very fast. We therefore reduce the size so when there are multiple
  // objects, we're not too big.
  sqlite3_exec(db_, "PRAGMA cache_size=512", NULL, NULL, NULL);

  // Run the database in exclusive mode. Nobody else should be accessing the
  // database while we're running, and this will give somewhat improved perf.
  sqlite3_exec(db_, "PRAGMA locking_mode=EXCLUSIVE", NULL, NULL, NULL);

  // Meta table tracking version information.
  if (!meta_table_.Init(std::string(), kCurrentVersionNumber, db_))
    return false;
  if (meta_table_.GetCompatibleVersionNumber() > kCurrentVersionNumber) {
    // This version is too new. We don't bother notifying the user on this
    // error, and just fail to use the file. Normally if they have version skew,
    // they will get it for the main history file and it won't be necessary
    // here. If that's not the case, since this is only indexed data, it's
    // probably better to just not give FTS results than strange errors when
    // everything else is working OK.
    return false;
  }

  return CreateTables();
}
+
+void TextDatabase::BeginTransaction() {
+ if (!transaction_nesting_)
+ sqlite3_exec(db_, "BEGIN TRANSACTION", NULL, NULL, NULL);
+ transaction_nesting_++;
+}
+
+void TextDatabase::CommitTransaction() {
+ DCHECK(transaction_nesting_);
+ transaction_nesting_--;
+ if (!transaction_nesting_)
+ sqlite3_exec(db_, "COMMIT", NULL, NULL, NULL);
+}
+
// Creates the "pages" FTS table and the parallel "info" table if they don't
// already exist (see the file comment for the schema and how the two tables
// share rowids). Returns false only if a CREATE TABLE fails.
bool TextDatabase::CreateTables() {
  // FTS table of page contents.
  if (!DoesSqliteTableExist(db_, "pages")) {
    if (sqlite3_exec(db_,
                     "CREATE VIRTUAL TABLE pages USING fts2("
                     "TOKENIZE icu,"
                     "url LONGVARCHAR,"
                     "title LONGVARCHAR,"
                     "body LONGVARCHAR)", NULL, NULL, NULL) != SQLITE_OK)
      return false;
  }

  // Non-FTS table containing URLs and times so we can efficiently find them
  // using a regular index (all FTS columns are special and are treated as
  // full-text-search, which is not what we want when retrieving this data).
  if (!DoesSqliteTableExist(db_, "info")) {
    // Note that there is no point in creating an index over time. Since
    // we must always query the entire FTS table (it can not efficiently do
    // subsets), we will always end up doing that first, and joining the info
    // table off of that.
    if (sqlite3_exec(db_, "CREATE TABLE info(time INTEGER NOT NULL)",
                     NULL, NULL, NULL) != SQLITE_OK)
      return false;
  }

  // Create the index. This will fail when the index already exists, so we just
  // ignore the error.
  sqlite3_exec(db_, "CREATE INDEX info_time ON info(time)", NULL, NULL, NULL);
  return true;
}
+
// Indexes one page visit: inserts the searchable text into the "pages" FTS
// table, then inserts the visit time into "info" under the same rowid so the
// two tables can be joined later. Both inserts run inside one transaction.
// Returns false if either insert fails (note the scoper still commits;
// rollback is not supported -- see ScopedTransactionCommitter).
bool TextDatabase::AddPageData(Time time,
                               const std::string& url,
                               const std::string& title,
                               const std::string& contents) {
  ScopedTransactionCommitter committer(this);

  // Add to the pages table.
  SQLITE_UNIQUE_STATEMENT(add_to_pages, *statement_cache_,
      "INSERT INTO pages(url,title,body)VALUES(?,?,?)");
  if (!add_to_pages.is_valid())
    return false;
  // NOTE(review): bind indices here are 0-based; presumably the statement
  // wrapper translates to sqlite's 1-based parameters -- verify against
  // chrome/common/sqlite_utils.h.
  add_to_pages->bind_string(0, url);
  add_to_pages->bind_string(1, title);
  add_to_pages->bind_string(2, contents);
  if (add_to_pages->step() != SQLITE_DONE) {
    NOTREACHED() << sqlite3_errmsg(db_);
    return false;
  }

  // The rowid sqlite assigned to the FTS row; reused below to keep the two
  // tables in sync.
  int64 rowid = sqlite3_last_insert_rowid(db_);

  // Add to the info table with the same rowid.
  SQLITE_UNIQUE_STATEMENT(add_to_info, *statement_cache_,
      "INSERT INTO info(rowid,time) VALUES(?,?)");
  if (!add_to_info.is_valid())
    return false;
  add_to_info->bind_int64(0, rowid);
  add_to_info->bind_int64(1, time.ToInternalValue());
  if (add_to_info->step() != SQLITE_DONE) {
    NOTREACHED() << sqlite3_errmsg(db_);
    return false;
  }

  return true;
}
+
// Removes the FTS entry (and its "info" twin) for the page visited at |time|
// with the given |url|. Collects the matching rowids first, then deletes from
// each table; errors are silently ignored (best effort).
void TextDatabase::DeletePageData(Time time, const std::string& url) {
  // First get all rows that match. Selecting on time (which has an index)
  // allows us to avoid brute-force searches on the full-text-index table
  // (there will generally be only one match per time).
  SQLITE_UNIQUE_STATEMENT(select_ids, *statement_cache_,
      "SELECT info.rowid "
      "FROM info JOIN pages ON info.rowid = pages.rowid "
      "WHERE info.time=? AND pages.url=?");
  if (!select_ids.is_valid())
    return;
  select_ids->bind_int64(0, time.ToInternalValue());
  select_ids->bind_string(1, url);
  std::set<int64> rows_to_delete;
  while (select_ids->step() == SQLITE_ROW)
    rows_to_delete.insert(select_ids->column_int64(0));

  // Delete from the pages table.
  SQLITE_UNIQUE_STATEMENT(delete_page, *statement_cache_,
      "DELETE FROM pages WHERE rowid=?");
  if (!delete_page.is_valid())
    return;
  for (std::set<int64>::const_iterator i = rows_to_delete.begin();
       i != rows_to_delete.end(); ++i) {
    delete_page->bind_int64(0, *i);
    delete_page->step();
    delete_page->reset();  // Allow the cached statement to be re-bound.
  }

  // Delete from the info table.
  SQLITE_UNIQUE_STATEMENT(delete_info, *statement_cache_,
      "DELETE FROM info WHERE rowid=?");
  if (!delete_info.is_valid())
    return;
  for (std::set<int64>::const_iterator i = rows_to_delete.begin();
       i != rows_to_delete.end(); ++i) {
    delete_info->bind_int64(0, *i);
    delete_info->step();
    delete_info->reset();
  }
}
+
+void TextDatabase::Optimize() {
+ SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+ "SELECT OPTIMIZE(pages) FROM pages LIMIT 1");
+ if (!statement.is_valid())
+ return;
+ statement->step();
+}
+
+void TextDatabase::GetTextMatches(const std::string& query,
+ const QueryOptions& options,
+ std::vector<Match>* results,
+ URLSet* found_urls,
+ Time* first_time_searched) {
+ *first_time_searched = options.begin_time;
+
+ SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+ "SELECT url, title, time, offsets(pages), body "
+ "FROM pages LEFT OUTER JOIN info ON pages.rowid = info.rowid "
+ "WHERE pages MATCH ? AND time >= ? AND time < ? "
+ "ORDER BY time DESC "
+ "LIMIT ?");
+ if (!statement.is_valid())
+ return;
+
+ // When their values indicate "unspecified", saturate the numbers to the max
+ // or min to get the correct result.
+ int64 effective_begin_time = options.begin_time.is_null() ?
+ 0 : options.begin_time.ToInternalValue();
+ int64 effective_end_time = options.end_time.is_null() ?
+ std::numeric_limits<int64>::max() : options.end_time.ToInternalValue();
+ int effective_max_count = options.max_count ?
+ options.max_count : std::numeric_limits<int>::max();
+
+ statement->bind_string(0, query);
+ statement->bind_int64(1, effective_begin_time);
+ statement->bind_int64(2, effective_end_time);
+ statement->bind_int(3, effective_max_count);
+
+ while (statement->step() == SQLITE_ROW) {
+ // TODO(brettw) allow canceling the query in the middle.
+ // if (canceled_or_something)
+ // break;
+
+ GURL url(statement->column_string(0));
+ if (options.most_recent_visit_only) {
+ URLSet::const_iterator found_url = found_urls->find(url);
+ if (found_url != found_urls->end())
+ continue; // Don't add this duplicate when unique URLs are requested.
+ }
+
+ // Fill the results into the vector (avoid copying the URL with Swap()).
+ results->resize(results->size() + 1);
+ Match& match = results->at(results->size() - 1);
+ match.url.Swap(&url);
+
+ match.title = statement->column_string16(1);
+ match.time = Time::FromInternalValue(statement->column_int64(2));
+
+ // Extract any matches in the title.
+ std::string offsets_str = statement->column_string(3);
+ Snippet::ExtractMatchPositions(offsets_str, kTitleColumnIndex,
+ &match.title_match_positions);
+ Snippet::ConvertMatchPositionsToWide(statement->column_string(1),
+ &match.title_match_positions);
+
+ // Extract the matches in the body.
+ Snippet::MatchPositions match_positions;
+ Snippet::ExtractMatchPositions(offsets_str, kBodyColumnIndex,
+ &match_positions);
+
+ // Compute the snippet based on those matches.
+ std::string body = statement->column_string(4);
+ match.snippet.ComputeSnippet(match_positions, body);
+ }
+
+ // When we have returned all the results possible (or determined that there
+ // are none), then we have searched all the time requested, so we can
+ // set the first_time_searched to that value.
+ if (results->size() == 0 ||
+ options.max_count == 0 || // Special case for wanting all the results.
+ static_cast<int>(results->size()) < options.max_count) {
+ *first_time_searched = options.begin_time;
+ } else {
+ // Since we got the results in order, we know the last item is the last
+ // time we considered.
+ *first_time_searched = results->back().time;
+ }
+
+ statement->reset();
+}
+
+} // namespace history