summaryrefslogtreecommitdiffstats
path: root/chrome/browser/history/visitsegment_database.cc
diff options
context:
space:
mode:
authorinitial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98>2008-07-26 23:55:29 +0000
committerinitial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98>2008-07-26 23:55:29 +0000
commit09911bf300f1a419907a9412154760efd0b7abc3 (patch)
treef131325fb4e2ad12c6d3504ab75b16dd92facfed /chrome/browser/history/visitsegment_database.cc
parent586acc5fe142f498261f52c66862fa417c3d52d2 (diff)
downloadchromium_src-09911bf300f1a419907a9412154760efd0b7abc3.zip
chromium_src-09911bf300f1a419907a9412154760efd0b7abc3.tar.gz
chromium_src-09911bf300f1a419907a9412154760efd0b7abc3.tar.bz2
Add chrome to the repository.
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@15 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/history/visitsegment_database.cc')
-rw-r--r--chrome/browser/history/visitsegment_database.cc413
1 files changed, 413 insertions, 0 deletions
diff --git a/chrome/browser/history/visitsegment_database.cc b/chrome/browser/history/visitsegment_database.cc
new file mode 100644
index 0000000..423b3a5
--- /dev/null
+++ b/chrome/browser/history/visitsegment_database.cc
@@ -0,0 +1,413 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "chrome/browser/history/visitsegment_database.h"
+
+#include "base/logging.h"
+#include "base/string_util.h"
+#include "chrome/browser/history/page_usage_data.h"
+#include "chrome/common/sqlite_compiled_statement.h"
+#include "chrome/common/sqlite_utils.h"
+
+// The following tables are used to store url segment information.
+//
+// segments
+// id Primary key
+// name A unique string to represent that segment. (URL derived)
+// url_id ID of the url currently used to represent this segment.
+// pres_index index used to store a fixed presentation position.
+//
+// segment_usage
+// id Primary key
+// segment_id Corresponding segment id
+// time_slot time stamp identifying for what day this entry is about
+// visit_count Number of visit in the segment
+//
+
+namespace history {
+
+VisitSegmentDatabase::VisitSegmentDatabase() {
+}
+
+VisitSegmentDatabase::~VisitSegmentDatabase() {
+}
+
+bool VisitSegmentDatabase::InitSegmentTables() {
+ // Segments table.
+ if (!DoesSqliteTableExist(GetDB(), "segments")) {
+ if (sqlite3_exec(GetDB(), "CREATE TABLE segments ("
+ "id INTEGER PRIMARY KEY,"
+ "name VARCHAR,"
+ "url_id INTEGER NON NULL,"
+ "pres_index INTEGER DEFAULT -1 NOT NULL)",
+ NULL, NULL, NULL) != SQLITE_OK) {
+ NOTREACHED();
+ return false;
+ }
+
+ if (sqlite3_exec(GetDB(), "CREATE INDEX segments_name ON segments(name)",
+ NULL, NULL, NULL) != SQLITE_OK) {
+ NOTREACHED();
+ return false;
+ }
+ }
+
+ // This was added later, so we need to try to create it even if the table
+ // already exists.
+ sqlite3_exec(GetDB(), "CREATE INDEX segments_url_id ON segments(url_id)",
+ NULL, NULL, NULL);
+
+ // Segment usage table.
+ if (!DoesSqliteTableExist(GetDB(), "segment_usage")) {
+ if (sqlite3_exec(GetDB(), "CREATE TABLE segment_usage ("
+ "id INTEGER PRIMARY KEY,"
+ "segment_id INTEGER NOT NULL,"
+ "time_slot INTEGER NOT NULL,"
+ "visit_count INTEGER DEFAULT 0 NOT NULL)",
+ NULL, NULL, NULL) != SQLITE_OK) {
+ NOTREACHED();
+ return false;
+ }
+ if (sqlite3_exec(GetDB(),
+ "CREATE INDEX segment_usage_time_slot_segment_id ON "
+ "segment_usage(time_slot, segment_id)",
+ NULL, NULL, NULL) != SQLITE_OK) {
+ NOTREACHED();
+ return false;
+ }
+ }
+
+ // Added in a later version, so we always need to try to creat this index.
+ sqlite3_exec(GetDB(), "CREATE INDEX segments_usage_seg_id "
+ "ON segment_usage(segment_id)",
+ NULL, NULL, NULL);
+
+ // Presentation index table.
+ //
+ // Important note:
+ // Right now, this table is only used to store the presentation index.
+ // If you need to add more columns, keep in mind that rows are currently
+ // deleted when the presentation index is changed to -1.
+ // See SetPagePresentationIndex() in this file
+ if (!DoesSqliteTableExist(GetDB(), "presentation")) {
+ if (sqlite3_exec(GetDB(), "CREATE TABLE presentation("
+ "url_id INTEGER PRIMARY KEY,"
+ "pres_index INTEGER NOT NULL)",
+ NULL, NULL, NULL) != SQLITE_OK)
+ return false;
+ }
+ return true;
+}
+
+bool VisitSegmentDatabase::DropSegmentTables() {
+ // Dropping the tables will implicitly delete the indices.
+ return
+ sqlite3_exec(GetDB(), "DROP TABLE segments", NULL, NULL, NULL) ==
+ SQLITE_OK &&
+ sqlite3_exec(GetDB(), "DROP TABLE segment_usage", NULL, NULL, NULL) ==
+ SQLITE_OK;
+}
+
+// Note: the segment name is derived from the URL but is not a URL. It is
+// a string that can be easily recreated from various URLS. Maybe this should
+// be an MD5 to limit the length.
+//
+// static
+std::string VisitSegmentDatabase::ComputeSegmentName(const GURL& url) {
+ // TODO(brettw) this should probably use the registry controlled
+ // domains service.
+ GURL::Replacements r;
+ const char kWWWDot[] = "www.";
+ const int kWWWDotLen = arraysize(kWWWDot) - 1;
+
+ std::string host = url.host();
+ const char* host_c = host.c_str();
+ // Remove www. to avoid some dups.
+ if (static_cast<int>(host.size()) > kWWWDotLen &&
+ LowerCaseEqualsASCII(host_c, host_c + kWWWDotLen, kWWWDot)) {
+ r.SetHost(host.c_str(),
+ url_parse::Component(kWWWDotLen,
+ static_cast<int>(host.size()) - kWWWDotLen));
+ }
+ // Remove other stuff we don't want.
+ r.ClearUsername();
+ r.ClearPassword();
+ r.ClearQuery();
+ r.ClearRef();
+ r.ClearPort();
+
+ return url.ReplaceComponents(r).spec();
+}
+
+SegmentID VisitSegmentDatabase::GetSegmentNamed(
+ const std::string& segment_name) {
+ SQLITE_UNIQUE_STATEMENT(statement, GetStatementCache(),
+ "SELECT id FROM segments WHERE name = ?");
+ if (!statement.is_valid())
+ return 0;
+
+ statement->bind_string(0, segment_name);
+ if (statement->step() == SQLITE_ROW)
+ return statement->column_int64(0);
+ return 0;
+}
+
+bool VisitSegmentDatabase::UpdateSegmentRepresentationURL(SegmentID segment_id,
+ URLID url_id) {
+ SQLITE_UNIQUE_STATEMENT(statement, GetStatementCache(),
+ "UPDATE segments SET url_id = ? WHERE id = ?");
+ if (!statement.is_valid())
+ return false;
+
+ statement->bind_int64(0, url_id);
+ statement->bind_int64(1, segment_id);
+ return statement->step() == SQLITE_DONE;
+}
+
+URLID VisitSegmentDatabase::GetSegmentRepresentationURL(SegmentID segment_id) {
+ SQLITE_UNIQUE_STATEMENT(statement, GetStatementCache(),
+ "SELECT url_id FROM segments WHERE id = ?");
+ if (!statement.is_valid())
+ return 0;
+
+ statement->bind_int64(0, segment_id);
+ if (statement->step() == SQLITE_ROW)
+ return statement->column_int64(0);
+ return 0;
+}
+
+SegmentID VisitSegmentDatabase::CreateSegment(URLID url_id,
+ const std::string& segment_name) {
+ SQLITE_UNIQUE_STATEMENT(statement, GetStatementCache(),
+ "INSERT INTO segments (name, url_id) VALUES (?,?)");
+ if (!statement.is_valid())
+ return false;
+
+ statement->bind_string(0, segment_name);
+ statement->bind_int64(1, url_id);
+ if (statement->step() == SQLITE_DONE)
+ return sqlite3_last_insert_rowid(GetDB());
+ return false;
+}
+
+bool VisitSegmentDatabase::IncreaseSegmentVisitCount(SegmentID segment_id,
+ const Time& ts,
+ int amount) {
+ Time t = ts.LocalMidnight();
+
+ SQLITE_UNIQUE_STATEMENT(select, GetStatementCache(),
+ "SELECT id, visit_count FROM segment_usage "
+ "WHERE time_slot = ? AND segment_id = ?");
+ if (!select.is_valid())
+ return false;
+
+ select->bind_int64(0, t.ToInternalValue());
+ select->bind_int64(1, segment_id);
+ if (select->step() == SQLITE_ROW) {
+ SQLITE_UNIQUE_STATEMENT(update, GetStatementCache(),
+ "UPDATE segment_usage SET visit_count = ? WHERE id = ?");
+ if (!update.is_valid())
+ return false;
+
+ update->bind_int64(0, select->column_int64(1) + static_cast<int64>(amount));
+ update->bind_int64(1, select->column_int64(0));
+ return update->step() == SQLITE_DONE;
+
+ } else {
+ SQLITE_UNIQUE_STATEMENT(insert, GetStatementCache(),
+ "INSERT INTO segment_usage "
+ "(segment_id, time_slot, visit_count) VALUES (?, ?, ?)");
+ if (!insert.is_valid())
+ return false;
+
+ insert->bind_int64(0, segment_id);
+ insert->bind_int64(1, t.ToInternalValue());
+ insert->bind_int64(2, static_cast<int64>(amount));
+ return insert->step() == SQLITE_DONE;
+ }
+}
+
+void VisitSegmentDatabase::QuerySegmentUsage(
+ const Time& from_time,
+ std::vector<PageUsageData*>* results) {
+ // This function gathers the highest-ranked segments in two queries.
+ // The first gathers scores for all segments.
+ // The second gathers segment data (url, title, etc.) for the highest-ranked
+ // segments.
+ // TODO(evanm): this disregards the "presentation index", which was what was
+ // used to lock results into position. But the rest of our code currently
+ // does as well.
+
+ // How many results we return, as promised in the header file.
+ const int kResultCount = 9;
+
+ // Gather all the segment scores:
+ SQLITE_UNIQUE_STATEMENT(statement, GetStatementCache(),
+ "SELECT segment_id, time_slot, visit_count "
+ "FROM segment_usage WHERE time_slot >= ? "
+ "ORDER BY segment_id");
+ if (!statement.is_valid()) {
+ NOTREACHED();
+ return;
+ }
+
+ Time ts = from_time.LocalMidnight();
+ statement->bind_int64(0, ts.ToInternalValue());
+
+ const Time now = Time::Now();
+ SegmentID last_segment_id = 0;
+ PageUsageData* pud = NULL;
+ float score = 0;
+ while (statement->step() == SQLITE_ROW) {
+ SegmentID segment_id = statement->column_int64(0);
+ if (segment_id != last_segment_id) {
+ if (last_segment_id != 0) {
+ pud->SetScore(score);
+ results->push_back(pud);
+ }
+
+ pud = new PageUsageData(segment_id);
+ score = 0;
+ last_segment_id = segment_id;
+ }
+
+ const Time timeslot = Time::FromInternalValue(statement->column_int64(1));
+ const int visit_count = statement->column_int(2);
+ int days_ago = (now - timeslot).InDays();
+
+ // Score for this day in isolation.
+ float day_visits_score = 1.0f + log(static_cast<float>(visit_count));
+ // Recent visits count more than historical ones, so we multiply in a boost
+ // related to how long ago this day was.
+ // This boost is a curve that smoothly goes through these values:
+ // Today gets 3x, a week ago 2x, three weeks ago 1.5x, falling off to 1x
+ // at the limit of how far we reach into the past.
+ float recency_boost = 1.0f + (2.0f * (1.0f / (1.0f + days_ago/7.0f)));
+ score += recency_boost * day_visits_score;
+ }
+
+ if (last_segment_id != 0) {
+ pud->SetScore(score);
+ results->push_back(pud);
+ }
+
+ // Limit to the top kResultCount results.
+ sort(results->begin(), results->end(), PageUsageData::Predicate);
+ if (results->size() > kResultCount)
+ results->resize(kResultCount);
+
+ // Now fetch the details about the entries we care about.
+ SQLITE_UNIQUE_STATEMENT(statement2, GetStatementCache(),
+ "SELECT urls.url, urls.title FROM urls "
+ "JOIN segments ON segments.url_id = urls.id "
+ "WHERE segments.id = ?");
+ if (!statement2.is_valid()) {
+ NOTREACHED();
+ return;
+ }
+ for (size_t i = 0; i < results->size(); ++i) {
+ PageUsageData* pud = (*results)[i];
+ statement2->bind_int64(0, pud->GetID());
+ if (statement2->step() == SQLITE_ROW) {
+ std::string url;
+ std::wstring title;
+ statement2->column_string(0, &url);
+ statement2->column_string16(1, &title);
+ pud->SetURL(GURL(url));
+ pud->SetTitle(title);
+ }
+ statement2->reset();
+ }
+}
+
+void VisitSegmentDatabase::DeleteSegmentData(const Time& older_than) {
+ SQLITE_UNIQUE_STATEMENT(statement, GetStatementCache(),
+ "DELETE FROM segment_usage WHERE time_slot < ?");
+ if (!statement.is_valid())
+ return;
+
+ statement->bind_int64(0, older_than.LocalMidnight().ToInternalValue());
+ if (statement->step() != SQLITE_DONE)
+ NOTREACHED();
+}
+
+void VisitSegmentDatabase::SetSegmentPresentationIndex(SegmentID segment_id,
+ int index) {
+ SQLITE_UNIQUE_STATEMENT(statement, GetStatementCache(),
+ "UPDATE segments SET pres_index = ? WHERE id = ?");
+ if (!statement.is_valid())
+ return;
+
+ statement->bind_int(0, index);
+ statement->bind_int64(1, segment_id);
+ if (statement->step() != SQLITE_DONE)
+ NOTREACHED();
+}
+
+bool VisitSegmentDatabase::DeleteSegmentForURL(URLID url_id) {
+ SQLITE_UNIQUE_STATEMENT(select, GetStatementCache(),
+ "SELECT id FROM segments WHERE url_id = ?");
+ if (!select.is_valid())
+ return false;
+
+ SQLITE_UNIQUE_STATEMENT(delete_seg, GetStatementCache(),
+ "DELETE FROM segments WHERE id = ?");
+ if (!delete_seg.is_valid())
+ return false;
+
+ SQLITE_UNIQUE_STATEMENT(delete_usage, GetStatementCache(),
+ "DELETE FROM segment_usage WHERE segment_id = ?");
+ if (!delete_usage.is_valid())
+ return false;
+
+ bool r = true;
+ select->bind_int64(0, url_id);
+ // In theory there could not be more than one segment using that URL but we
+ // loop anyway to cleanup any inconsistency.
+ while (select->step() == SQLITE_ROW) {
+ SegmentID segment_id = select->column_int64(0);
+
+ delete_usage->bind_int64(0, segment_id);
+ if (delete_usage->step() != SQLITE_DONE) {
+ NOTREACHED();
+ r = false;
+ }
+
+ delete_seg->bind_int64(0, segment_id);
+ if (delete_seg->step() != SQLITE_DONE) {
+ NOTREACHED();
+ r = false;
+ }
+ delete_usage->reset();
+ delete_seg->reset();
+ }
+ return r;
+}
+
+} // namespace history