From eb278881097c0365c0e265392ff50f03c6292862 Mon Sep 17 00:00:00 2001 From: sdefresne Date: Tue, 30 Dec 2014 11:23:49 -0800 Subject: Partial componentization of //chrome/browser/history Move thumbnail_database.{cc,h} to //components/history/core/browser as they don't have dependency on //chrome or //content and add a new dependency on //third_party/sqlite from the history component. Move url_utils.{cc,h}, visit_filter.{cc,h}, visit_tracker.{cc,h}, visitsegment_database.{cc,h}, in_memory_url_index_types.{cc,h} and top_sites_cache.{cc,h} to //components/history/core/browser as they are leaf files that don't have dependency on //chrome or //content. Move visit_database.{cc,h} to //components/history/core/browser after fixing the #include to use "url/url_constants.h" to get the constants defined in the url namespace. Move in_memory_url_index_cache.proto since it has no dependencies and add the gyp target history_core_browser_proto and gn target //components/history/core/browser:proto. All files were moved using tools/git/move_source_files.py and then the build system files (gyp and gn) were manually edited. Remove obsolete #includes and fix missing #includes. 
BUG=390953, 370850 TBR=sky@chromium.org Review URL: https://codereview.chromium.org/831643003 Cr-Commit-Position: refs/heads/master@{#309765} --- chrome/browser/BUILD.gn | 2 +- .../android/provider/chrome_browser_provider.cc | 1 + chrome/browser/autocomplete/history_provider.cc | 2 +- chrome/browser/autocomplete/history_provider.h | 2 +- .../browser/autocomplete/history_quick_provider.cc | 2 +- .../browser/autocomplete/history_url_provider.cc | 2 +- chrome/browser/chromeos/BUILD.gn | 2 +- chrome/browser/extensions/BUILD.gn | 2 +- chrome/browser/history/BUILD.gn | 12 - .../history/android/android_provider_backend.cc | 2 +- .../browser/history/android/favicon_sql_handler.cc | 2 +- chrome/browser/history/expire_history_backend.cc | 2 +- .../history/expire_history_backend_unittest.cc | 3 +- chrome/browser/history/history_backend.cc | 2 +- chrome/browser/history/history_backend.h | 4 +- chrome/browser/history/history_backend_unittest.cc | 2 +- chrome/browser/history/history_database.h | 4 +- chrome/browser/history/history_service.cc | 4 +- chrome/browser/history/in_memory_url_index.cc | 2 - chrome/browser/history/in_memory_url_index.h | 2 +- .../history/in_memory_url_index_cache.proto | 103 -- .../browser/history/in_memory_url_index_types.cc | 163 --- chrome/browser/history/in_memory_url_index_types.h | 179 --- .../history/in_memory_url_index_types_unittest.cc | 150 --- .../history/in_memory_url_index_unittest.cc | 2 +- chrome/browser/history/scored_history_match.h | 2 +- chrome/browser/history/thumbnail_database.cc | 1322 -------------------- chrome/browser/history/thumbnail_database.h | 278 ---- .../browser/history/thumbnail_database_unittest.cc | 2 +- chrome/browser/history/top_sites.h | 4 +- chrome/browser/history/top_sites_cache.cc | 182 --- chrome/browser/history/top_sites_cache.h | 170 --- chrome/browser/history/top_sites_cache_unittest.cc | 258 ---- chrome/browser/history/top_sites_impl.cc | 4 +- chrome/browser/history/top_sites_impl_unittest.cc | 2 +- 
chrome/browser/history/url_index_private_data.h | 4 +- chrome/browser/history/url_utils.cc | 88 -- chrome/browser/history/url_utils.h | 46 - chrome/browser/history/url_utils_unittest.cc | 131 -- chrome/browser/history/visit_database.cc | 625 --------- chrome/browser/history/visit_database.h | 233 ---- chrome/browser/history/visit_database_unittest.cc | 419 ------- chrome/browser/history/visit_filter.cc | 358 ------ chrome/browser/history/visit_filter.h | 165 --- chrome/browser/history/visit_filter_unittest.cc | 314 ----- chrome/browser/history/visit_tracker.cc | 106 -- chrome/browser/history/visit_tracker.h | 66 - chrome/browser/history/visit_tracker_unittest.cc | 129 -- chrome/browser/history/visitsegment_database.cc | 327 ----- chrome/browser/history/visitsegment_database.h | 92 -- .../browser/prerender/prerender_local_predictor.h | 2 +- chrome/browser/ui/BUILD.gn | 2 +- .../ui/webui/ntp/suggestions_source_top_sites.cc | 3 +- .../ui/webui/ntp/suggestions_source_top_sites.h | 2 +- chrome/chrome_browser.gypi | 30 - chrome/chrome_browser_chromeos.gypi | 2 +- chrome/chrome_browser_extensions.gypi | 2 +- chrome/chrome_browser_ui.gypi | 2 +- chrome/chrome_tests_unit.gypi | 6 - components/components_tests.gyp | 6 + components/history.gypi | 33 + components/history/DEPS | 1 + components/history/core/browser/BUILD.gn | 26 + .../core/browser/in_memory_url_index_cache.proto | 103 ++ .../core/browser/in_memory_url_index_types.cc | 163 +++ .../core/browser/in_memory_url_index_types.h | 179 +++ .../browser/in_memory_url_index_types_unittest.cc | 151 +++ .../history/core/browser/thumbnail_database.cc | 1322 ++++++++++++++++++++ .../history/core/browser/thumbnail_database.h | 278 ++++ components/history/core/browser/top_sites_cache.cc | 182 +++ components/history/core/browser/top_sites_cache.h | 170 +++ .../core/browser/top_sites_cache_unittest.cc | 258 ++++ components/history/core/browser/url_utils.cc | 88 ++ components/history/core/browser/url_utils.h | 46 + 
.../history/core/browser/url_utils_unittest.cc | 131 ++ components/history/core/browser/visit_database.cc | 625 +++++++++ components/history/core/browser/visit_database.h | 233 ++++ .../core/browser/visit_database_unittest.cc | 419 +++++++ components/history/core/browser/visit_filter.cc | 358 ++++++ components/history/core/browser/visit_filter.h | 165 +++ .../history/core/browser/visit_filter_unittest.cc | 314 +++++ components/history/core/browser/visit_tracker.cc | 106 ++ components/history/core/browser/visit_tracker.h | 66 + .../history/core/browser/visit_tracker_unittest.cc | 130 ++ .../history/core/browser/visitsegment_database.cc | 327 +++++ .../history/core/browser/visitsegment_database.h | 92 ++ 86 files changed, 6013 insertions(+), 5990 deletions(-) delete mode 100644 chrome/browser/history/BUILD.gn delete mode 100644 chrome/browser/history/in_memory_url_index_cache.proto delete mode 100644 chrome/browser/history/in_memory_url_index_types.cc delete mode 100644 chrome/browser/history/in_memory_url_index_types.h delete mode 100644 chrome/browser/history/in_memory_url_index_types_unittest.cc delete mode 100644 chrome/browser/history/thumbnail_database.cc delete mode 100644 chrome/browser/history/thumbnail_database.h delete mode 100644 chrome/browser/history/top_sites_cache.cc delete mode 100644 chrome/browser/history/top_sites_cache.h delete mode 100644 chrome/browser/history/top_sites_cache_unittest.cc delete mode 100644 chrome/browser/history/url_utils.cc delete mode 100644 chrome/browser/history/url_utils.h delete mode 100644 chrome/browser/history/url_utils_unittest.cc delete mode 100644 chrome/browser/history/visit_database.cc delete mode 100644 chrome/browser/history/visit_database.h delete mode 100644 chrome/browser/history/visit_database_unittest.cc delete mode 100644 chrome/browser/history/visit_filter.cc delete mode 100644 chrome/browser/history/visit_filter.h delete mode 100644 chrome/browser/history/visit_filter_unittest.cc delete mode 100644 
chrome/browser/history/visit_tracker.cc delete mode 100644 chrome/browser/history/visit_tracker.h delete mode 100644 chrome/browser/history/visit_tracker_unittest.cc delete mode 100644 chrome/browser/history/visitsegment_database.cc delete mode 100644 chrome/browser/history/visitsegment_database.h create mode 100644 components/history/core/browser/in_memory_url_index_cache.proto create mode 100644 components/history/core/browser/in_memory_url_index_types.cc create mode 100644 components/history/core/browser/in_memory_url_index_types.h create mode 100644 components/history/core/browser/in_memory_url_index_types_unittest.cc create mode 100644 components/history/core/browser/thumbnail_database.cc create mode 100644 components/history/core/browser/thumbnail_database.h create mode 100644 components/history/core/browser/top_sites_cache.cc create mode 100644 components/history/core/browser/top_sites_cache.h create mode 100644 components/history/core/browser/top_sites_cache_unittest.cc create mode 100644 components/history/core/browser/url_utils.cc create mode 100644 components/history/core/browser/url_utils.h create mode 100644 components/history/core/browser/url_utils_unittest.cc create mode 100644 components/history/core/browser/visit_database.cc create mode 100644 components/history/core/browser/visit_database.h create mode 100644 components/history/core/browser/visit_database_unittest.cc create mode 100644 components/history/core/browser/visit_filter.cc create mode 100644 components/history/core/browser/visit_filter.h create mode 100644 components/history/core/browser/visit_filter_unittest.cc create mode 100644 components/history/core/browser/visit_tracker.cc create mode 100644 components/history/core/browser/visit_tracker.h create mode 100644 components/history/core/browser/visit_tracker_unittest.cc create mode 100644 components/history/core/browser/visitsegment_database.cc create mode 100644 components/history/core/browser/visitsegment_database.h diff --git 
a/chrome/browser/BUILD.gn b/chrome/browser/BUILD.gn index 7ec6ebe..240f3cd 100644 --- a/chrome/browser/BUILD.gn +++ b/chrome/browser/BUILD.gn @@ -59,7 +59,6 @@ static_library("browser") { "//chrome/app:generated_resources_map", "//chrome/app/resources:platform_locale_settings", "//chrome/app/theme:theme_resources", - "//chrome/browser/history:in_memory_url_index_cache_proto", "//chrome/browser/net:cert_logger_proto", "//chrome/browser/net:probe_message_proto", "//chrome/browser/ui", @@ -83,6 +82,7 @@ static_library("browser") { "//components/google/core/browser", "//components/handoff", "//components/history/core/browser", + "//components/history/core/browser:proto", "//components/history/core/common", "//components/infobars/core", "//components/invalidation", diff --git a/chrome/browser/android/provider/chrome_browser_provider.cc b/chrome/browser/android/provider/chrome_browser_provider.cc index 681aa8b..f8c2147 100644 --- a/chrome/browser/android/provider/chrome_browser_provider.cc +++ b/chrome/browser/android/provider/chrome_browser_provider.cc @@ -27,6 +27,7 @@ #include "chrome/browser/favicon/favicon_service.h" #include "chrome/browser/favicon/favicon_service_factory.h" #include "chrome/browser/history/android/sqlite_cursor.h" +#include "chrome/browser/history/history_service.h" #include "chrome/browser/history/history_service_factory.h" #include "chrome/browser/history/top_sites.h" #include "chrome/browser/profiles/profile.h" diff --git a/chrome/browser/autocomplete/history_provider.cc b/chrome/browser/autocomplete/history_provider.cc index a23adad..6f9dc2e 100644 --- a/chrome/browser/autocomplete/history_provider.cc +++ b/chrome/browser/autocomplete/history_provider.cc @@ -11,10 +11,10 @@ #include "chrome/browser/bookmarks/bookmark_model_factory.h" #include "chrome/browser/history/history_service.h" #include "chrome/browser/history/history_service_factory.h" -#include "chrome/browser/history/in_memory_url_index_types.h" #include 
"chrome/browser/profiles/profile.h" #include "chrome/common/url_constants.h" #include "components/bookmarks/browser/bookmark_model.h" +#include "components/history/core/browser/in_memory_url_index_types.h" #include "components/omnibox/autocomplete_input.h" #include "components/omnibox/autocomplete_match.h" #include "url/url_util.h" diff --git a/chrome/browser/autocomplete/history_provider.h b/chrome/browser/autocomplete/history_provider.h index 935dc5a..ed957ad 100644 --- a/chrome/browser/autocomplete/history_provider.h +++ b/chrome/browser/autocomplete/history_provider.h @@ -6,7 +6,7 @@ #define CHROME_BROWSER_AUTOCOMPLETE_HISTORY_PROVIDER_H_ #include "base/compiler_specific.h" -#include "chrome/browser/history/in_memory_url_index_types.h" +#include "components/history/core/browser/in_memory_url_index_types.h" #include "components/omnibox/autocomplete_provider.h" class AutocompleteInput; diff --git a/chrome/browser/autocomplete/history_quick_provider.cc b/chrome/browser/autocomplete/history_quick_provider.cc index 5cff89c..de1c711 100644 --- a/chrome/browser/autocomplete/history_quick_provider.cc +++ b/chrome/browser/autocomplete/history_quick_provider.cc @@ -23,13 +23,13 @@ #include "chrome/browser/history/history_service.h" #include "chrome/browser/history/history_service_factory.h" #include "chrome/browser/history/in_memory_url_index.h" -#include "chrome/browser/history/in_memory_url_index_types.h" #include "chrome/browser/history/scored_history_match.h" #include "chrome/browser/profiles/profile.h" #include "chrome/browser/search_engines/template_url_service_factory.h" #include "chrome/common/chrome_switches.h" #include "chrome/common/pref_names.h" #include "chrome/common/url_constants.h" +#include "components/history/core/browser/in_memory_url_index_types.h" #include "components/metrics/proto/omnibox_input_type.pb.h" #include "components/omnibox/autocomplete_match_type.h" #include "components/omnibox/autocomplete_result.h" diff --git 
a/chrome/browser/autocomplete/history_url_provider.cc b/chrome/browser/autocomplete/history_url_provider.cc index 14f60a8..c87a71b 100644 --- a/chrome/browser/autocomplete/history_url_provider.cc +++ b/chrome/browser/autocomplete/history_url_provider.cc @@ -20,7 +20,6 @@ #include "chrome/browser/history/history_database.h" #include "chrome/browser/history/history_service.h" #include "chrome/browser/history/history_service_factory.h" -#include "chrome/browser/history/in_memory_url_index_types.h" #include "chrome/browser/history/scored_history_match.h" #include "chrome/browser/profiles/profile.h" #include "chrome/browser/search_engines/template_url_service_factory.h" @@ -30,6 +29,7 @@ #include "chrome/common/url_constants.h" #include "components/bookmarks/browser/bookmark_utils.h" #include "components/history/core/browser/history_types.h" +#include "components/history/core/browser/in_memory_url_index_types.h" #include "components/metrics/proto/omnibox_input_type.pb.h" #include "components/omnibox/autocomplete_match.h" #include "components/omnibox/autocomplete_provider_listener.h" diff --git a/chrome/browser/chromeos/BUILD.gn b/chrome/browser/chromeos/BUILD.gn index a81bf54..4c5112e 100644 --- a/chrome/browser/chromeos/BUILD.gn +++ b/chrome/browser/chromeos/BUILD.gn @@ -48,7 +48,6 @@ source_set("chromeos") { "//components/wifi_sync", "//chrome/browser/devtools", "//chrome/browser/extensions", - "//chrome/browser/history:in_memory_url_index_cache_proto", "//chrome/browser/net:cert_logger_proto", "//chrome/browser/safe_browsing:chunk_proto", "//chrome/browser/safe_browsing:metadata_proto", @@ -63,6 +62,7 @@ source_set("chromeos") { "//chromeos:cryptohome_proto", "//chromeos:cryptohome_signkey_proto", "//chromeos:power_manager_proto", + "//components/history/core/browser:proto", "//components/onc", "//components/ownership", "//components/pairing", diff --git a/chrome/browser/extensions/BUILD.gn b/chrome/browser/extensions/BUILD.gn index 790c323..0a4584a 100644 --- 
a/chrome/browser/extensions/BUILD.gn +++ b/chrome/browser/extensions/BUILD.gn @@ -39,13 +39,13 @@ static_library("extensions") { "//chrome/app/resources:platform_locale_settings", "//chrome/app/theme:theme_resources", "//chrome/browser/devtools", - "//chrome/browser/history:in_memory_url_index_cache_proto", "//chrome/common", "//chrome/common/extensions/api:api_registration", "//chrome/common/net", "//chrome/common/safe_browsing:proto", "//chrome/installer/util", "//components/copresence", + "//components/history/core/browser:proto", "//components/omaha_client", "//components/onc", "//components/proximity_auth", diff --git a/chrome/browser/history/BUILD.gn b/chrome/browser/history/BUILD.gn deleted file mode 100644 index 6cb65f4..0000000 --- a/chrome/browser/history/BUILD.gn +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright 2014 The Chromium Authors. All rights reserved. -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. - -import("//third_party/protobuf/proto_library.gni") - -# GYP version: chrome/chrome_browser.gypi:in_memory_url_index_cache_proto -proto_library("in_memory_url_index_cache_proto") { - sources = [ - "in_memory_url_index_cache.proto", - ] -} diff --git a/chrome/browser/history/android/android_provider_backend.cc b/chrome/browser/history/android/android_provider_backend.cc index dddf6ce..536754e 100644 --- a/chrome/browser/history/android/android_provider_backend.cc +++ b/chrome/browser/history/android/android_provider_backend.cc @@ -14,10 +14,10 @@ #include "chrome/browser/history/android/visit_sql_handler.h" #include "chrome/browser/history/history_backend.h" #include "chrome/browser/history/history_database.h" -#include "chrome/browser/history/thumbnail_database.h" #include "components/history/core/browser/history_backend_notifier.h" #include "components/history/core/browser/history_client.h" #include "components/history/core/browser/keyword_search_term.h" +#include 
"components/history/core/browser/thumbnail_database.h" #include "sql/connection.h" diff --git a/chrome/browser/history/android/favicon_sql_handler.cc b/chrome/browser/history/android/favicon_sql_handler.cc index e56d71c..d40efe9 100644 --- a/chrome/browser/history/android/favicon_sql_handler.cc +++ b/chrome/browser/history/android/favicon_sql_handler.cc @@ -7,7 +7,7 @@ #include "base/logging.h" #include "base/memory/ref_counted.h" #include "base/memory/ref_counted_memory.h" -#include "chrome/browser/history/thumbnail_database.h" +#include "components/history/core/browser/thumbnail_database.h" using base::Time; diff --git a/chrome/browser/history/expire_history_backend.cc b/chrome/browser/history/expire_history_backend.cc index f4d1094..81990f12 100644 --- a/chrome/browser/history/expire_history_backend.cc +++ b/chrome/browser/history/expire_history_backend.cc @@ -15,9 +15,9 @@ #include "base/logging.h" #include "base/message_loop/message_loop.h" #include "chrome/browser/history/history_database.h" -#include "chrome/browser/history/thumbnail_database.h" #include "components/history/core/browser/history_backend_notifier.h" #include "components/history/core/browser/history_client.h" +#include "components/history/core/browser/thumbnail_database.h" namespace history { diff --git a/chrome/browser/history/expire_history_backend_unittest.cc b/chrome/browser/history/expire_history_backend_unittest.cc index 29b9d43..2a3c8e0 100644 --- a/chrome/browser/history/expire_history_backend_unittest.cc +++ b/chrome/browser/history/expire_history_backend_unittest.cc @@ -17,11 +17,12 @@ #include "base/strings/utf_string_conversions.h" #include "chrome/browser/history/expire_history_backend.h" #include "chrome/browser/history/history_database.h" -#include "chrome/browser/history/thumbnail_database.h" +#include "chrome/browser/history/history_details.h" #include "chrome/browser/history/top_sites.h" #include "chrome/test/base/testing_profile.h" #include 
"chrome/tools/profiles/thumbnail-inl.h" #include "components/history/core/browser/history_backend_notifier.h" +#include "components/history/core/browser/thumbnail_database.h" #include "components/history/core/common/thumbnail_score.h" #include "components/history/core/test/history_client_fake_bookmarks.h" #include "content/public/test/test_browser_thread.h" diff --git a/chrome/browser/history/history_backend.cc b/chrome/browser/history/history_backend.cc index 88024c3..5e39178 100644 --- a/chrome/browser/history/history_backend.cc +++ b/chrome/browser/history/history_backend.cc @@ -31,7 +31,6 @@ #include "chrome/browser/history/top_sites.h" #include "chrome/browser/history/typed_url_syncable_service.h" #include "chrome/browser/history/typed_url_syncable_service.h" -#include "chrome/browser/history/visit_filter.h" #include "chrome/common/chrome_constants.h" #include "chrome/common/importer/imported_favicon_usage.h" #include "chrome/common/url_constants.h" @@ -42,6 +41,7 @@ #include "components/history/core/browser/history_db_task.h" #include "components/history/core/browser/keyword_search_term.h" #include "components/history/core/browser/page_usage_data.h" +#include "components/history/core/browser/visit_filter.h" #include "net/base/registry_controlled_domains/registry_controlled_domain.h" #include "sql/error_delegate_util.h" #include "third_party/skia/include/core/SkBitmap.h" diff --git a/chrome/browser/history/history_backend.h b/chrome/browser/history/history_backend.h index d0987e5..61002a1 100644 --- a/chrome/browser/history/history_backend.h +++ b/chrome/browser/history/history_backend.h @@ -20,11 +20,11 @@ #include "base/task/cancelable_task_tracker.h" #include "chrome/browser/history/expire_history_backend.h" #include "chrome/browser/history/history_database.h" -#include "chrome/browser/history/thumbnail_database.h" -#include "chrome/browser/history/visit_tracker.h" #include "components/history/core/browser/history_backend_notifier.h" #include 
"components/history/core/browser/history_types.h" #include "components/history/core/browser/keyword_id.h" +#include "components/history/core/browser/thumbnail_database.h" +#include "components/history/core/browser/visit_tracker.h" #include "components/visitedlink/browser/visitedlink_delegate.h" #include "sql/init_status.h" diff --git a/chrome/browser/history/history_backend_unittest.cc b/chrome/browser/history/history_backend_unittest.cc index cbbcac4..5825415 100644 --- a/chrome/browser/history/history_backend_unittest.cc +++ b/chrome/browser/history/history_backend_unittest.cc @@ -26,7 +26,6 @@ #include "chrome/browser/history/history_service.h" #include "chrome/browser/history/history_service_factory.h" #include "chrome/browser/history/in_memory_history_backend.h" -#include "chrome/browser/history/visit_filter.h" #include "chrome/common/chrome_constants.h" #include "chrome/common/chrome_paths.h" #include "chrome/common/importer/imported_favicon_usage.h" @@ -34,6 +33,7 @@ #include "components/history/core/browser/history_constants.h" #include "components/history/core/browser/in_memory_database.h" #include "components/history/core/browser/keyword_search_term.h" +#include "components/history/core/browser/visit_filter.h" #include "components/history/core/test/history_client_fake_bookmarks.h" #include "content/public/browser/notification_details.h" #include "content/public/browser/notification_source.h" diff --git a/chrome/browser/history/history_database.h b/chrome/browser/history/history_database.h index d0d79a9..deb26e2 100644 --- a/chrome/browser/history/history_database.h +++ b/chrome/browser/history/history_database.h @@ -10,9 +10,9 @@ #include "base/gtest_prod_util.h" #include "build/build_config.h" #include "chrome/browser/history/download_database.h" -#include "chrome/browser/history/visit_database.h" -#include "chrome/browser/history/visitsegment_database.h" #include "components/history/core/browser/url_database.h" +#include 
"components/history/core/browser/visit_database.h" +#include "components/history/core/browser/visitsegment_database.h" #include "sql/connection.h" #include "sql/init_status.h" #include "sql/meta_table.h" diff --git a/chrome/browser/history/history_service.cc b/chrome/browser/history/history_service.cc index ea86061..a22ac4b 100644 --- a/chrome/browser/history/history_service.cc +++ b/chrome/browser/history/history_service.cc @@ -37,8 +37,6 @@ #include "chrome/browser/history/in_memory_history_backend.h" #include "chrome/browser/history/in_memory_url_index.h" #include "chrome/browser/history/top_sites.h" -#include "chrome/browser/history/visit_database.h" -#include "chrome/browser/history/visit_filter.h" #include "chrome/browser/history/web_history_service.h" #include "chrome/browser/history/web_history_service_factory.h" #include "chrome/browser/profiles/profile.h" @@ -53,6 +51,8 @@ #include "components/history/core/browser/history_types.h" #include "components/history/core/browser/in_memory_database.h" #include "components/history/core/browser/keyword_search_term.h" +#include "components/history/core/browser/visit_database.h" +#include "components/history/core/browser/visit_filter.h" #include "components/history/core/common/thumbnail_score.h" #include "components/visitedlink/browser/visitedlink_master.h" #include "content/public/browser/browser_thread.h" diff --git a/chrome/browser/history/in_memory_url_index.cc b/chrome/browser/history/in_memory_url_index.cc index 74ad2a0..b14a19e 100644 --- a/chrome/browser/history/in_memory_url_index.cc +++ b/chrome/browser/history/in_memory_url_index.cc @@ -7,7 +7,6 @@ #include "base/debug/trace_event.h" #include "base/files/file_util.h" #include "base/strings/utf_string_conversions.h" -#include "chrome/browser/bookmarks/bookmark_model_factory.h" #include "chrome/browser/chrome_notification_types.h" #include "chrome/browser/history/history_notifications.h" #include "chrome/browser/history/history_service.h" @@ -15,7 +14,6 @@ 
#include "chrome/browser/history/url_index_private_data.h" #include "chrome/browser/profiles/profile.h" #include "chrome/common/url_constants.h" -#include "components/bookmarks/browser/bookmark_model.h" #include "components/history/core/browser/url_database.h" #include "content/public/browser/browser_thread.h" #include "content/public/browser/notification_details.h" diff --git a/chrome/browser/history/in_memory_url_index.h b/chrome/browser/history/in_memory_url_index.h index 59dbd20..69b5b90 100644 --- a/chrome/browser/history/in_memory_url_index.h +++ b/chrome/browser/history/in_memory_url_index.h @@ -19,11 +19,11 @@ #include "base/scoped_observer.h" #include "base/strings/string16.h" #include "base/task/cancelable_task_tracker.h" -#include "chrome/browser/history/in_memory_url_index_types.h" #include "chrome/browser/history/scored_history_match.h" #include "components/history/core/browser/history_db_task.h" #include "components/history/core/browser/history_service_observer.h" #include "components/history/core/browser/history_types.h" +#include "components/history/core/browser/in_memory_url_index_types.h" #include "content/public/browser/notification_observer.h" #include "content/public/browser/notification_registrar.h" #include "sql/connection.h" diff --git a/chrome/browser/history/in_memory_url_index_cache.proto b/chrome/browser/history/in_memory_url_index_cache.proto deleted file mode 100644 index df2de1b..0000000 --- a/chrome/browser/history/in_memory_url_index_cache.proto +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. -// -// InMemoryURLIndex caching protocol buffers. -// -// At certain times during browser operation, the indexes from the -// InMemoryURLIndex are written to a disk-based cache using the -// following protobuf description. 
- -syntax = "proto2"; - -option optimize_for = LITE_RUNTIME; - -package in_memory_url_index; - -message InMemoryURLIndexCacheItem { - - message WordListItem { - required uint32 word_count = 1; - repeated string word = 2; - } - - message WordMapItem { - message WordMapEntry { - required string word = 1; - required int32 word_id = 2; - } - - required uint32 item_count = 1; - repeated WordMapEntry word_map_entry = 2; - } - - message CharWordMapItem { - message CharWordMapEntry { - required uint32 item_count = 1; - required int32 char_16 = 2; - repeated int32 word_id = 3 [packed=true]; - } - - required uint32 item_count = 1; - repeated CharWordMapEntry char_word_map_entry = 2; - } - - message WordIDHistoryMapItem { - message WordIDHistoryMapEntry { - required uint32 item_count = 1; - required int32 word_id = 2; - repeated int64 history_id = 3 [packed=true]; - } - - required uint32 item_count = 1; - repeated WordIDHistoryMapEntry word_id_history_map_entry = 2; - } - - message HistoryInfoMapItem { - message HistoryInfoMapEntry { - message VisitInfo { - required int64 visit_time = 1; - // Corresponds to ui::PageTransition. - required uint64 transition_type = 2; - } - required int64 history_id = 1; - required int32 visit_count = 2; - required int32 typed_count = 3; - required int64 last_visit = 4; - required string url = 5; - optional string title = 6; - repeated VisitInfo visits = 7; - } - - required uint32 item_count = 1; - repeated HistoryInfoMapEntry history_info_map_entry = 2; - } - - message WordStartsMapItem { - message WordStartsMapEntry { - required int64 history_id = 1; - repeated int32 url_word_starts = 2 [packed=true]; - repeated int32 title_word_starts = 3 [packed=true]; - } - - required uint32 item_count = 1; - repeated WordStartsMapEntry word_starts_map_entry = 2; - } - - // The date that the cache was last rebuilt from history. Note that - // this cache may include items that were visited after this date if - // the InMemoryURLIndex was updated on the fly. 
This timestamp is meant - // to indicate the last date the index was rebuilt from the ground truth: - // the history database on disk. - required int64 last_rebuild_timestamp = 1; - // If there is no version we'll assume version 0. - optional int32 version = 2; - required int32 history_item_count = 3; - - optional WordListItem word_list = 4; - optional WordMapItem word_map = 5; - optional CharWordMapItem char_word_map = 6; - optional WordIDHistoryMapItem word_id_history_map = 7; - optional HistoryInfoMapItem history_info_map = 8; - optional WordStartsMapItem word_starts_map = 9; -} diff --git a/chrome/browser/history/in_memory_url_index_types.cc b/chrome/browser/history/in_memory_url_index_types.cc deleted file mode 100644 index 4cc1d16..0000000 --- a/chrome/browser/history/in_memory_url_index_types.cc +++ /dev/null @@ -1,163 +0,0 @@ -// Copyright (c) 2012 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "chrome/browser/history/in_memory_url_index_types.h" - -#include -#include -#include -#include -#include - -#include "base/i18n/break_iterator.h" -#include "base/i18n/case_conversion.h" -#include "base/strings/string_util.h" -#include "net/base/escape.h" -#include "net/base/net_util.h" - -namespace history { - -// Matches within URL and Title Strings ---------------------------------------- - -TermMatches MatchTermInString(const base::string16& term, - const base::string16& cleaned_string, - int term_num) { - const size_t kMaxCompareLength = 2048; - const base::string16& short_string = - (cleaned_string.length() > kMaxCompareLength) ? 
- cleaned_string.substr(0, kMaxCompareLength) : cleaned_string; - TermMatches matches; - for (size_t location = short_string.find(term); - location != base::string16::npos; - location = short_string.find(term, location + 1)) - matches.push_back(TermMatch(term_num, location, term.length())); - return matches; -} - -// Comparison function for sorting TermMatches by their offsets. -bool MatchOffsetLess(const TermMatch& m1, const TermMatch& m2) { - return m1.offset < m2.offset; -} - -TermMatches SortAndDeoverlapMatches(const TermMatches& matches) { - if (matches.empty()) - return matches; - TermMatches sorted_matches = matches; - std::sort(sorted_matches.begin(), sorted_matches.end(), MatchOffsetLess); - TermMatches clean_matches; - TermMatch last_match; - for (TermMatches::const_iterator iter = sorted_matches.begin(); - iter != sorted_matches.end(); ++iter) { - if (iter->offset >= last_match.offset + last_match.length) { - last_match = *iter; - clean_matches.push_back(last_match); - } - } - return clean_matches; -} - -std::vector OffsetsFromTermMatches(const TermMatches& matches) { - std::vector offsets; - for (TermMatches::const_iterator i = matches.begin(); i != matches.end(); - ++i) { - offsets.push_back(i->offset); - offsets.push_back(i->offset + i->length); - } - return offsets; -} - -TermMatches ReplaceOffsetsInTermMatches(const TermMatches& matches, - const std::vector& offsets) { - DCHECK_EQ(2 * matches.size(), offsets.size()); - TermMatches new_matches; - std::vector::const_iterator offset_iter = offsets.begin(); - for (TermMatches::const_iterator term_iter = matches.begin(); - term_iter != matches.end(); ++term_iter, ++offset_iter) { - const size_t starting_offset = *offset_iter; - ++offset_iter; - const size_t ending_offset = *offset_iter; - if ((starting_offset != base::string16::npos) && - (ending_offset != base::string16::npos) && - (starting_offset != ending_offset)) { - TermMatch new_match(*term_iter); - new_match.offset = starting_offset; - 
new_match.length = ending_offset - starting_offset; - new_matches.push_back(new_match); - } - } - return new_matches; -} - -// Utility Functions ----------------------------------------------------------- - -String16Set String16SetFromString16(const base::string16& cleaned_uni_string, - WordStarts* word_starts) { - String16Vector words = - String16VectorFromString16(cleaned_uni_string, false, word_starts); - String16Set word_set; - for (String16Vector::const_iterator iter = words.begin(); iter != words.end(); - ++iter) - word_set.insert(base::i18n::ToLower(*iter).substr(0, kMaxSignificantChars)); - return word_set; -} - -String16Vector String16VectorFromString16( - const base::string16& cleaned_uni_string, - bool break_on_space, - WordStarts* word_starts) { - if (word_starts) - word_starts->clear(); - base::i18n::BreakIterator iter(cleaned_uni_string, - break_on_space ? base::i18n::BreakIterator::BREAK_SPACE : - base::i18n::BreakIterator::BREAK_WORD); - String16Vector words; - if (!iter.Init()) - return words; - while (iter.Advance()) { - if (break_on_space || iter.IsWord()) { - base::string16 word(iter.GetString()); - size_t initial_whitespace = 0; - if (break_on_space) { - base::string16 trimmed_word; - base::TrimWhitespace(word, base::TRIM_LEADING, &trimmed_word); - initial_whitespace = word.length() - trimmed_word.length(); - base::TrimWhitespace(trimmed_word, base::TRIM_TRAILING, &word); - } - if (word.empty()) - continue; - words.push_back(word); - if (!word_starts) - continue; - size_t word_start = iter.prev() + initial_whitespace; - if (word_start < kMaxSignificantChars) - word_starts->push_back(word_start); - } - } - return words; -} - -Char16Set Char16SetFromString16(const base::string16& term) { - Char16Set characters; - for (base::string16::const_iterator iter = term.begin(); iter != term.end(); - ++iter) - characters.insert(*iter); - return characters; -} - -// HistoryInfoMapValue --------------------------------------------------------- - 
-HistoryInfoMapValue::HistoryInfoMapValue() {} -HistoryInfoMapValue::~HistoryInfoMapValue() {} - -// RowWordStarts --------------------------------------------------------------- - -RowWordStarts::RowWordStarts() {} -RowWordStarts::~RowWordStarts() {} - -void RowWordStarts::Clear() { - url_word_starts_.clear(); - title_word_starts_.clear(); -} - -} // namespace history diff --git a/chrome/browser/history/in_memory_url_index_types.h b/chrome/browser/history/in_memory_url_index_types.h deleted file mode 100644 index bd23160..0000000 --- a/chrome/browser/history/in_memory_url_index_types.h +++ /dev/null @@ -1,179 +0,0 @@ -// Copyright (c) 2012 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_TYPES_H_ -#define CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_TYPES_H_ - -#include -#include -#include - -#include "base/strings/string16.h" -#include "components/history/core/browser/history_types.h" -#include "url/gurl.h" - -namespace history { - -// The maximum number of characters to consider from an URL and page title -// while matching user-typed terms. -const size_t kMaxSignificantChars = 200; - -// Matches within URL and Title Strings ---------------------------------------- - -// Specifies where an omnibox term occurs within a string. Used for specifying -// highlights in AutocompleteMatches (ACMatchClassifications) and to assist in -// scoring a result. -struct TermMatch { - TermMatch() : term_num(0), offset(0), length(0) {} - TermMatch(int term_num, size_t offset, size_t length) - : term_num(term_num), - offset(offset), - length(length) {} - - int term_num; // The index of the term in the original search string. - size_t offset; // The starting offset of the substring match. - size_t length; // The length of the substring match. 
-}; -typedef std::vector TermMatches; - -// Returns a TermMatches which has an entry for each occurrence of the -// string |term| found in the string |cleaned_string|. Use -// CleanUpUrlForMatching() or CleanUpUrlTitleMatching() before passing -// |cleaned_string| to this function. The function marks each match -// with |term_num| so that the resulting TermMatches can be merged -// with other TermMatches for other terms. Note that only the first -// 2,048 characters of |string| are considered during the match -// operation. -TermMatches MatchTermInString(const base::string16& term, - const base::string16& cleaned_string, - int term_num); - -// Sorts and removes overlapping substring matches from |matches| and -// returns the cleaned up matches. -TermMatches SortAndDeoverlapMatches(const TermMatches& matches); - -// Extracts and returns the offsets from |matches|. This includes both -// the offsets corresponding to the beginning of a match and the offsets -// corresponding to the end of a match (i.e., offset+length for that match). -std::vector OffsetsFromTermMatches(const TermMatches& matches); - -// Replaces the offsets and lengths in |matches| with those given in |offsets|. -// |offsets| gives beginning and ending offsets for each match; this function -// translates (beginning, ending) offset into (beginning offset, length of -// match). It deletes any matches for which an endpoint is npos and returns -// the updated list of matches. -TermMatches ReplaceOffsetsInTermMatches(const TermMatches& matches, - const std::vector& offsets); - -// Convenience Types ----------------------------------------------------------- - -typedef std::vector String16Vector; -typedef std::set String16Set; -typedef std::set Char16Set; -typedef std::vector Char16Vector; - -// A vector that contains the offsets at which each word starts within a string. 
-typedef std::vector WordStarts; - -// Utility Functions ----------------------------------------------------------- - -// Breaks the string |cleaned_uni_string| down into individual words. -// Use CleanUpUrlForMatching() or CleanUpUrlTitleMatching() before -// passing |cleaned_uni_string| to this function. If |word_starts| is -// not NULL then clears and pushes the offsets within -// |cleaned_uni_string| at which each word starts onto -// |word_starts|. These offsets are collected only up to the first -// kMaxSignificantChars of |cleaned_uni_string|. -String16Set String16SetFromString16(const base::string16& cleaned_uni_string, - WordStarts* word_starts); - -// Breaks the |cleaned_uni_string| string down into individual words -// and return a vector with the individual words in their original -// order. Use CleanUpUrlForMatching() or CleanUpUrlTitleMatching() -// before passing |cleaned_uni_string| to this function. If -// |break_on_space| is false then the resulting list will contain only -// words containing alpha-numeric characters. If |break_on_space| is -// true then the resulting list will contain strings broken at -// whitespace. (|break_on_space| indicates that the -// BreakIterator::BREAK_SPACE (equivalent to BREAK_LINE) approach is -// to be used. For a complete description of this algorithm refer to -// the comments in base/i18n/break_iterator.h.) If |word_starts| is -// not NULL then clears and pushes the word starts onto |word_starts|. -// -// Example: -// Given: |cleaned_uni_string|: "http://www.google.com/ harry the rabbit." -// With |break_on_space| false the returned list will contain: -// "http", "www", "google", "com", "harry", "the", "rabbit" -// With |break_on_space| true the returned list will contain: -// "http://", "www.google.com/", "harry", "the", "rabbit." 
-String16Vector String16VectorFromString16( - const base::string16& cleaned_uni_string, - bool break_on_space, - WordStarts* word_starts); - -// Breaks the |uni_word| string down into its individual characters. -// Note that this is temporarily intended to work on a single word, but -// _will_ work on a string of words, perhaps with unexpected results. -// TODO(mrossetti): Lots of optimizations possible here for not restarting -// a search if the user is just typing along. Also, change this to uniString -// and properly handle substring matches, scoring and sorting the results -// by score. Also, provide the metrics for where the matches occur so that -// the UI can highlight the matched sections. -Char16Set Char16SetFromString16(const base::string16& uni_word); - -// Support for InMemoryURLIndex Private Data ----------------------------------- - -// An index into a list of all of the words we have indexed. -typedef size_t WordID; - -// A map allowing a WordID to be determined given a word. -typedef std::map WordMap; - -// A map from character to the word_ids of words containing that character. -typedef std::set WordIDSet; // An index into the WordList. -typedef std::map CharWordIDMap; - -// A map from word (by word_id) to history items containing that word. -typedef history::URLID HistoryID; -typedef std::set HistoryIDSet; -typedef std::vector HistoryIDVector; -typedef std::map WordIDHistoryMap; -typedef std::map HistoryIDWordMap; - - -// Information used in scoring a particular URL. -typedef std::vector VisitInfoVector; -struct HistoryInfoMapValue { - HistoryInfoMapValue(); - ~HistoryInfoMapValue(); - - // This field is always populated. - URLRow url_row; - - // This field gets filled in asynchronously after a visit. As such, - // it's almost always correct. If it's wrong, it's likely to either - // be empty (if this URL was recently added to the index) or - // slightly out-of-date (one visit behind). 
- VisitInfoVector visits; -}; - -// A map from history_id to the history's URL and title. -typedef std::map HistoryInfoMap; - -// A map from history_id to URL and page title word start metrics. -struct RowWordStarts { - RowWordStarts(); - ~RowWordStarts(); - - // Clears both url_word_starts_ and title_word_starts_. - void Clear(); - - WordStarts url_word_starts_; - WordStarts title_word_starts_; -}; -typedef std::map WordStartsMap; - -} // namespace history - -#endif // CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_TYPES_H_ diff --git a/chrome/browser/history/in_memory_url_index_types_unittest.cc b/chrome/browser/history/in_memory_url_index_types_unittest.cc deleted file mode 100644 index 874fc62..0000000 --- a/chrome/browser/history/in_memory_url_index_types_unittest.cc +++ /dev/null @@ -1,150 +0,0 @@ -// Copyright (c) 2012 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include - -#include "base/strings/string16.h" -#include "base/strings/utf_string_conversions.h" -#include "chrome/browser/history/in_memory_url_index_types.h" -#include "testing/gtest/include/gtest/gtest.h" - -using base::UTF8ToUTF16; - -namespace history { - -// Helper function for verifying that the contents of a C++ iterable container -// of ints matches a C array ints. 
-template -bool IntArraysEqual(const size_t* expected, - size_t expected_size, - const T& actual) { - if (expected_size != actual.size()) - return false; - for (size_t i = 0; i < expected_size; ++i) - if (expected[i] != actual[i]) - return false; - return true; -} - -class InMemoryURLIndexTypesTest : public testing::Test { -}; - -TEST_F(InMemoryURLIndexTypesTest, StaticFunctions) { - // Test String16VectorFromString16 - base::string16 string_a( - base::UTF8ToUTF16("http://www.google.com/ frammy the brammy")); - WordStarts actual_starts_a; - String16Vector string_vec = - String16VectorFromString16(string_a, false, &actual_starts_a); - ASSERT_EQ(7U, string_vec.size()); - // See if we got the words we expected. - EXPECT_EQ(UTF8ToUTF16("http"), string_vec[0]); - EXPECT_EQ(UTF8ToUTF16("www"), string_vec[1]); - EXPECT_EQ(UTF8ToUTF16("google"), string_vec[2]); - EXPECT_EQ(UTF8ToUTF16("com"), string_vec[3]); - EXPECT_EQ(UTF8ToUTF16("frammy"), string_vec[4]); - EXPECT_EQ(UTF8ToUTF16("the"), string_vec[5]); - EXPECT_EQ(UTF8ToUTF16("brammy"), string_vec[6]); - // Verify the word starts. 
- size_t expected_starts_a[] = {0, 7, 11, 18, 23, 31, 35}; - EXPECT_TRUE(IntArraysEqual(expected_starts_a, arraysize(expected_starts_a), - actual_starts_a)); - - WordStarts actual_starts_b; - string_vec = String16VectorFromString16(string_a, true, &actual_starts_b); - ASSERT_EQ(5U, string_vec.size()); - EXPECT_EQ(UTF8ToUTF16("http://"), string_vec[0]); - EXPECT_EQ(UTF8ToUTF16("www.google.com/"), string_vec[1]); - EXPECT_EQ(UTF8ToUTF16("frammy"), string_vec[2]); - EXPECT_EQ(UTF8ToUTF16("the"), string_vec[3]); - EXPECT_EQ(UTF8ToUTF16("brammy"), string_vec[4]); - size_t expected_starts_b[] = {0, 7, 23, 31, 35}; - EXPECT_TRUE(IntArraysEqual(expected_starts_b, arraysize(expected_starts_b), - actual_starts_b)); - - base::string16 string_c(base::ASCIIToUTF16( - " funky%20string-with=@strange sequences, intended(to exceed)")); - WordStarts actual_starts_c; - string_vec = String16VectorFromString16(string_c, false, &actual_starts_c); - ASSERT_EQ(8U, string_vec.size()); - // Note that we stop collecting words and word starts at kMaxSignificantChars. - size_t expected_starts_c[] = {1, 7, 16, 22, 32, 43, 52, 55}; - EXPECT_TRUE(IntArraysEqual(expected_starts_c, arraysize(expected_starts_c), - actual_starts_c)); - - // Test String16SetFromString16 - base::string16 string_d(base::ASCIIToUTF16( - "http://web.google.com/search Google Web Search")); - WordStarts actual_starts_d; - String16Set string_set = String16SetFromString16(string_d, &actual_starts_d); - EXPECT_EQ(5U, string_set.size()); - // See if we got the words we expected. 
- EXPECT_TRUE(string_set.find(UTF8ToUTF16("com")) != string_set.end()); - EXPECT_TRUE(string_set.find(UTF8ToUTF16("google")) != string_set.end()); - EXPECT_TRUE(string_set.find(UTF8ToUTF16("http")) != string_set.end()); - EXPECT_TRUE(string_set.find(UTF8ToUTF16("search")) != string_set.end()); - EXPECT_TRUE(string_set.find(UTF8ToUTF16("web")) != string_set.end()); - size_t expected_starts_d[] = {0, 7, 11, 18, 22, 29, 36, 40}; - EXPECT_TRUE(IntArraysEqual(expected_starts_d, arraysize(expected_starts_d), - actual_starts_d)); - - // Test SortAndDeoverlapMatches - TermMatches matches_e; - matches_e.push_back(TermMatch(1, 13, 10)); - matches_e.push_back(TermMatch(2, 23, 10)); - matches_e.push_back(TermMatch(3, 3, 10)); - matches_e.push_back(TermMatch(4, 40, 5)); - TermMatches matches_f = SortAndDeoverlapMatches(matches_e); - // Nothing should have been eliminated. - EXPECT_EQ(matches_e.size(), matches_f.size()); - // The order should now be 3, 1, 2, 4. - EXPECT_EQ(3, matches_f[0].term_num); - EXPECT_EQ(1, matches_f[1].term_num); - EXPECT_EQ(2, matches_f[2].term_num); - EXPECT_EQ(4, matches_f[3].term_num); - matches_e.push_back(TermMatch(5, 18, 10)); - matches_e.push_back(TermMatch(6, 38, 5)); - matches_f = SortAndDeoverlapMatches(matches_e); - // Two matches should have been eliminated. - EXPECT_EQ(matches_e.size() - 2, matches_f.size()); - // The order should now be 3, 1, 2, 6. 
- EXPECT_EQ(3, matches_f[0].term_num); - EXPECT_EQ(1, matches_f[1].term_num); - EXPECT_EQ(2, matches_f[2].term_num); - EXPECT_EQ(6, matches_f[3].term_num); - - // Test MatchTermInString - TermMatches matches_g = MatchTermInString( - UTF8ToUTF16("x"), UTF8ToUTF16("axbxcxdxex fxgx/hxixjx.kx"), 123); - const size_t expected_offsets[] = { 1, 3, 5, 7, 9, 12, 14, 17, 19, 21, 24 }; - ASSERT_EQ(arraysize(expected_offsets), matches_g.size()); - for (size_t i = 0; i < arraysize(expected_offsets); ++i) - EXPECT_EQ(expected_offsets[i], matches_g[i].offset); -} - -TEST_F(InMemoryURLIndexTypesTest, OffsetsAndTermMatches) { - // Test OffsetsFromTermMatches - history::TermMatches matches_a; - matches_a.push_back(history::TermMatch(1, 1, 2)); - matches_a.push_back(history::TermMatch(2, 4, 3)); - matches_a.push_back(history::TermMatch(3, 9, 1)); - matches_a.push_back(history::TermMatch(3, 10, 1)); - matches_a.push_back(history::TermMatch(4, 14, 5)); - std::vector offsets = OffsetsFromTermMatches(matches_a); - const size_t expected_offsets_a[] = {1, 3, 4, 7, 9, 10, 10, 11, 14, 19}; - ASSERT_EQ(offsets.size(), arraysize(expected_offsets_a)); - for (size_t i = 0; i < offsets.size(); ++i) - EXPECT_EQ(expected_offsets_a[i], offsets[i]); - - // Test ReplaceOffsetsInTermMatches - offsets[4] = base::string16::npos; // offset of third term - history::TermMatches matches_b = - ReplaceOffsetsInTermMatches(matches_a, offsets); - const size_t expected_offsets_b[] = {1, 4, 10, 14}; - ASSERT_EQ(arraysize(expected_offsets_b), matches_b.size()); - for (size_t i = 0; i < matches_b.size(); ++i) - EXPECT_EQ(expected_offsets_b[i], matches_b[i].offset); -} - -} // namespace history diff --git a/chrome/browser/history/in_memory_url_index_unittest.cc b/chrome/browser/history/in_memory_url_index_unittest.cc index 59f8a80..318e8c9 100644 --- a/chrome/browser/history/in_memory_url_index_unittest.cc +++ b/chrome/browser/history/in_memory_url_index_unittest.cc @@ -22,13 +22,13 @@ #include 
"chrome/browser/history/history_service.h" #include "chrome/browser/history/history_service_factory.h" #include "chrome/browser/history/in_memory_url_index.h" -#include "chrome/browser/history/in_memory_url_index_types.h" #include "chrome/browser/history/url_index_private_data.h" #include "chrome/common/chrome_paths.h" #include "chrome/test/base/history_index_restore_observer.h" #include "chrome/test/base/testing_profile.h" #include "components/bookmarks/test/bookmark_test_helpers.h" #include "components/history/core/browser/history_client.h" +#include "components/history/core/browser/in_memory_url_index_types.h" #include "content/public/browser/notification_details.h" #include "content/public/browser/notification_source.h" #include "content/public/test/test_browser_thread_bundle.h" diff --git a/chrome/browser/history/scored_history_match.h b/chrome/browser/history/scored_history_match.h index eb8f4cb..15a37e7 100644 --- a/chrome/browser/history/scored_history_match.h +++ b/chrome/browser/history/scored_history_match.h @@ -10,9 +10,9 @@ #include #include "base/strings/string16.h" -#include "chrome/browser/history/in_memory_url_index_types.h" #include "components/history/core/browser/history_match.h" #include "components/history/core/browser/history_types.h" +#include "components/history/core/browser/in_memory_url_index_types.h" #include "testing/gtest/include/gtest/gtest_prod.h" namespace history { diff --git a/chrome/browser/history/thumbnail_database.cc b/chrome/browser/history/thumbnail_database.cc deleted file mode 100644 index 44ce287..0000000 --- a/chrome/browser/history/thumbnail_database.cc +++ /dev/null @@ -1,1322 +0,0 @@ -// Copyright (c) 2012 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#include "chrome/browser/history/thumbnail_database.h" - -#include -#include - -#include "base/bind.h" -#include "base/debug/alias.h" -#include "base/debug/dump_without_crashing.h" -#include "base/files/file_util.h" -#include "base/format_macros.h" -#include "base/memory/ref_counted_memory.h" -#include "base/metrics/histogram.h" -#include "base/rand_util.h" -#include "base/strings/string_util.h" -#include "base/strings/stringprintf.h" -#include "base/time/time.h" -#include "components/history/core/browser/history_client.h" -#include "components/history/core/browser/url_database.h" -#include "sql/recovery.h" -#include "sql/statement.h" -#include "sql/transaction.h" -#include "third_party/sqlite/sqlite3.h" - -#if defined(OS_MACOSX) -#include "base/mac/mac_util.h" -#endif - -// Description of database tables: -// -// icon_mapping -// id Unique ID. -// page_url Page URL which has one or more associated favicons. -// icon_id The ID of favicon that this mapping maps to. -// -// favicons This table associates a row to each favicon for a -// |page_url| in the |icon_mapping| table. This is the -// default favicon |page_url|/favicon.ico plus any favicons -// associated via . -// The |id| matches the |icon_id| field in the appropriate -// row in the icon_mapping table. -// -// id Unique ID. -// url The URL at which the favicon file is located. -// icon_type The type of the favicon specified in the rel attribute of -// the link tag. The FAVICON type is used for the default -// favicon.ico favicon. -// -// favicon_bitmaps This table contains the PNG encoded bitmap data of the -// favicons. There is a separate row for every size in a -// multi resolution bitmap. The bitmap data is associated -// to the favicon via the |icon_id| field which matches -// the |id| field in the appropriate row in the |favicons| -// table. -// -// id Unique ID. -// icon_id The ID of the favicon that the bitmap is associated to. 
-// last_updated The time at which this favicon was inserted into the -// table. This is used to determine if it needs to be -// redownloaded from the web. -// image_data PNG encoded data of the favicon. -// width Pixel width of |image_data|. -// height Pixel height of |image_data|. - -namespace { - -// For this database, schema migrations are deprecated after two -// years. This means that the oldest non-deprecated version should be -// two years old or greater (thus the migrations to get there are -// older). Databases containing deprecated versions will be cleared -// at startup. Since this database is a cache, losing old data is not -// fatal (in fact, very old data may be expired immediately at startup -// anyhow). - -// Version 7: 911a634d/r209424 by qsr@chromium.org on 2013-07-01 -// Version 6: 610f923b/r152367 by pkotwicz@chromium.org on 2012-08-20 -// Version 5: e2ee8ae9/r105004 by groby@chromium.org on 2011-10-12 -// Version 4: 5f104d76/r77288 by sky@chromium.org on 2011-03-08 (deprecated) -// Version 3: 09911bf3/r15 by initial.commit on 2008-07-26 (deprecated) - -// Version number of the database. -// NOTE(shess): When changing the version, add a new golden file for -// the new version and a test to verify that Init() works with it. -const int kCurrentVersionNumber = 7; -const int kCompatibleVersionNumber = 7; -const int kDeprecatedVersionNumber = 4; // and earlier. - -void FillIconMapping(const sql::Statement& statement, - const GURL& page_url, - history::IconMapping* icon_mapping) { - icon_mapping->mapping_id = statement.ColumnInt64(0); - icon_mapping->icon_id = statement.ColumnInt64(1); - icon_mapping->icon_type = - static_cast(statement.ColumnInt(2)); - icon_mapping->icon_url = GURL(statement.ColumnString(3)); - icon_mapping->page_url = page_url; -} - -enum InvalidStructureType { - // NOTE(shess): Intentionally skip bucket 0 to account for - // conversion from a boolean histogram. 
- STRUCTURE_EVENT_FAVICON = 1, - STRUCTURE_EVENT_VERSION4, - STRUCTURE_EVENT_VERSION5, - - // Always keep this at the end. - STRUCTURE_EVENT_MAX, -}; - -void RecordInvalidStructure(InvalidStructureType invalid_type) { - UMA_HISTOGRAM_ENUMERATION("History.InvalidFaviconsDBStructure", - invalid_type, STRUCTURE_EVENT_MAX); -} - -// Attempt to pass 2000 bytes of |debug_info| into a crash dump. -void DumpWithoutCrashing2000(const std::string& debug_info) { - char debug_buf[2000]; - base::strlcpy(debug_buf, debug_info.c_str(), arraysize(debug_buf)); - base::debug::Alias(&debug_buf); - - base::debug::DumpWithoutCrashing(); -} - -void ReportCorrupt(sql::Connection* db, size_t startup_kb) { - // Buffer for accumulating debugging info about the error. Place - // more-relevant information earlier, in case things overflow the - // fixed-size buffer. - std::string debug_info; - - base::StringAppendF(&debug_info, "SQLITE_CORRUPT, integrity_check:\n"); - - // Check files up to 8M to keep things from blocking too long. - const size_t kMaxIntegrityCheckSize = 8192; - if (startup_kb > kMaxIntegrityCheckSize) { - base::StringAppendF(&debug_info, "too big %" PRIuS "\n", startup_kb); - } else { - std::vector messages; - - const base::TimeTicks before = base::TimeTicks::Now(); - db->FullIntegrityCheck(&messages); - base::StringAppendF(&debug_info, "# %" PRIx64 " ms, %" PRIuS " records\n", - (base::TimeTicks::Now() - before).InMilliseconds(), - messages.size()); - - // SQLite returns up to 100 messages by default, trim deeper to - // keep close to the 2000-character size limit for dumping. - // - // TODO(shess): If the first 20 tend to be actionable, test if - // passing the count to integrity_check makes it exit earlier. In - // that case it may be possible to greatly ease the size - // restriction. 
- const size_t kMaxMessages = 20; - for (size_t i = 0; i < kMaxMessages && i < messages.size(); ++i) { - base::StringAppendF(&debug_info, "%s\n", messages[i].c_str()); - } - } - - DumpWithoutCrashing2000(debug_info); -} - -void ReportError(sql::Connection* db, int error) { - // Buffer for accumulating debugging info about the error. Place - // more-relevant information earlier, in case things overflow the - // fixed-size buffer. - std::string debug_info; - - // The error message from the failed operation. - base::StringAppendF(&debug_info, "db error: %d/%s\n", - db->GetErrorCode(), db->GetErrorMessage()); - - // System errno information. - base::StringAppendF(&debug_info, "errno: %d\n", db->GetLastErrno()); - - // SQLITE_ERROR reports seem to be attempts to upgrade invalid - // schema, try to log that info. - if (error == SQLITE_ERROR) { - const char* kVersionSql = "SELECT value FROM meta WHERE key = 'version'"; - if (db->IsSQLValid(kVersionSql)) { - sql::Statement statement(db->GetUniqueStatement(kVersionSql)); - if (statement.Step()) { - debug_info += "version: "; - debug_info += statement.ColumnString(0); - debug_info += '\n'; - } else if (statement.Succeeded()) { - debug_info += "version: none\n"; - } else { - debug_info += "version: error\n"; - } - } else { - debug_info += "version: invalid\n"; - } - - debug_info += "schema:\n"; - - // sqlite_master has columns: - // type - "index" or "table". - // name - name of created element. - // tbl_name - name of element, or target table in case of index. - // rootpage - root page of the element in database file. - // sql - SQL to create the element. - // In general, the |sql| column is sufficient to derive the other - // columns. |rootpage| is not interesting for debugging, without - // the contents of the database. 
The COALESCE is because certain - // automatic elements will have a |name| but no |sql|, - const char* kSchemaSql = "SELECT COALESCE(sql, name) FROM sqlite_master"; - sql::Statement statement(db->GetUniqueStatement(kSchemaSql)); - while (statement.Step()) { - debug_info += statement.ColumnString(0); - debug_info += '\n'; - } - if (!statement.Succeeded()) - debug_info += "error\n"; - } - - // TODO(shess): Think of other things to log. Not logging the - // statement text because the backtrace should suffice in most - // cases. The database schema is a possibility, but the - // likelihood of recursive error callbacks makes that risky (same - // reasoning applies to other data fetched from the database). - - DumpWithoutCrashing2000(debug_info); -} - -// TODO(shess): If this proves out, perhaps lift the code out to -// chrome/browser/diagnostics/sqlite_diagnostics.{h,cc}. -void GenerateDiagnostics(sql::Connection* db, - size_t startup_kb, - int extended_error) { - int error = (extended_error & 0xFF); - - // Infrequently report information about the error up to the crash - // server. - static const uint64 kReportsPerMillion = 50000; - - // Since some/most errors will not resolve themselves, only report - // once per Chrome run. - static bool reported = false; - if (reported) - return; - - uint64 rand = base::RandGenerator(1000000); - if (error == SQLITE_CORRUPT) { - // Once the database is known to be corrupt, it will generate a - // stream of errors until someone fixes it, so give one chance. - // Set first in case of errors in generating the report. - reported = true; - - // Corrupt cases currently dominate, report them very infrequently. - static const uint64 kCorruptReportsPerMillion = 10000; - if (rand < kCorruptReportsPerMillion) - ReportCorrupt(db, startup_kb); - } else if (error == SQLITE_READONLY) { - // SQLITE_READONLY appears similar to SQLITE_CORRUPT - once it - // is seen, it is almost guaranteed to be seen again. 
- reported = true; - - if (rand < kReportsPerMillion) - ReportError(db, extended_error); - } else { - // Only set the flag when making a report. This should allow - // later (potentially different) errors in a stream of errors to - // be reported. - // - // TODO(shess): Would it be worthwile to audit for which cases - // want once-only handling? Sqlite.Error.Thumbnail shows - // CORRUPT and READONLY as almost 95% of all reports on these - // channels, so probably easier to just harvest from the field. - if (rand < kReportsPerMillion) { - reported = true; - ReportError(db, extended_error); - } - } -} - -// NOTE(shess): Schema modifications must consider initial creation in -// |InitImpl()|, recovery in |RecoverDatabaseOrRaze()|, and history pruning in -// |RetainDataForPageUrls()|. -bool InitTables(sql::Connection* db) { - const char kIconMappingSql[] = - "CREATE TABLE IF NOT EXISTS icon_mapping" - "(" - "id INTEGER PRIMARY KEY," - "page_url LONGVARCHAR NOT NULL," - "icon_id INTEGER" - ")"; - if (!db->Execute(kIconMappingSql)) - return false; - - const char kFaviconsSql[] = - "CREATE TABLE IF NOT EXISTS favicons" - "(" - "id INTEGER PRIMARY KEY," - "url LONGVARCHAR NOT NULL," - // default icon_type FAVICON to be consistent with past migration. - "icon_type INTEGER DEFAULT 1" - ")"; - if (!db->Execute(kFaviconsSql)) - return false; - - const char kFaviconBitmapsSql[] = - "CREATE TABLE IF NOT EXISTS favicon_bitmaps" - "(" - "id INTEGER PRIMARY KEY," - "icon_id INTEGER NOT NULL," - "last_updated INTEGER DEFAULT 0," - "image_data BLOB," - "width INTEGER DEFAULT 0," - "height INTEGER DEFAULT 0" - ")"; - if (!db->Execute(kFaviconBitmapsSql)) - return false; - - return true; -} - -// NOTE(shess): Schema modifications must consider initial creation in -// |InitImpl()|, recovery in |RecoverDatabaseOrRaze()|, and history pruning in -// |RetainDataForPageUrls()|. 
-bool InitIndices(sql::Connection* db) { - const char kIconMappingUrlIndexSql[] = - "CREATE INDEX IF NOT EXISTS icon_mapping_page_url_idx" - " ON icon_mapping(page_url)"; - const char kIconMappingIdIndexSql[] = - "CREATE INDEX IF NOT EXISTS icon_mapping_icon_id_idx" - " ON icon_mapping(icon_id)"; - if (!db->Execute(kIconMappingUrlIndexSql) || - !db->Execute(kIconMappingIdIndexSql)) { - return false; - } - - const char kFaviconsIndexSql[] = - "CREATE INDEX IF NOT EXISTS favicons_url ON favicons(url)"; - if (!db->Execute(kFaviconsIndexSql)) - return false; - - const char kFaviconBitmapsIndexSql[] = - "CREATE INDEX IF NOT EXISTS favicon_bitmaps_icon_id ON " - "favicon_bitmaps(icon_id)"; - if (!db->Execute(kFaviconBitmapsIndexSql)) - return false; - - return true; -} - -enum RecoveryEventType { - RECOVERY_EVENT_RECOVERED = 0, - RECOVERY_EVENT_FAILED_SCOPER, - RECOVERY_EVENT_FAILED_META_VERSION_ERROR, // obsolete - RECOVERY_EVENT_FAILED_META_VERSION_NONE, // obsolete - RECOVERY_EVENT_FAILED_META_WRONG_VERSION6, // obsolete - RECOVERY_EVENT_FAILED_META_WRONG_VERSION5, // obsolete - RECOVERY_EVENT_FAILED_META_WRONG_VERSION, - RECOVERY_EVENT_FAILED_RECOVER_META, // obsolete - RECOVERY_EVENT_FAILED_META_INSERT, // obsolete - RECOVERY_EVENT_FAILED_INIT, - RECOVERY_EVENT_FAILED_RECOVER_FAVICONS, // obsolete - RECOVERY_EVENT_FAILED_FAVICONS_INSERT, // obsolete - RECOVERY_EVENT_FAILED_RECOVER_FAVICON_BITMAPS, // obsolete - RECOVERY_EVENT_FAILED_FAVICON_BITMAPS_INSERT, // obsolete - RECOVERY_EVENT_FAILED_RECOVER_ICON_MAPPING, // obsolete - RECOVERY_EVENT_FAILED_ICON_MAPPING_INSERT, // obsolete - RECOVERY_EVENT_RECOVERED_VERSION6, // obsolete - RECOVERY_EVENT_FAILED_META_INIT, - RECOVERY_EVENT_FAILED_META_VERSION, - RECOVERY_EVENT_DEPRECATED, - RECOVERY_EVENT_FAILED_V5_INITSCHEMA, // obsolete - RECOVERY_EVENT_FAILED_V5_AUTORECOVER_FAVICONS, // obsolete - RECOVERY_EVENT_FAILED_V5_AUTORECOVER_ICON_MAPPING, // obsolete - RECOVERY_EVENT_RECOVERED_VERSION5, // obsolete - 
RECOVERY_EVENT_FAILED_AUTORECOVER_FAVICONS, - RECOVERY_EVENT_FAILED_AUTORECOVER_FAVICON_BITMAPS, - RECOVERY_EVENT_FAILED_AUTORECOVER_ICON_MAPPING, - RECOVERY_EVENT_FAILED_COMMIT, - - // Always keep this at the end. - RECOVERY_EVENT_MAX, -}; - -void RecordRecoveryEvent(RecoveryEventType recovery_event) { - UMA_HISTOGRAM_ENUMERATION("History.FaviconsRecovery", - recovery_event, RECOVERY_EVENT_MAX); -} - -// Recover the database to the extent possible, razing it if recovery -// is not possible. -// TODO(shess): This is mostly just a safe proof of concept. In the -// real world, this database is probably not worthwhile recovering, as -// opposed to just razing it and starting over whenever corruption is -// detected. So this database is a good test subject. -void RecoverDatabaseOrRaze(sql::Connection* db, const base::FilePath& db_path) { - // NOTE(shess): This code is currently specific to the version - // number. I am working on simplifying things to loosen the - // dependency, meanwhile contact me if you need to bump the version. - DCHECK_EQ(7, kCurrentVersionNumber); - - // TODO(shess): Reset back after? - db->reset_error_callback(); - - // For histogram purposes. - size_t favicons_rows_recovered = 0; - size_t favicon_bitmaps_rows_recovered = 0; - size_t icon_mapping_rows_recovered = 0; - int64 original_size = 0; - base::GetFileSize(db_path, &original_size); - - scoped_ptr recovery = sql::Recovery::Begin(db, db_path); - if (!recovery) { - // TODO(shess): Unable to create recovery connection. This - // implies something substantial is wrong. At this point |db| has - // been poisoned so there is nothing really to do. - // - // Possible responses are unclear. If the failure relates to a - // problem somehow specific to the temporary file used to back the - // database, then an in-memory database could possibly be used. - // This could potentially allow recovering the main database, and - // might be simple to implement w/in Begin(). 
- RecordRecoveryEvent(RECOVERY_EVENT_FAILED_SCOPER); - return; - } - - // Setup the meta recovery table and fetch the version number from - // the corrupt database. - int version = 0; - if (!recovery->SetupMeta() || !recovery->GetMetaVersionNumber(&version)) { - // TODO(shess): Prior histograms indicate all failures are in - // creating the recover virtual table for corrupt.meta. The table - // may not exist, or the database may be too far gone. Either - // way, unclear how to resolve. - sql::Recovery::Rollback(recovery.Pass()); - RecordRecoveryEvent(RECOVERY_EVENT_FAILED_META_VERSION); - return; - } - - // This code may be able to fetch version information that the regular - // deprecation path cannot. - // NOTE(shess): v5 and v6 are currently not deprecated in the normal Init() - // path, but are deprecated in the recovery path in the interest of keeping - // the code simple. http://crbug.com/327485 for numbers. - DCHECK_LE(kDeprecatedVersionNumber, 6); - if (version <= 6) { - sql::Recovery::Unrecoverable(recovery.Pass()); - RecordRecoveryEvent(RECOVERY_EVENT_DEPRECATED); - return; - } - - // Earlier versions have been handled or deprecated, later versions should be - // impossible. - if (version != 7) { - sql::Recovery::Unrecoverable(recovery.Pass()); - RecordRecoveryEvent(RECOVERY_EVENT_FAILED_META_WRONG_VERSION); - return; - } - - // Recover to current schema version. - sql::MetaTable recover_meta_table; - if (!recover_meta_table.Init(recovery->db(), kCurrentVersionNumber, - kCompatibleVersionNumber)) { - sql::Recovery::Rollback(recovery.Pass()); - RecordRecoveryEvent(RECOVERY_EVENT_FAILED_META_INIT); - return; - } - - // Create a fresh version of the database. The recovery code uses - // conflict-resolution to handle duplicates, so the indices are - // necessary. - if (!InitTables(recovery->db()) || !InitIndices(recovery->db())) { - // TODO(shess): Unable to create the new schema in the new - // database. 
The new database should be a temporary file, so - // being unable to work with it is pretty unclear. - // - // What are the potential responses, even? The recovery database - // could be opened as in-memory. If the temp database had a - // filesystem problem and the temp filesystem differs from the - // main database, then that could fix it. - sql::Recovery::Rollback(recovery.Pass()); - RecordRecoveryEvent(RECOVERY_EVENT_FAILED_INIT); - return; - } - - if (!recovery->AutoRecoverTable("favicons", 0, &favicons_rows_recovered)) { - sql::Recovery::Rollback(recovery.Pass()); - RecordRecoveryEvent(RECOVERY_EVENT_FAILED_AUTORECOVER_FAVICONS); - return; - } - if (!recovery->AutoRecoverTable("favicon_bitmaps", 0, - &favicon_bitmaps_rows_recovered)) { - sql::Recovery::Rollback(recovery.Pass()); - RecordRecoveryEvent(RECOVERY_EVENT_FAILED_AUTORECOVER_FAVICON_BITMAPS); - return; - } - if (!recovery->AutoRecoverTable("icon_mapping", 0, - &icon_mapping_rows_recovered)) { - sql::Recovery::Rollback(recovery.Pass()); - RecordRecoveryEvent(RECOVERY_EVENT_FAILED_AUTORECOVER_ICON_MAPPING); - return; - } - - // TODO(shess): Is it possible/likely to have broken foreign-key - // issues with the tables? - // - icon_mapping.icon_id maps to no favicons.id - // - favicon_bitmaps.icon_id maps to no favicons.id - // - favicons.id is referenced by no icon_mapping.icon_id - // - favicons.id is referenced by no favicon_bitmaps.icon_id - // This step is possibly not worth the effort necessary to develop - // and sequence the statements, as it is basically a form of garbage - // collection. - - if (!sql::Recovery::Recovered(recovery.Pass())) { - RecordRecoveryEvent(RECOVERY_EVENT_FAILED_COMMIT); - return; - } - - // Track the size of the recovered database relative to the size of - // the input database. The size should almost always be smaller, - // unless the input database was empty to start with. If the - // percentage results are very low, something is awry. 
- int64 final_size = 0; - if (original_size > 0 && - base::GetFileSize(db_path, &final_size) && - final_size > 0) { - int percentage = static_cast(original_size * 100 / final_size); - UMA_HISTOGRAM_PERCENTAGE("History.FaviconsRecoveredPercentage", - std::max(100, percentage)); - } - - // Using 10,000 because these cases mostly care about "none - // recovered" and "lots recovered". More than 10,000 rows recovered - // probably means there's something wrong with the profile. - UMA_HISTOGRAM_COUNTS_10000("History.FaviconsRecoveredRowsFavicons", - favicons_rows_recovered); - UMA_HISTOGRAM_COUNTS_10000("History.FaviconsRecoveredRowsFaviconBitmaps", - favicon_bitmaps_rows_recovered); - UMA_HISTOGRAM_COUNTS_10000("History.FaviconsRecoveredRowsIconMapping", - icon_mapping_rows_recovered); - - RecordRecoveryEvent(RECOVERY_EVENT_RECOVERED); -} - -void DatabaseErrorCallback(sql::Connection* db, - const base::FilePath& db_path, - size_t startup_kb, - history::HistoryClient* history_client, - int extended_error, - sql::Statement* stmt) { - // TODO(shess): Assert that this is running on a safe thread. - // AFAICT, should be the history thread, but at this level I can't - // see how to reach that. - - if (history_client && history_client->ShouldReportDatabaseError()) { - GenerateDiagnostics(db, startup_kb, extended_error); - } - - // Attempt to recover corrupt databases. - int error = (extended_error & 0xFF); - if (error == SQLITE_CORRUPT || - error == SQLITE_CANTOPEN || - error == SQLITE_NOTADB) { - RecoverDatabaseOrRaze(db, db_path); - } - - // The default handling is to assert on debug and to ignore on release. 
- if (!sql::Connection::ShouldIgnoreSqliteError(extended_error)) - DLOG(FATAL) << db->GetErrorMessage(); -} - -} // namespace - -namespace history { - -ThumbnailDatabase::IconMappingEnumerator::IconMappingEnumerator() { -} - -ThumbnailDatabase::IconMappingEnumerator::~IconMappingEnumerator() { -} - -bool ThumbnailDatabase::IconMappingEnumerator::GetNextIconMapping( - IconMapping* icon_mapping) { - if (!statement_.Step()) - return false; - FillIconMapping(statement_, GURL(statement_.ColumnString(4)), icon_mapping); - return true; -} - -ThumbnailDatabase::ThumbnailDatabase(HistoryClient* history_client) - : history_client_(history_client) { -} - -ThumbnailDatabase::~ThumbnailDatabase() { - // The DBCloseScoper will delete the DB and the cache. -} - -sql::InitStatus ThumbnailDatabase::Init(const base::FilePath& db_name) { - // TODO(shess): Consider separating database open from schema setup. - // With that change, this code could Raze() from outside the - // transaction, rather than needing RazeAndClose() in InitImpl(). - - // Retry failed setup in case the recovery system fixed things. - const size_t kAttempts = 2; - - sql::InitStatus status = sql::INIT_FAILURE; - for (size_t i = 0; i < kAttempts; ++i) { - status = InitImpl(db_name); - if (status == sql::INIT_OK) - return status; - - meta_table_.Reset(); - db_.Close(); - } - return status; -} - -void ThumbnailDatabase::ComputeDatabaseMetrics() { - sql::Statement favicon_count( - db_.GetCachedStatement(SQL_FROM_HERE, "SELECT COUNT(*) FROM favicons")); - UMA_HISTOGRAM_COUNTS_10000( - "History.NumFaviconsInDB", - favicon_count.Step() ? 
favicon_count.ColumnInt(0) : 0); -} - -void ThumbnailDatabase::BeginTransaction() { - db_.BeginTransaction(); -} - -void ThumbnailDatabase::CommitTransaction() { - db_.CommitTransaction(); -} - -void ThumbnailDatabase::RollbackTransaction() { - db_.RollbackTransaction(); -} - -void ThumbnailDatabase::Vacuum() { - DCHECK(db_.transaction_nesting() == 0) << - "Can not have a transaction when vacuuming."; - ignore_result(db_.Execute("VACUUM")); -} - -void ThumbnailDatabase::TrimMemory(bool aggressively) { - db_.TrimMemory(aggressively); -} - -bool ThumbnailDatabase::GetFaviconBitmapIDSizes( - favicon_base::FaviconID icon_id, - std::vector* bitmap_id_sizes) { - DCHECK(icon_id); - sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, - "SELECT id, width, height FROM favicon_bitmaps WHERE icon_id=?")); - statement.BindInt64(0, icon_id); - - bool result = false; - while (statement.Step()) { - result = true; - if (!bitmap_id_sizes) - return result; - - FaviconBitmapIDSize bitmap_id_size; - bitmap_id_size.bitmap_id = statement.ColumnInt64(0); - bitmap_id_size.pixel_size = gfx::Size(statement.ColumnInt(1), - statement.ColumnInt(2)); - bitmap_id_sizes->push_back(bitmap_id_size); - } - return result; -} - -bool ThumbnailDatabase::GetFaviconBitmaps( - favicon_base::FaviconID icon_id, - std::vector* favicon_bitmaps) { - DCHECK(icon_id); - sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, - "SELECT id, last_updated, image_data, width, height FROM favicon_bitmaps " - "WHERE icon_id=?")); - statement.BindInt64(0, icon_id); - - bool result = false; - while (statement.Step()) { - result = true; - if (!favicon_bitmaps) - return result; - - FaviconBitmap favicon_bitmap; - favicon_bitmap.bitmap_id = statement.ColumnInt64(0); - favicon_bitmap.icon_id = icon_id; - favicon_bitmap.last_updated = - base::Time::FromInternalValue(statement.ColumnInt64(1)); - if (statement.ColumnByteLength(2) > 0) { - scoped_refptr data(new base::RefCountedBytes()); - 
statement.ColumnBlobAsVector(2, &data->data()); - favicon_bitmap.bitmap_data = data; - } - favicon_bitmap.pixel_size = gfx::Size(statement.ColumnInt(3), - statement.ColumnInt(4)); - favicon_bitmaps->push_back(favicon_bitmap); - } - return result; -} - -bool ThumbnailDatabase::GetFaviconBitmap( - FaviconBitmapID bitmap_id, - base::Time* last_updated, - scoped_refptr* png_icon_data, - gfx::Size* pixel_size) { - DCHECK(bitmap_id); - sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, - "SELECT last_updated, image_data, width, height FROM favicon_bitmaps " - "WHERE id=?")); - statement.BindInt64(0, bitmap_id); - - if (!statement.Step()) - return false; - - if (last_updated) - *last_updated = base::Time::FromInternalValue(statement.ColumnInt64(0)); - - if (png_icon_data && statement.ColumnByteLength(1) > 0) { - scoped_refptr data(new base::RefCountedBytes()); - statement.ColumnBlobAsVector(1, &data->data()); - *png_icon_data = data; - } - - if (pixel_size) { - *pixel_size = gfx::Size(statement.ColumnInt(2), - statement.ColumnInt(3)); - } - return true; -} - -FaviconBitmapID ThumbnailDatabase::AddFaviconBitmap( - favicon_base::FaviconID icon_id, - const scoped_refptr& icon_data, - base::Time time, - const gfx::Size& pixel_size) { - DCHECK(icon_id); - sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, - "INSERT INTO favicon_bitmaps (icon_id, image_data, last_updated, width, " - "height) VALUES (?, ?, ?, ?, ?)")); - statement.BindInt64(0, icon_id); - if (icon_data.get() && icon_data->size()) { - statement.BindBlob(1, icon_data->front(), - static_cast(icon_data->size())); - } else { - statement.BindNull(1); - } - statement.BindInt64(2, time.ToInternalValue()); - statement.BindInt(3, pixel_size.width()); - statement.BindInt(4, pixel_size.height()); - - if (!statement.Run()) - return 0; - return db_.GetLastInsertRowId(); -} - -bool ThumbnailDatabase::SetFaviconBitmap( - FaviconBitmapID bitmap_id, - scoped_refptr bitmap_data, - base::Time time) { - 
DCHECK(bitmap_id); - sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, - "UPDATE favicon_bitmaps SET image_data=?, last_updated=? WHERE id=?")); - if (bitmap_data.get() && bitmap_data->size()) { - statement.BindBlob(0, bitmap_data->front(), - static_cast(bitmap_data->size())); - } else { - statement.BindNull(0); - } - statement.BindInt64(1, time.ToInternalValue()); - statement.BindInt64(2, bitmap_id); - - return statement.Run(); -} - -bool ThumbnailDatabase::SetFaviconBitmapLastUpdateTime( - FaviconBitmapID bitmap_id, - base::Time time) { - DCHECK(bitmap_id); - sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, - "UPDATE favicon_bitmaps SET last_updated=? WHERE id=?")); - statement.BindInt64(0, time.ToInternalValue()); - statement.BindInt64(1, bitmap_id); - return statement.Run(); -} - -bool ThumbnailDatabase::DeleteFaviconBitmap(FaviconBitmapID bitmap_id) { - sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, - "DELETE FROM favicon_bitmaps WHERE id=?")); - statement.BindInt64(0, bitmap_id); - return statement.Run(); -} - -bool ThumbnailDatabase::SetFaviconOutOfDate(favicon_base::FaviconID icon_id) { - sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, - "UPDATE favicon_bitmaps SET last_updated=? WHERE icon_id=?")); - statement.BindInt64(0, 0); - statement.BindInt64(1, icon_id); - - return statement.Run(); -} - -favicon_base::FaviconID ThumbnailDatabase::GetFaviconIDForFaviconURL( - const GURL& icon_url, - int required_icon_type, - favicon_base::IconType* icon_type) { - sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, - "SELECT id, icon_type FROM favicons WHERE url=? AND (icon_type & ? 
> 0) " - "ORDER BY icon_type DESC")); - statement.BindString(0, URLDatabase::GURLToDatabaseURL(icon_url)); - statement.BindInt(1, required_icon_type); - - if (!statement.Step()) - return 0; // not cached - - if (icon_type) - *icon_type = static_cast(statement.ColumnInt(1)); - return statement.ColumnInt64(0); -} - -bool ThumbnailDatabase::GetFaviconHeader(favicon_base::FaviconID icon_id, - GURL* icon_url, - favicon_base::IconType* icon_type) { - DCHECK(icon_id); - - sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, - "SELECT url, icon_type FROM favicons WHERE id=?")); - statement.BindInt64(0, icon_id); - - if (!statement.Step()) - return false; // No entry for the id. - - if (icon_url) - *icon_url = GURL(statement.ColumnString(0)); - if (icon_type) - *icon_type = static_cast(statement.ColumnInt(1)); - - return true; -} - -favicon_base::FaviconID ThumbnailDatabase::AddFavicon( - const GURL& icon_url, - favicon_base::IconType icon_type) { - - sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, - "INSERT INTO favicons (url, icon_type) VALUES (?, ?)")); - statement.BindString(0, URLDatabase::GURLToDatabaseURL(icon_url)); - statement.BindInt(1, icon_type); - - if (!statement.Run()) - return 0; - return db_.GetLastInsertRowId(); -} - -favicon_base::FaviconID ThumbnailDatabase::AddFavicon( - const GURL& icon_url, - favicon_base::IconType icon_type, - const scoped_refptr& icon_data, - base::Time time, - const gfx::Size& pixel_size) { - favicon_base::FaviconID icon_id = AddFavicon(icon_url, icon_type); - if (!icon_id || !AddFaviconBitmap(icon_id, icon_data, time, pixel_size)) - return 0; - - return icon_id; -} - -bool ThumbnailDatabase::DeleteFavicon(favicon_base::FaviconID id) { - sql::Statement statement; - statement.Assign(db_.GetCachedStatement(SQL_FROM_HERE, - "DELETE FROM favicons WHERE id = ?")); - statement.BindInt64(0, id); - if (!statement.Run()) - return false; - - statement.Assign(db_.GetCachedStatement(SQL_FROM_HERE, - "DELETE FROM 
favicon_bitmaps WHERE icon_id = ?")); - statement.BindInt64(0, id); - return statement.Run(); -} - -bool ThumbnailDatabase::GetIconMappingsForPageURL( - const GURL& page_url, - int required_icon_types, - std::vector* filtered_mapping_data) { - std::vector mapping_data; - if (!GetIconMappingsForPageURL(page_url, &mapping_data)) - return false; - - bool result = false; - for (std::vector::iterator m = mapping_data.begin(); - m != mapping_data.end(); ++m) { - if (m->icon_type & required_icon_types) { - result = true; - if (!filtered_mapping_data) - return result; - - // Restrict icon type of subsequent matches to |m->icon_type|. - // |m->icon_type| is the largest IconType in |mapping_data| because - // |mapping_data| is sorted in descending order of IconType. - required_icon_types = m->icon_type; - - filtered_mapping_data->push_back(*m); - } - } - return result; -} - -bool ThumbnailDatabase::GetIconMappingsForPageURL( - const GURL& page_url, - std::vector* mapping_data) { - sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, - "SELECT icon_mapping.id, icon_mapping.icon_id, favicons.icon_type, " - "favicons.url " - "FROM icon_mapping " - "INNER JOIN favicons " - "ON icon_mapping.icon_id = favicons.id " - "WHERE icon_mapping.page_url=? 
" - "ORDER BY favicons.icon_type DESC")); - statement.BindString(0, URLDatabase::GURLToDatabaseURL(page_url)); - - bool result = false; - while (statement.Step()) { - result = true; - if (!mapping_data) - return result; - - IconMapping icon_mapping; - FillIconMapping(statement, page_url, &icon_mapping); - mapping_data->push_back(icon_mapping); - } - return result; -} - -IconMappingID ThumbnailDatabase::AddIconMapping( - const GURL& page_url, - favicon_base::FaviconID icon_id) { - const char kSql[] = - "INSERT INTO icon_mapping (page_url, icon_id) VALUES (?, ?)"; - sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, kSql)); - statement.BindString(0, URLDatabase::GURLToDatabaseURL(page_url)); - statement.BindInt64(1, icon_id); - - if (!statement.Run()) - return 0; - - return db_.GetLastInsertRowId(); -} - -bool ThumbnailDatabase::UpdateIconMapping(IconMappingID mapping_id, - favicon_base::FaviconID icon_id) { - sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, - "UPDATE icon_mapping SET icon_id=? 
WHERE id=?")); - statement.BindInt64(0, icon_id); - statement.BindInt64(1, mapping_id); - - return statement.Run(); -} - -bool ThumbnailDatabase::DeleteIconMappings(const GURL& page_url) { - sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, - "DELETE FROM icon_mapping WHERE page_url = ?")); - statement.BindString(0, URLDatabase::GURLToDatabaseURL(page_url)); - - return statement.Run(); -} - -bool ThumbnailDatabase::DeleteIconMapping(IconMappingID mapping_id) { - sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, - "DELETE FROM icon_mapping WHERE id=?")); - statement.BindInt64(0, mapping_id); - - return statement.Run(); -} - -bool ThumbnailDatabase::HasMappingFor(favicon_base::FaviconID id) { - sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, - "SELECT id FROM icon_mapping " - "WHERE icon_id=?")); - statement.BindInt64(0, id); - - return statement.Step(); -} - -bool ThumbnailDatabase::CloneIconMappings(const GURL& old_page_url, - const GURL& new_page_url) { - sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, - "SELECT icon_id FROM icon_mapping " - "WHERE page_url=?")); - if (!statement.is_valid()) - return false; - - // Do nothing if there are existing bindings - statement.BindString(0, URLDatabase::GURLToDatabaseURL(new_page_url)); - if (statement.Step()) - return true; - - statement.Assign(db_.GetCachedStatement(SQL_FROM_HERE, - "INSERT INTO icon_mapping (page_url, icon_id) " - "SELECT ?, icon_id FROM icon_mapping " - "WHERE page_url = ?")); - - statement.BindString(0, URLDatabase::GURLToDatabaseURL(new_page_url)); - statement.BindString(1, URLDatabase::GURLToDatabaseURL(old_page_url)); - return statement.Run(); -} - -bool ThumbnailDatabase::InitIconMappingEnumerator( - favicon_base::IconType type, - IconMappingEnumerator* enumerator) { - DCHECK(!enumerator->statement_.is_valid()); - enumerator->statement_.Assign(db_.GetCachedStatement( - SQL_FROM_HERE, - "SELECT icon_mapping.id, icon_mapping.icon_id, 
favicons.icon_type, " - "favicons.url, icon_mapping.page_url " - "FROM icon_mapping JOIN favicons ON (" - "icon_mapping.icon_id = favicons.id) " - "WHERE favicons.icon_type = ?")); - enumerator->statement_.BindInt(0, type); - return enumerator->statement_.is_valid(); -} - -bool ThumbnailDatabase::RetainDataForPageUrls( - const std::vector& urls_to_keep) { - sql::Transaction transaction(&db_); - if (!transaction.Begin()) - return false; - - // temp.icon_id_mapping generates new icon ids as consecutive - // integers starting from 1, and maps them to the old icon ids. - { - const char kIconMappingCreate[] = - "CREATE TEMP TABLE icon_id_mapping " - "(" - "new_icon_id INTEGER PRIMARY KEY," - "old_icon_id INTEGER NOT NULL UNIQUE" - ")"; - if (!db_.Execute(kIconMappingCreate)) - return false; - - // Insert the icon ids for retained urls, skipping duplicates. - const char kIconMappingSql[] = - "INSERT OR IGNORE INTO temp.icon_id_mapping (old_icon_id) " - "SELECT icon_id FROM icon_mapping WHERE page_url = ?"; - sql::Statement statement(db_.GetUniqueStatement(kIconMappingSql)); - for (std::vector::const_iterator - i = urls_to_keep.begin(); i != urls_to_keep.end(); ++i) { - statement.BindString(0, URLDatabase::GURLToDatabaseURL(*i)); - if (!statement.Run()) - return false; - statement.Reset(true); - } - } - - const char kRenameIconMappingTable[] = - "ALTER TABLE icon_mapping RENAME TO old_icon_mapping"; - const char kCopyIconMapping[] = - "INSERT INTO icon_mapping (page_url, icon_id) " - "SELECT old.page_url, mapping.new_icon_id " - "FROM old_icon_mapping AS old " - "JOIN temp.icon_id_mapping AS mapping " - "ON (old.icon_id = mapping.old_icon_id)"; - const char kDropOldIconMappingTable[] = "DROP TABLE old_icon_mapping"; - - const char kRenameFaviconsTable[] = - "ALTER TABLE favicons RENAME TO old_favicons"; - const char kCopyFavicons[] = - "INSERT INTO favicons (id, url, icon_type) " - "SELECT mapping.new_icon_id, old.url, old.icon_type " - "FROM old_favicons AS old " - "JOIN 
temp.icon_id_mapping AS mapping " - "ON (old.id = mapping.old_icon_id)"; - const char kDropOldFaviconsTable[] = "DROP TABLE old_favicons"; - - const char kRenameFaviconBitmapsTable[] = - "ALTER TABLE favicon_bitmaps RENAME TO old_favicon_bitmaps"; - const char kCopyFaviconBitmaps[] = - "INSERT INTO favicon_bitmaps " - " (icon_id, last_updated, image_data, width, height) " - "SELECT mapping.new_icon_id, old.last_updated, " - " old.image_data, old.width, old.height " - "FROM old_favicon_bitmaps AS old " - "JOIN temp.icon_id_mapping AS mapping " - "ON (old.icon_id = mapping.old_icon_id)"; - const char kDropOldFaviconBitmapsTable[] = - "DROP TABLE old_favicon_bitmaps"; - - // Rename existing tables to new location. - if (!db_.Execute(kRenameIconMappingTable) || - !db_.Execute(kRenameFaviconsTable) || - !db_.Execute(kRenameFaviconBitmapsTable)) { - return false; - } - - // Initialize the replacement tables. At this point the old indices - // still exist (pointing to the old_* tables), so do not initialize - // the indices. - if (!InitTables(&db_)) - return false; - - // Copy all of the data over. - if (!db_.Execute(kCopyIconMapping) || - !db_.Execute(kCopyFavicons) || - !db_.Execute(kCopyFaviconBitmaps)) { - return false; - } - - // Drop the old_* tables, which also drops the indices. - if (!db_.Execute(kDropOldIconMappingTable) || - !db_.Execute(kDropOldFaviconsTable) || - !db_.Execute(kDropOldFaviconBitmapsTable)) { - return false; - } - - // Recreate the indices. - // TODO(shess): UNIQUE indices could fail due to duplication. This - // could happen in case of corruption. 
- if (!InitIndices(&db_)) - return false; - - const char kIconMappingDrop[] = "DROP TABLE temp.icon_id_mapping"; - if (!db_.Execute(kIconMappingDrop)) - return false; - - return transaction.Commit(); -} - -sql::InitStatus ThumbnailDatabase::OpenDatabase(sql::Connection* db, - const base::FilePath& db_name) { - size_t startup_kb = 0; - int64 size_64; - if (base::GetFileSize(db_name, &size_64)) - startup_kb = static_cast(size_64 / 1024); - - db->set_histogram_tag("Thumbnail"); - db->set_error_callback(base::Bind(&DatabaseErrorCallback, - db, db_name, startup_kb, history_client_)); - - // Thumbnails db now only stores favicons, so we don't need that big a page - // size or cache. - db->set_page_size(2048); - db->set_cache_size(32); - - // Run the database in exclusive mode. Nobody else should be accessing the - // database while we're running, and this will give somewhat improved perf. - db->set_exclusive_locking(); - - if (!db->Open(db_name)) - return sql::INIT_FAILURE; - - return sql::INIT_OK; -} - -sql::InitStatus ThumbnailDatabase::InitImpl(const base::FilePath& db_name) { - sql::InitStatus status = OpenDatabase(&db_, db_name); - if (status != sql::INIT_OK) - return status; - - // Clear databases which are too old to process. - DCHECK_LT(kDeprecatedVersionNumber, kCurrentVersionNumber); - sql::MetaTable::RazeIfDeprecated(&db_, kDeprecatedVersionNumber); - - // TODO(shess): Sqlite.Version.Thumbnail shows versions 22, 23, and - // 25. Future versions are not destroyed because that could lead to - // data loss if the profile is opened by a later channel, but - // perhaps a heuristic like >kCurrentVersionNumber+3 could be used. - - // Scope initialization in a transaction so we can't be partially initialized. - sql::Transaction transaction(&db_); - if (!transaction.Begin()) - return sql::INIT_FAILURE; - - // TODO(shess): Failing Begin() implies that something serious is - // wrong with the database. Raze() may be in order. 
- -#if defined(OS_MACOSX) - // Exclude the thumbnails file from backups. - base::mac::SetFileBackupExclusion(db_name); -#endif - - // thumbnails table has been obsolete for a long time, remove any - // detrious. - ignore_result(db_.Execute("DROP TABLE IF EXISTS thumbnails")); - - // At some point, operations involving temporary tables weren't done - // atomically and users have been stranded. Drop those tables and - // move on. - // TODO(shess): Prove it? Audit all cases and see if it's possible - // that this implies non-atomic update, and should thus be handled - // via the corruption handler. - ignore_result(db_.Execute("DROP TABLE IF EXISTS temp_favicons")); - ignore_result(db_.Execute("DROP TABLE IF EXISTS temp_favicon_bitmaps")); - ignore_result(db_.Execute("DROP TABLE IF EXISTS temp_icon_mapping")); - - // Create the tables. - if (!meta_table_.Init(&db_, kCurrentVersionNumber, - kCompatibleVersionNumber) || - !InitTables(&db_) || - !InitIndices(&db_)) { - return sql::INIT_FAILURE; - } - - // Version check. We should not encounter a database too old for us to handle - // in the wild, so we try to continue in that case. 
- if (meta_table_.GetCompatibleVersionNumber() > kCurrentVersionNumber) { - LOG(WARNING) << "Thumbnail database is too new."; - return sql::INIT_TOO_NEW; - } - - int cur_version = meta_table_.GetVersionNumber(); - - if (!db_.DoesColumnExist("favicons", "icon_type")) { - LOG(ERROR) << "Raze because of missing favicon.icon_type"; - RecordInvalidStructure(STRUCTURE_EVENT_VERSION4); - - db_.RazeAndClose(); - return sql::INIT_FAILURE; - } - - if (cur_version < 7 && !db_.DoesColumnExist("favicons", "sizes")) { - LOG(ERROR) << "Raze because of missing favicon.sizes"; - RecordInvalidStructure(STRUCTURE_EVENT_VERSION5); - - db_.RazeAndClose(); - return sql::INIT_FAILURE; - } - - if (cur_version == 5) { - ++cur_version; - if (!UpgradeToVersion6()) - return CantUpgradeToVersion(cur_version); - } - - if (cur_version == 6) { - ++cur_version; - if (!UpgradeToVersion7()) - return CantUpgradeToVersion(cur_version); - } - - LOG_IF(WARNING, cur_version < kCurrentVersionNumber) << - "Thumbnail database version " << cur_version << " is too old to handle."; - - // Initialization is complete. - if (!transaction.Commit()) - return sql::INIT_FAILURE; - - // Raze the database if the structure of the favicons database is not what - // it should be. This error cannot be detected via the SQL error code because - // the error code for running SQL statements against a database with missing - // columns is SQLITE_ERROR which is not unique enough to act upon. - // TODO(pkotwicz): Revisit this in M27 and see if the razing can be removed. 
- // (crbug.com/166453) - if (IsFaviconDBStructureIncorrect()) { - LOG(ERROR) << "Raze because of invalid favicon db structure."; - RecordInvalidStructure(STRUCTURE_EVENT_FAVICON); - - db_.RazeAndClose(); - return sql::INIT_FAILURE; - } - - return sql::INIT_OK; -} - -sql::InitStatus ThumbnailDatabase::CantUpgradeToVersion(int cur_version) { - LOG(WARNING) << "Unable to update to thumbnail database to version " << - cur_version << "."; - db_.Close(); - return sql::INIT_FAILURE; -} - -bool ThumbnailDatabase::UpgradeToVersion6() { - // Move bitmap data from favicons to favicon_bitmaps. - bool success = - db_.Execute("INSERT INTO favicon_bitmaps (icon_id, last_updated, " - "image_data, width, height)" - "SELECT id, last_updated, image_data, 0, 0 FROM favicons") && - db_.Execute("CREATE TABLE temp_favicons (" - "id INTEGER PRIMARY KEY," - "url LONGVARCHAR NOT NULL," - "icon_type INTEGER DEFAULT 1," - // default icon_type FAVICON to be consistent with - // past migration. - "sizes LONGVARCHAR)") && - db_.Execute("INSERT INTO temp_favicons (id, url, icon_type) " - "SELECT id, url, icon_type FROM favicons") && - db_.Execute("DROP TABLE favicons") && - db_.Execute("ALTER TABLE temp_favicons RENAME TO favicons"); - // NOTE(shess): v7 will re-create the index. - if (!success) - return false; - - meta_table_.SetVersionNumber(6); - meta_table_.SetCompatibleVersionNumber(std::min(6, kCompatibleVersionNumber)); - return true; -} - -bool ThumbnailDatabase::UpgradeToVersion7() { - // Sizes column was never used, remove it. - bool success = - db_.Execute("CREATE TABLE temp_favicons (" - "id INTEGER PRIMARY KEY," - "url LONGVARCHAR NOT NULL," - // default icon_type FAVICON to be consistent with - // past migration. 
- "icon_type INTEGER DEFAULT 1)") && - db_.Execute("INSERT INTO temp_favicons (id, url, icon_type) " - "SELECT id, url, icon_type FROM favicons") && - db_.Execute("DROP TABLE favicons") && - db_.Execute("ALTER TABLE temp_favicons RENAME TO favicons") && - db_.Execute("CREATE INDEX IF NOT EXISTS favicons_url ON favicons(url)"); - - if (!success) - return false; - - meta_table_.SetVersionNumber(7); - meta_table_.SetCompatibleVersionNumber(std::min(7, kCompatibleVersionNumber)); - return true; -} - -bool ThumbnailDatabase::IsFaviconDBStructureIncorrect() { - return !db_.IsSQLValid("SELECT id, url, icon_type FROM favicons"); -} - -} // namespace history diff --git a/chrome/browser/history/thumbnail_database.h b/chrome/browser/history/thumbnail_database.h deleted file mode 100644 index 14942f1..0000000 --- a/chrome/browser/history/thumbnail_database.h +++ /dev/null @@ -1,278 +0,0 @@ -// Copyright (c) 2012 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef CHROME_BROWSER_HISTORY_THUMBNAIL_DATABASE_H_ -#define CHROME_BROWSER_HISTORY_THUMBNAIL_DATABASE_H_ - -#include - -#include "base/gtest_prod_util.h" -#include "base/memory/ref_counted.h" -#include "components/history/core/browser/history_types.h" -#include "sql/connection.h" -#include "sql/init_status.h" -#include "sql/meta_table.h" -#include "sql/statement.h" - -namespace base { -class FilePath; -class RefCountedMemory; -class Time; -} - -namespace history { - -class HistoryClient; - -// This database interface is owned by the history backend and runs on the -// history thread. It is a totally separate component from history partially -// because we may want to move it to its own thread in the future. The -// operations we will do on this database will be slow, but we can tolerate -// higher latency (it's OK for thumbnails to come in slower than the rest -// of the data). 
Moving this to a separate thread would not block potentially -// higher priority history operations. -class ThumbnailDatabase { - public: - explicit ThumbnailDatabase(HistoryClient* history_client); - ~ThumbnailDatabase(); - - // Must be called after creation but before any other methods are called. - // When not INIT_OK, no other functions should be called. - sql::InitStatus Init(const base::FilePath& db_name); - - // Computes and records various metrics for the database. Should only be - // called once and only upon successful Init. - void ComputeDatabaseMetrics(); - - // Transactions on the database. - void BeginTransaction(); - void CommitTransaction(); - int transaction_nesting() const { - return db_.transaction_nesting(); - } - void RollbackTransaction(); - - // Vacuums the database. This will cause sqlite to defragment and collect - // unused space in the file. It can be VERY SLOW. - void Vacuum(); - - // Try to trim the cache memory used by the database. If |aggressively| is - // true try to trim all unused cache, otherwise trim by half. - void TrimMemory(bool aggressively); - - // Favicon Bitmaps ----------------------------------------------------------- - - // Returns true if there are favicon bitmaps for |icon_id|. If - // |bitmap_id_sizes| is non NULL, sets it to a list of the favicon bitmap ids - // and their associated pixel sizes for the favicon with |icon_id|. - // The list contains results for the bitmaps which are cached in the - // favicon_bitmaps table. The pixel sizes are a subset of the sizes in the - // 'sizes' field of the favicons table for |icon_id|. - bool GetFaviconBitmapIDSizes( - favicon_base::FaviconID icon_id, - std::vector* bitmap_id_sizes); - - // Returns true if there are any matched bitmaps for the given |icon_id|. All - // matched results are returned if |favicon_bitmaps| is not NULL. 
- bool GetFaviconBitmaps(favicon_base::FaviconID icon_id, - std::vector* favicon_bitmaps); - - // Gets the last updated time, bitmap data, and pixel size of the favicon - // bitmap at |bitmap_id|. Returns true if successful. - bool GetFaviconBitmap(FaviconBitmapID bitmap_id, - base::Time* last_updated, - scoped_refptr* png_icon_data, - gfx::Size* pixel_size); - - // Adds a bitmap component at |pixel_size| for the favicon with |icon_id|. - // Only favicons representing a .ico file should have multiple favicon bitmaps - // per favicon. - // |icon_data| is the png encoded data. - // The |time| indicates the access time, and is used to detect when the - // favicon should be refreshed. - // |pixel_size| is the pixel dimensions of |icon_data|. - // Returns the id of the added bitmap or 0 if unsuccessful. - FaviconBitmapID AddFaviconBitmap( - favicon_base::FaviconID icon_id, - const scoped_refptr& icon_data, - base::Time time, - const gfx::Size& pixel_size); - - // Sets the bitmap data and the last updated time for the favicon bitmap at - // |bitmap_id|. - // Returns true if successful. - bool SetFaviconBitmap(FaviconBitmapID bitmap_id, - scoped_refptr bitmap_data, - base::Time time); - - // Sets the last updated time for the favicon bitmap at |bitmap_id|. - // Returns true if successful. - bool SetFaviconBitmapLastUpdateTime(FaviconBitmapID bitmap_id, - base::Time time); - - // Deletes the favicon bitmap with |bitmap_id|. - // Returns true if successful. - bool DeleteFaviconBitmap(FaviconBitmapID bitmap_id); - - // Favicons ------------------------------------------------------------------ - - // Sets the the favicon as out of date. This will set |last_updated| for all - // of the bitmaps for |icon_id| to be out of date. - bool SetFaviconOutOfDate(favicon_base::FaviconID icon_id); - - // Returns the id of the entry in the favicon database with the specified url - // and icon type. 
If |required_icon_type| contains multiple icon types and - // there are more than one matched icon in database, only one icon will be - // returned in the priority of TOUCH_PRECOMPOSED_ICON, TOUCH_ICON, and - // FAVICON, and the icon type is returned in icon_type parameter if it is not - // NULL. - // Returns 0 if no entry exists for the specified url. - favicon_base::FaviconID GetFaviconIDForFaviconURL( - const GURL& icon_url, - int required_icon_type, - favicon_base::IconType* icon_type); - - // Gets the icon_url, icon_type and sizes for the specified |icon_id|. - bool GetFaviconHeader(favicon_base::FaviconID icon_id, - GURL* icon_url, - favicon_base::IconType* icon_type); - - // Adds favicon with |icon_url|, |icon_type| and |favicon_sizes| to the - // favicon db, returning its id. - favicon_base::FaviconID AddFavicon(const GURL& icon_url, - favicon_base::IconType icon_type); - - // Adds a favicon with a single bitmap. This call is equivalent to calling - // AddFavicon and AddFaviconBitmap. - favicon_base::FaviconID AddFavicon( - const GURL& icon_url, - favicon_base::IconType icon_type, - const scoped_refptr& icon_data, - base::Time time, - const gfx::Size& pixel_size); - - // Delete the favicon with the provided id. Returns false on failure - bool DeleteFavicon(favicon_base::FaviconID id); - - // Icon Mapping -------------------------------------------------------------- - // - // Returns true if there is a matched icon mapping for the given page and - // icon type. - // The matched icon mapping is returned in the icon_mapping parameter if it is - // not NULL. - - // Returns true if there are icon mappings for the given page and icon types. - // If |required_icon_types| contains multiple icon types and there is more - // than one matched icon type in the database, icons of only a single type - // will be returned in the priority of TOUCH_PRECOMPOSED_ICON, TOUCH_ICON, - // and FAVICON. 
- // The matched icon mappings are returned in the |mapping_data| parameter if - // it is not NULL. - bool GetIconMappingsForPageURL(const GURL& page_url, - int required_icon_types, - std::vector* mapping_data); - - // Returns true if there is any matched icon mapping for the given page. - // All matched icon mappings are returned in descent order of IconType if - // mapping_data is not NULL. - bool GetIconMappingsForPageURL(const GURL& page_url, - std::vector* mapping_data); - - // Adds a mapping between the given page_url and icon_id. - // Returns the new mapping id if the adding succeeds, otherwise 0 is returned. - IconMappingID AddIconMapping(const GURL& page_url, - favicon_base::FaviconID icon_id); - - // Updates the page and icon mapping for the given mapping_id with the given - // icon_id. - // Returns true if the update succeeded. - bool UpdateIconMapping(IconMappingID mapping_id, - favicon_base::FaviconID icon_id); - - // Deletes the icon mapping entries for the given page url. - // Returns true if the deletion succeeded. - bool DeleteIconMappings(const GURL& page_url); - - // Deletes the icon mapping with |mapping_id|. - // Returns true if the deletion succeeded. - bool DeleteIconMapping(IconMappingID mapping_id); - - // Checks whether a favicon is used by any URLs in the database. - bool HasMappingFor(favicon_base::FaviconID id); - - // Clones the existing mappings from |old_page_url| if |new_page_url| has no - // mappings. Otherwise, will leave mappings alone. - bool CloneIconMappings(const GURL& old_page_url, const GURL& new_page_url); - - // The class to enumerate icon mappings. Use InitIconMappingEnumerator to - // initialize. - class IconMappingEnumerator { - public: - IconMappingEnumerator(); - ~IconMappingEnumerator(); - - // Get the next icon mapping, return false if no more are available. 
- bool GetNextIconMapping(IconMapping* icon_mapping); - - private: - friend class ThumbnailDatabase; - - // Used to query database and return the data for filling IconMapping in - // each call of GetNextIconMapping(). - sql::Statement statement_; - - DISALLOW_COPY_AND_ASSIGN(IconMappingEnumerator); - }; - - // Return all icon mappings of the given |icon_type|. - bool InitIconMappingEnumerator(favicon_base::IconType type, - IconMappingEnumerator* enumerator); - - // Remove all data except that associated with the passed page urls. - // Returns false in case of failure. A nested transaction is used, - // so failure causes any outer transaction to be rolled back. - bool RetainDataForPageUrls(const std::vector& urls_to_keep); - - private: - FRIEND_TEST_ALL_PREFIXES(ThumbnailDatabaseTest, RetainDataForPageUrls); - FRIEND_TEST_ALL_PREFIXES(ThumbnailDatabaseTest, Version3); - FRIEND_TEST_ALL_PREFIXES(ThumbnailDatabaseTest, Version4); - FRIEND_TEST_ALL_PREFIXES(ThumbnailDatabaseTest, Version5); - FRIEND_TEST_ALL_PREFIXES(ThumbnailDatabaseTest, Version6); - FRIEND_TEST_ALL_PREFIXES(ThumbnailDatabaseTest, Version7); - FRIEND_TEST_ALL_PREFIXES(ThumbnailDatabaseTest, WildSchema); - - // Open database on a given filename. If the file does not exist, - // it is created. - // |db| is the database to open. - // |db_name| is a path to the database file. - sql::InitStatus OpenDatabase(sql::Connection* db, - const base::FilePath& db_name); - - // Helper function to implement internals of Init(). This allows - // Init() to retry in case of failure, since some failures run - // recovery code. - sql::InitStatus InitImpl(const base::FilePath& db_name); - - // Helper function to handle cleanup on upgrade failures. - sql::InitStatus CantUpgradeToVersion(int cur_version); - - // Adds support for size in favicons table. - bool UpgradeToVersion6(); - - // Removes sizes column. - bool UpgradeToVersion7(); - - // Returns true if the |favicons| database is missing a column. 
- bool IsFaviconDBStructureIncorrect(); - - sql::Connection db_; - sql::MetaTable meta_table_; - - HistoryClient* history_client_; -}; - -} // namespace history - -#endif // CHROME_BROWSER_HISTORY_THUMBNAIL_DATABASE_H_ diff --git a/chrome/browser/history/thumbnail_database_unittest.cc b/chrome/browser/history/thumbnail_database_unittest.cc index f5ec54d..0b2c6ab 100644 --- a/chrome/browser/history/thumbnail_database_unittest.cc +++ b/chrome/browser/history/thumbnail_database_unittest.cc @@ -11,8 +11,8 @@ #include "base/files/scoped_temp_dir.h" #include "base/memory/ref_counted_memory.h" #include "base/path_service.h" -#include "chrome/browser/history/thumbnail_database.h" #include "chrome/common/chrome_paths.h" +#include "components/history/core/browser/thumbnail_database.h" #include "sql/connection.h" #include "sql/recovery.h" #include "sql/test/scoped_error_ignorer.h" diff --git a/chrome/browser/history/top_sites.h b/chrome/browser/history/top_sites.h index 3f5260f..96a82b0 100644 --- a/chrome/browser/history/top_sites.h +++ b/chrome/browser/history/top_sites.h @@ -9,10 +9,12 @@ #include "base/callback.h" #include "base/gtest_prod_util.h" #include "base/memory/ref_counted.h" -#include "chrome/browser/history/history_service.h" +#include "base/observer_list.h" +#include "base/task/cancelable_task_tracker.h" #include "components/history/core/browser/history_types.h" #include "components/history/core/browser/top_sites_observer.h" #include "components/history/core/common/thumbnail_score.h" +#include "content/public/browser/notification_observer.h" #include "third_party/skia/include/core/SkColor.h" #include "ui/gfx/image/image.h" diff --git a/chrome/browser/history/top_sites_cache.cc b/chrome/browser/history/top_sites_cache.cc deleted file mode 100644 index 9b0f5e45..0000000 --- a/chrome/browser/history/top_sites_cache.cc +++ /dev/null @@ -1,182 +0,0 @@ -// Copyright (c) 2012 The Chromium Authors. All rights reserved. 
-// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "chrome/browser/history/top_sites_cache.h" - -#include "base/logging.h" -#include "base/memory/ref_counted_memory.h" - -namespace history { - -TopSitesCache::CanonicalURLQuery::CanonicalURLQuery(const GURL& url) { - most_visited_url_.redirects.push_back(url); - entry_.first = &most_visited_url_; - entry_.second = 0u; -} - -TopSitesCache::CanonicalURLQuery::~CanonicalURLQuery() { -} - -TopSitesCache::TopSitesCache() { - clear_query_ref_.ClearQuery(); - clear_query_ref_.ClearRef(); - clear_path_query_ref_.ClearQuery(); - clear_path_query_ref_.ClearRef(); - clear_path_query_ref_.ClearPath(); -} - -TopSitesCache::~TopSitesCache() { -} - -void TopSitesCache::SetTopSites(const MostVisitedURLList& top_sites) { - top_sites_ = top_sites; - CountForcedURLs(); - GenerateCanonicalURLs(); -} - -void TopSitesCache::SetThumbnails(const URLToImagesMap& images) { - images_ = images; -} - -Images* TopSitesCache::GetImage(const GURL& url) { - return &images_[GetCanonicalURL(url)]; -} - -bool TopSitesCache::GetPageThumbnail( - const GURL& url, - scoped_refptr* bytes) const { - std::map::const_iterator found = - images_.find(GetCanonicalURL(url)); - if (found != images_.end()) { - base::RefCountedMemory* data = found->second.thumbnail.get(); - if (data) { - *bytes = data; - return true; - } - } - return false; -} - -bool TopSitesCache::GetPageThumbnailScore(const GURL& url, - ThumbnailScore* score) const { - std::map::const_iterator found = - images_.find(GetCanonicalURL(url)); - if (found != images_.end()) { - *score = found->second.thumbnail_score; - return true; - } - return false; -} - -const GURL& TopSitesCache::GetCanonicalURL(const GURL& url) const { - CanonicalURLs::const_iterator it = GetCanonicalURLsIterator(url); - return it == canonical_urls_.end() ? 
url : it->first.first->url; -} - -GURL TopSitesCache::GetGeneralizedCanonicalURL(const GURL& url) const { - CanonicalURLs::const_iterator it_hi = - canonical_urls_.lower_bound(CanonicalURLQuery(url).entry()); - if (it_hi != canonical_urls_.end()) { - // Test match ignoring "?query#ref". This also handles exact match. - if (url.ReplaceComponents(clear_query_ref_) == - GetURLFromIterator(it_hi).ReplaceComponents(clear_query_ref_)) { - return it_hi->first.first->url; - } - } - // Everything on or after |it_hi| is irrelevant. - - GURL base_url(url.ReplaceComponents(clear_path_query_ref_)); - CanonicalURLs::const_iterator it_lo = - canonical_urls_.lower_bound(CanonicalURLQuery(base_url).entry()); - if (it_lo == canonical_urls_.end()) - return GURL::EmptyGURL(); - GURL compare_url_lo(GetURLFromIterator(it_lo)); - if (!HaveSameSchemeHostAndPort(base_url, compare_url_lo) || - !IsPathPrefix(base_url.path(), compare_url_lo.path())) { - return GURL::EmptyGURL(); - } - // Everything before |it_lo| is irrelevant. - - // Search in [|it_lo|, |it_hi|) in reversed order. The first URL found that's - // a prefix of |url| (ignoring "?query#ref") would be returned. 
- for (CanonicalURLs::const_iterator it = it_hi; it != it_lo;) { - --it; - GURL compare_url(GetURLFromIterator(it)); - DCHECK(HaveSameSchemeHostAndPort(compare_url, url)); - if (IsPathPrefix(compare_url.path(), url.path())) - return it->first.first->url; - } - - return GURL::EmptyGURL(); -} - -bool TopSitesCache::IsKnownURL(const GURL& url) const { - return GetCanonicalURLsIterator(url) != canonical_urls_.end(); -} - -size_t TopSitesCache::GetURLIndex(const GURL& url) const { - DCHECK(IsKnownURL(url)); - return GetCanonicalURLsIterator(url)->second; -} - -size_t TopSitesCache::GetNumNonForcedURLs() const { - return top_sites_.size() - num_forced_urls_; -} - -size_t TopSitesCache::GetNumForcedURLs() const { - return num_forced_urls_; -} - -void TopSitesCache::CountForcedURLs() { - num_forced_urls_ = 0; - while (num_forced_urls_ < top_sites_.size()) { - // Forced sites are all at the beginning. - if (top_sites_[num_forced_urls_].last_forced_time.is_null()) - break; - num_forced_urls_++; - } -#if DCHECK_IS_ON - // In debug, ensure the cache user has no forced URLs pass that point. - for (size_t i = num_forced_urls_; i < top_sites_.size(); ++i) { - DCHECK(top_sites_[i].last_forced_time.is_null()) - << "All the forced URLs must appear before non-forced URLs."; - } -#endif -} - -void TopSitesCache::GenerateCanonicalURLs() { - canonical_urls_.clear(); - for (size_t i = 0; i < top_sites_.size(); i++) - StoreRedirectChain(top_sites_[i].redirects, i); -} - -void TopSitesCache::StoreRedirectChain(const RedirectList& redirects, - size_t destination) { - // |redirects| is empty if the user pinned a site and there are not enough top - // sites before the pinned site. - - // Map all the redirected URLs to the destination. - for (size_t i = 0; i < redirects.size(); i++) { - // If this redirect is already known, don't replace it with a new one. 
- if (!IsKnownURL(redirects[i])) { - CanonicalURLEntry entry; - entry.first = &(top_sites_[destination]); - entry.second = i; - canonical_urls_[entry] = destination; - } - } -} - -TopSitesCache::CanonicalURLs::const_iterator - TopSitesCache::GetCanonicalURLsIterator(const GURL& url) const { - return canonical_urls_.find(CanonicalURLQuery(url).entry()); -} - -const GURL& TopSitesCache::GetURLFromIterator( - CanonicalURLs::const_iterator it) const { - DCHECK(it != canonical_urls_.end()); - return it->first.first->redirects[it->first.second]; -} - -} // namespace history diff --git a/chrome/browser/history/top_sites_cache.h b/chrome/browser/history/top_sites_cache.h deleted file mode 100644 index de3ee6d..0000000 --- a/chrome/browser/history/top_sites_cache.h +++ /dev/null @@ -1,170 +0,0 @@ -// Copyright (c) 2012 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef CHROME_BROWSER_HISTORY_TOP_SITES_CACHE_H_ -#define CHROME_BROWSER_HISTORY_TOP_SITES_CACHE_H_ - -#include -#include - -#include "base/memory/ref_counted.h" -#include "chrome/browser/history/url_utils.h" -#include "components/history/core/browser/history_types.h" -#include "url/gurl.h" - -class GURL; - -namespace history { - -// TopSiteCache caches thumbnails for visited pages. Retrieving thumbnails from -// a given input URL is a two-stage process: -// -// input URL --(map 1)--> canonical URL --(map 2)--> image. -// -// (map 1) searches for an URL in |canonical_urls_| that "matches" (see below) -// input URL. If found, canonical URL assigned to the result. Otherwise the -// input URL is considered to already be a canonical URL. -// -// (map 2) simply looks up canonical URL in |images_|. -// -// The rule to "match" URL in |canonical_urls_| always favors exact match. -// - In GetCanonicalURL(), exact match is the only case examined. 
-// - In GetGeneralizedCanonicalURL(), we also perform "generalized" URL matches, -// i.e., stored URLs in |canonical_urls_| that are prefixes of input URL, -// ignoring "?query#ref". -// For the latter two "URL prefix matches", we prefer the match that is closest -// to input URL, w.r.t. path hierarchy. - -// TopSitesCache caches the top sites and thumbnails for TopSites. -class TopSitesCache { - public: - TopSitesCache(); - ~TopSitesCache(); - - // Set the top sites. In |top_sites| all forced URLs must appear before - // non-forced URLs. This is only checked in debug. - void SetTopSites(const MostVisitedURLList& top_sites); - const MostVisitedURLList& top_sites() const { return top_sites_; } - - // The thumbnails. - void SetThumbnails(const URLToImagesMap& images); - const URLToImagesMap& images() const { return images_; } - - // Returns the thumbnail as an Image for the specified url. This adds an entry - // for |url| if one has not yet been added. - Images* GetImage(const GURL& url); - - // Fetches the thumbnail for the specified url. Returns true if there is a - // thumbnail for the specified url. It is possible for a URL to be in TopSites - // but not have an thumbnail. - bool GetPageThumbnail(const GURL& url, - scoped_refptr* bytes) const; - - // Fetches the thumbnail score for the specified url. Returns true if - // there is a thumbnail score for the specified url. - bool GetPageThumbnailScore(const GURL& url, ThumbnailScore* score) const; - - // Returns the canonical URL for |url|. - const GURL& GetCanonicalURL(const GURL& url) const; - - // Searches for a URL in |canonical_urls_| that is a URL prefix of |url|. - // Prefers an exact match if it exists, or the least generalized match while - // ignoring "?query#ref". Returns the resulting canonical URL if match is - // found, otherwise returns an empty GURL. - GURL GetGeneralizedCanonicalURL(const GURL& url) const; - - // Returns true if |url| is known. 
- bool IsKnownURL(const GURL& url) const; - - // Returns the index into |top_sites_| for |url|. - size_t GetURLIndex(const GURL& url) const; - - // Returns the number of non-forced URLs in the cache. - size_t GetNumNonForcedURLs() const; - - // Returns the number of forced URLs in the cache. - size_t GetNumForcedURLs() const; - - private: - // The entries in CanonicalURLs, see CanonicalURLs for details. The second - // argument gives the index of the URL into MostVisitedURLs redirects. - typedef std::pair CanonicalURLEntry; - - // Comparator used for CanonicalURLs. - class CanonicalURLComparator { - public: - bool operator()(const CanonicalURLEntry& e1, - const CanonicalURLEntry& e2) const { - return CanonicalURLStringCompare(e1.first->redirects[e1.second].spec(), - e2.first->redirects[e2.second].spec()); - } - }; - - // Creates the object needed to form std::map queries into |canonical_urls_|, - // wrapping all required temporary data to allow inlining. - class CanonicalURLQuery { - public: - explicit CanonicalURLQuery(const GURL& url); - ~CanonicalURLQuery(); - const CanonicalURLEntry& entry() { return entry_; } - - private: - MostVisitedURL most_visited_url_; - CanonicalURLEntry entry_; - }; - - // This is used to map from redirect url to the MostVisitedURL the redirect is - // from. Ideally this would be map (second param indexing into - // top_sites_), but this results in duplicating all redirect urls. As some - // sites have a lot of redirects, we instead use the MostVisitedURL* and the - // index of the redirect as the key, and the index into top_sites_ as the - // value. This way we aren't duplicating GURLs. CanonicalURLComparator - // enforces the ordering as if we were using GURLs. - typedef std::map CanonicalURLs; - - // Count the number of forced URLs. - void CountForcedURLs(); - - // Generates the set of canonical urls from |top_sites_|. - void GenerateCanonicalURLs(); - - // Stores a set of redirects. This is used by GenerateCanonicalURLs. 
- void StoreRedirectChain(const RedirectList& redirects, size_t destination); - - // Returns the iterator into |canonical_urls_| for the |url|. - CanonicalURLs::const_iterator GetCanonicalURLsIterator(const GURL& url) const; - - // Returns the GURL corresponding to an iterator in |canonical_urls_|. - const GURL& GetURLFromIterator(CanonicalURLs::const_iterator it) const; - - // The number of top sites with forced URLs. - size_t num_forced_urls_; - - // The top sites. This list must always contain the forced URLs first followed - // by the non-forced URLs. This is not strictly enforced but is checked in - // debug. - MostVisitedURLList top_sites_; - - // The images. These map from canonical url to image. - URLToImagesMap images_; - - // Generated from the redirects to and from the most visited pages. See - // description above typedef for details. - CanonicalURLs canonical_urls_; - - // Helper to clear "?query#ref" from any GURL. This is set in the constructor - // and never modified after. - GURL::Replacements clear_query_ref_; - - // Helper to clear "/path?query#ref" from any GURL. This is set in the - // constructor and never modified after. - GURL::Replacements clear_path_query_ref_; - - DISALLOW_COPY_AND_ASSIGN(TopSitesCache); -}; - -} // namespace history - -#endif // CHROME_BROWSER_HISTORY_TOP_SITES_CACHE_H_ diff --git a/chrome/browser/history/top_sites_cache_unittest.cc b/chrome/browser/history/top_sites_cache_unittest.cc deleted file mode 100644 index cc3eb79..0000000 --- a/chrome/browser/history/top_sites_cache_unittest.cc +++ /dev/null @@ -1,258 +0,0 @@ -// Copyright 2013 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#include "chrome/browser/history/top_sites_cache.h" - -#include - -#include "base/basictypes.h" -#include "base/logging.h" -#include "base/strings/string16.h" -#include "base/strings/string_number_conversions.h" -#include "base/strings/utf_string_conversions.h" -#include "testing/gtest/include/gtest/gtest.h" - -namespace history { - -namespace { - -class TopSitesCacheTest : public testing::Test { - public: - TopSitesCacheTest() { - } - - protected: - // Initializes |top_sites_| on |spec|, which is a list of URL strings with - // optional indents: indentated URLs redirect to the last non-indented URL. - // Titles are assigned as "Title 1", "Title 2", etc., in the order of - // appearance. See |kTopSitesSpecBasic| for an example. This function does not - // update |cache_| so you can manipulate |top_sites_| before you update it. - void BuildTopSites(const char** spec, size_t size); - - // Initializes |top_sites_| and |cache_| based on |spec|. - void InitTopSiteCache(const char** spec, size_t size); - - MostVisitedURLList top_sites_; - TopSitesCache cache_; - - private: - DISALLOW_COPY_AND_ASSIGN(TopSitesCacheTest); -}; - -void TopSitesCacheTest::BuildTopSites(const char** spec, size_t size) { - std::set urls_seen; - for (size_t i = 0; i < size; ++i) { - const char* spec_item = spec[i]; - while (*spec_item && *spec_item == ' ') // Eat indent. - ++spec_item; - if (urls_seen.find(spec_item) != urls_seen.end()) - NOTREACHED() << "Duplicate URL found: " << spec_item; - urls_seen.insert(spec_item); - if (spec_item == spec[i]) { // No indent: add new MostVisitedURL. - base::string16 title(base::ASCIIToUTF16("Title ") + - base::Uint64ToString16(top_sites_.size() + 1)); - top_sites_.push_back(MostVisitedURL(GURL(spec_item), title)); - } - ASSERT_TRUE(!top_sites_.empty()); - // Set up redirect to canonical URL. Canonical URL redirects to itself, too. 
- top_sites_.back().redirects.push_back(GURL(spec_item)); - } -} - -void TopSitesCacheTest::InitTopSiteCache(const char** spec, size_t size) { - BuildTopSites(spec, size); - cache_.SetTopSites(top_sites_); -} - -const char* kTopSitesSpecBasic[] = { - "http://www.google.com", - " http://www.gogle.com", // Redirects. - " http://www.gooogle.com", // Redirects. - "http://www.youtube.com/a/b", - " http://www.youtube.com/a/b?test=1", // Redirects. - "https://www.google.com/", - " https://www.gogle.com", // Redirects. - "http://www.example.com:3141/", -}; - -TEST_F(TopSitesCacheTest, GetCanonicalURL) { - InitTopSiteCache(kTopSitesSpecBasic, arraysize(kTopSitesSpecBasic)); - struct { - const char* expected; - const char* query; - } test_cases[] = { - // Already is canonical: redirects. - {"http://www.google.com/", "http://www.google.com"}, - // Exact match with stored URL: redirects. - {"http://www.google.com/", "http://www.gooogle.com"}, - // Recognizes despite trailing "/": redirects - {"http://www.google.com/", "http://www.gooogle.com/"}, - // Exact match with URL with query: redirects. - {"http://www.youtube.com/a/b", "http://www.youtube.com/a/b?test=1"}, - // No match with URL with query: as-is. - {"http://www.youtube.com/a/b?test", "http://www.youtube.com/a/b?test"}, - // Never-seen-before URL: as-is. - {"http://maps.google.com/", "http://maps.google.com/"}, - // Changing port number, does not match: as-is. - {"http://www.example.com:1234/", "http://www.example.com:1234"}, - // Smart enough to know that port 80 is HTTP: redirects. - {"http://www.google.com/", "http://www.gooogle.com:80"}, - // Prefix should not work: as-is. 
- {"http://www.youtube.com/a", "http://www.youtube.com/a"}, - }; - for (size_t i = 0; i < arraysize(test_cases); ++i) { - std::string expected(test_cases[i].expected); - std::string query(test_cases[i].query); - EXPECT_EQ(expected, cache_.GetCanonicalURL(GURL(query)).spec()) - << " for test_case[" << i << "]"; - } -} - -TEST_F(TopSitesCacheTest, IsKnownUrl) { - InitTopSiteCache(kTopSitesSpecBasic, arraysize(kTopSitesSpecBasic)); - // Matches. - EXPECT_TRUE(cache_.IsKnownURL(GURL("http://www.google.com"))); - EXPECT_TRUE(cache_.IsKnownURL(GURL("http://www.gooogle.com"))); - EXPECT_TRUE(cache_.IsKnownURL(GURL("http://www.google.com/"))); - - // Non-matches. - EXPECT_FALSE(cache_.IsKnownURL(GURL("http://www.google.com?"))); - EXPECT_FALSE(cache_.IsKnownURL(GURL("http://www.google.net"))); - EXPECT_FALSE(cache_.IsKnownURL(GURL("http://www.google.com/stuff"))); - EXPECT_FALSE(cache_.IsKnownURL(GURL("https://www.gooogle.com"))); - EXPECT_FALSE(cache_.IsKnownURL(GURL("http://www.youtube.com/a"))); -} - -const char* kTopSitesSpecPrefix[] = { - "http://www.google.com/", - " http://www.google.com/test?q=3", // Redirects. - " http://www.google.com/test/y?d", // Redirects. - " http://www.chromium.org/a/b", // Redirects. - "http://www.google.com/2", - " http://www.google.com/test/q", // Redirects. - " http://www.google.com/test/y?b", // Redirects. - "http://www.google.com/3", - " http://www.google.com/testing", // Redirects. - "http://www.google.com/test-hyphen", - "http://www.google.com/sh", - " http://www.google.com/sh/1/2/3", // Redirects. - "http://www.google.com/sh/1", -}; - -TEST_F(TopSitesCacheTest, GetCanonicalURLExactMatch) { - InitTopSiteCache(kTopSitesSpecPrefix, arraysize(kTopSitesSpecPrefix)); - for (size_t i = 0; i < arraysize(kTopSitesSpecPrefix); ++i) { - // Go through each entry in kTopSitesSpecPrefix, trimming space. - const char* s = kTopSitesSpecPrefix[i]; - while (*s && *s == ' ') - ++s; - // Get the answer from direct lookup. 
- GURL stored_url(s); - GURL expected(cache_.GetCanonicalURL(stored_url)); - // Test generalization. - GURL result(cache_.GetGeneralizedCanonicalURL(stored_url)); - EXPECT_EQ(expected, result) << " for kTopSitesSpecPrefix[" << i << "]"; - } -} - -TEST_F(TopSitesCacheTest, GetGeneralizedCanonicalURL) { - InitTopSiteCache(kTopSitesSpecPrefix, arraysize(kTopSitesSpecPrefix)); - struct { - const char* expected; - const char* query; - } test_cases[] = { - // Exact match after trimming "?query": redirects. - {"http://www.google.com/", "http://www.google.com/test"}, - // Same, but different code path: redirects. - {"http://www.google.com/", "http://www.google.com/test/y?e"}, - {"http://www.google.com/", "http://www.google.com/test/y?c"}, - // Same, but code path leads to different result: redirects. - {"http://www.google.com/2", "http://www.google.com/test/y?a"}, - // Generalized match: redirects. - {"http://www.google.com/3", "http://www.google.com/3/1/4/1/5/9"}, - // Generalized match with trailing "/": redirects. - {"http://www.google.com/3", "http://www.google.com/3/1/4/1/5/9/"}, - // Unique generalization match: redirects. - {"http://www.google.com/", "http://www.chromium.org/a/b/c"}, - // Multiple exact matches after trimming: redirects to first. - {"http://www.google.com/2", "http://www.google.com/test/y"}, - // Multiple generalized matches: redirects to least general. - {"http://www.google.com/sh", "http://www.google.com/sh/1/2/3/4/"}, - // Multiple generalized matches: redirects to least general. - {"http://www.google.com/sh", "http://www.google.com/sh/1/2/3/4/"}, - // Competing generalized match: take the most specilized. - {"http://www.google.com/2", "http://www.google.com/test/q"}, - // No generalized match, early element: fails. - {"", "http://www.a.com/"}, - // No generalized match, intermediate element: fails. - {"", "http://www.e-is-between-chromium-and-google.com/"}, - // No generalized match, late element: fails. 
- {"", "http://www.zzzzzzz.com/"}, - // String prefix match but not URL-prefix match: fails. - {"", "http://www.chromium.org/a/beeswax"}, - // String prefix match and URL-prefix match: redirects. - {"http://www.google.com/", "http://www.google.com/shhhhhh"}, - // Different protocol: fails. - {"", "https://www.google.com/test"}, - // Smart enough to know that port 80 is HTTP: redirects. - {"http://www.google.com/", "http://www.google.com:80/test"}, - // Specialized match only: fails. - {"", "http://www.chromium.org/a"}, - }; - for (size_t i = 0; i < arraysize(test_cases); ++i) { - std::string expected(test_cases[i].expected); - std::string query(test_cases[i].query); - GURL result(cache_.GetGeneralizedCanonicalURL(GURL(query))); - EXPECT_EQ(expected, result.spec()) << " for test_case[" << i << "]"; - } -} - -// This tests a special case where there are 2 generalized matches, and both -// should be checked to find the correct match. -TEST_F(TopSitesCacheTest, GetPrefixCanonicalURLDiffByQuery) { - const char* top_sites_spec[] = { - "http://www.dest.com/1", - " http://www.source.com/a?m=5", // Redirects. - "http://www.dest.com/2", - " http://www.source.com/a/t?q=3", // Redirects. - }; - InitTopSiteCache(top_sites_spec, arraysize(top_sites_spec)); - - struct { - const char* expected; - const char* query; - } test_cases[] = { - // Slightly before "http://www.source.com/a?m=5". - {"http://www.dest.com/1", "http://www.source.com/a?l=5"}, - // Slightly after "http://www.source.com/a?m=5". - {"http://www.dest.com/1", "http://www.source.com/a?n=5"}, - // Slightly before "http://www.source.com/a/t?q=3". - {"http://www.dest.com/2", "http://www.source.com/a/t?q=2"}, - // Slightly after "http://www.source.com/a/t?q=3". 
- {"http://www.dest.com/2", "http://www.source.com/a/t?q=4"}, - }; - - for (size_t i = 0; i < arraysize(test_cases); ++i) { - std::string expected(test_cases[i].expected); - std::string query(test_cases[i].query); - GURL result(cache_.GetGeneralizedCanonicalURL(GURL(query))); - EXPECT_EQ(expected, result.spec()) << " for test_case[" << i << "]"; - } -} - -// This test ensures forced URLs behave in the expected way. -TEST_F(TopSitesCacheTest, CacheForcedURLs) { - // Forced URLs must always appear at the beginning of the list. - BuildTopSites(kTopSitesSpecBasic, arraysize(kTopSitesSpecBasic)); - top_sites_[0].last_forced_time = base::Time::FromJsTime(1000); - top_sites_[1].last_forced_time = base::Time::FromJsTime(2000); - cache_.SetTopSites(top_sites_); - - EXPECT_EQ(2u, cache_.GetNumForcedURLs()); - EXPECT_EQ(2u, cache_.GetNumNonForcedURLs()); -} - -} // namespace - -} // namespace history diff --git a/chrome/browser/history/top_sites_impl.cc b/chrome/browser/history/top_sites_impl.cc index d6d2f16..853e8dac 100644 --- a/chrome/browser/history/top_sites_impl.cc +++ b/chrome/browser/history/top_sites_impl.cc @@ -24,12 +24,12 @@ #include "chrome/browser/history/history_backend.h" #include "chrome/browser/history/history_notifications.h" #include "chrome/browser/history/history_service_factory.h" -#include "chrome/browser/history/top_sites_cache.h" -#include "chrome/browser/history/url_utils.h" #include "chrome/browser/profiles/profile.h" #include "chrome/common/pref_names.h" #include "components/history/core/browser/history_db_task.h" #include "components/history/core/browser/page_usage_data.h" +#include "components/history/core/browser/top_sites_cache.h" +#include "components/history/core/browser/url_utils.h" #include "components/history/core/common/thumbnail_score.h" #include "content/public/browser/browser_thread.h" #include "content/public/browser/navigation_controller.h" diff --git a/chrome/browser/history/top_sites_impl_unittest.cc 
b/chrome/browser/history/top_sites_impl_unittest.cc index de3b63b..381c8fb 100644 --- a/chrome/browser/history/top_sites_impl_unittest.cc +++ b/chrome/browser/history/top_sites_impl_unittest.cc @@ -12,13 +12,13 @@ #include "chrome/browser/history/history_service_factory.h" #include "chrome/browser/history/history_unittest_base.h" #include "chrome/browser/history/top_sites.h" -#include "chrome/browser/history/top_sites_cache.h" #include "chrome/browser/history/top_sites_impl.h" #include "chrome/common/chrome_constants.h" #include "chrome/common/chrome_paths.h" #include "chrome/test/base/testing_profile.h" #include "components/history/core/browser/history_db_task.h" #include "components/history/core/browser/history_types.h" +#include "components/history/core/browser/top_sites_cache.h" #include "content/public/browser/notification_service.h" #include "content/public/test/test_browser_thread.h" #include "testing/gtest/include/gtest/gtest.h" diff --git a/chrome/browser/history/url_index_private_data.h b/chrome/browser/history/url_index_private_data.h index 6fe6f18..a2b4a6b 100644 --- a/chrome/browser/history/url_index_private_data.h +++ b/chrome/browser/history/url_index_private_data.h @@ -12,9 +12,9 @@ #include "base/gtest_prod_util.h" #include "base/memory/ref_counted.h" #include "chrome/browser/history/history_service.h" -#include "chrome/browser/history/in_memory_url_index_cache.pb.h" -#include "chrome/browser/history/in_memory_url_index_types.h" #include "chrome/browser/history/scored_history_match.h" +#include "components/history/core/browser/in_memory_url_index_cache.pb.h" +#include "components/history/core/browser/in_memory_url_index_types.h" class HistoryQuickProviderTest; diff --git a/chrome/browser/history/url_utils.cc b/chrome/browser/history/url_utils.cc deleted file mode 100644 index 0c7369a2..0000000 --- a/chrome/browser/history/url_utils.cc +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright 2013 The Chromium Authors. All rights reserved. 
-// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "chrome/browser/history/url_utils.h" - -#include - -#include "url/gurl.h" - -namespace history { - -namespace { - -// Comparator to enforce '\0' < '?' < '#' < '/' < other characters. -int GetURLCharPriority(char ch) { - switch (ch) { - case '\0': return 0; - case '?': return 1; - case '#': return 2; - case '/': return 3; - } - return 4; -} - -} // namespace - -// Instead of splitting URLs and extract path components, we can implement -// CanonicalURLStringCompare() using string operations only. The key idea is, -// treating '/' to be less than any valid path characters would make it behave -// as a separator, so e.g., "test" < "test-case" would be enforced by -// "test/..." < "test-case/...". We also force "?" < "/", so "test?query" < -// "test/stuff". Since the routine is merely lexicographical string comparison -// with remapping of chracter ordering, so it is a valid strict-weak ordering. -bool CanonicalURLStringCompare(const std::string& s1, const std::string& s2) { - const std::string::value_type* ch1 = s1.c_str(); - const std::string::value_type* ch2 = s2.c_str(); - while (*ch1 && *ch2 && *ch1 == *ch2) { - ++ch1; - ++ch2; - } - int pri_diff = GetURLCharPriority(*ch1) - GetURLCharPriority(*ch2); - // We want false to be returned if |pri_diff| > 0. - return (pri_diff != 0) ? pri_diff < 0 : *ch1 < *ch2; -} - -bool HaveSameSchemeHostAndPort(const GURL&url1, const GURL& url2) { - return url1.scheme() == url2.scheme() && url1.host() == url2.host() && - url1.port() == url2.port(); -} - -bool IsPathPrefix(const std::string& p1, const std::string& p2) { - if (p1.length() > p2.length()) - return false; - std::pair - first_diff = std::mismatch(p1.begin(), p1.end(), p2.begin()); - // Necessary condition: |p1| is a string prefix of |p2|. - if (first_diff.first != p1.end()) - return false; // E.g.: (|p1| = "/test", |p2| = "/exam") => false. 
- - // |p1| is string prefix. - if (first_diff.second == p2.end()) // Is exact match? - return true; // E.g.: ("/test", "/test") => true. - // |p1| is strict string prefix, check full match of last path component. - if (!p1.empty() && *p1.rbegin() == '/') // Ends in '/'? - return true; // E.g.: ("/test/", "/test/stuff") => true. - - // Finally, |p1| does not end in "/": check first extra character in |p2|. - // E.g.: ("/test", "/test/stuff") => true; ("/test", "/testing") => false. - return *(first_diff.second) == '/'; -} - -GURL ToggleHTTPAndHTTPS(const GURL& url) { - std::string new_scheme; - if (url.SchemeIs("http")) - new_scheme = "https"; - else if (url.SchemeIs("https")) - new_scheme = "http"; - else - return GURL::EmptyGURL(); - url::Component comp; - comp.len = new_scheme.length(); - GURL::Replacements replacement; - replacement.SetScheme(new_scheme.c_str(), comp); - return url.ReplaceComponents(replacement); -} - -} // namespace history diff --git a/chrome/browser/history/url_utils.h b/chrome/browser/history/url_utils.h deleted file mode 100644 index 9e1ed2c..0000000 --- a/chrome/browser/history/url_utils.h +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2013 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef CHROME_BROWSER_HISTORY_URL_UTILS_H_ -#define CHROME_BROWSER_HISTORY_URL_UTILS_H_ - -#include - -class GURL; - -namespace history { - -// CanonicalURLStringCompare performs lexicographical comparison of two strings -// that represent valid URLs, so that if the pre-path (scheme, host, and port) -// parts are equal, then the path parts are compared by treating path components -// (delimited by "/") as separate tokens that form units of comparison. 
-// For example, let us compare |s1| and |s2|, with -// |s1| = "http://www.google.com:80/base/test/ab/cd?query/stuff" -// |s2| = "http://www.google.com:80/base/test-case/yz#ref/stuff" -// The pre-path parts "http://www.google.com:80/" match. We treat the paths as -// |s1| => ["base", "test", "ab", "cd"] -// |s2| => ["base", "test-case", "yz"] -// Components 1 "base" are identical. Components 2 yield "test" < "test-case", -// so we consider |s1| < |s2|, and return true. Note that naive string -// comparison would yield the opposite (|s1| > |s2|), since '/' > '-' in ASCII. -// Note that path can be terminated by "?query" or "#ref". The post-path parts -// are compared in an arbitrary (but consistent) way. -bool CanonicalURLStringCompare(const std::string& s1, const std::string& s2); - -// Returns whether |url1| and |url2| have the same scheme, host, and port. -bool HaveSameSchemeHostAndPort(const GURL&url1, const GURL& url2); - -// Treats |path1| and |path2| as lists of path components (e.g., ["a", "bb"] -// for "/a/bb"). Returns whether |path1|'s list is a prefix of |path2|'s list. -// This is used to define "URL prefix". Note that "test" does not count as a -// prefix of "testing", even though "test" is a (string) prefix of "testing". -bool IsPathPrefix(const std::string& p1, const std::string& p2); - -// Converts |url| from HTTP to HTTPS, and vice versa, then returns the result. -// If |url| is neither HTTP nor HTTPS, returns an empty URL. -GURL ToggleHTTPAndHTTPS(const GURL& url); - -} // namespace history - -#endif // CHROME_BROWSER_HISTORY_URL_UTILS_H_ diff --git a/chrome/browser/history/url_utils_unittest.cc b/chrome/browser/history/url_utils_unittest.cc deleted file mode 100644 index 970c5c0..0000000 --- a/chrome/browser/history/url_utils_unittest.cc +++ /dev/null @@ -1,131 +0,0 @@ -// Copyright 2013 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#include "chrome/browser/history/url_utils.h" - -#include "testing/gtest/include/gtest/gtest.h" -#include "url/gurl.h" - -namespace history { - -namespace { - -TEST(HistoryUrlUtilsTest, CanonicalURLStringCompare) { - // Comprehensive test by comparing each pair in sorted list. O(n^2). - const char* sorted_list[] = { - "http://www.gogle.com/redirects_to_google", - "http://www.google.com", - "http://www.google.com/", - "http://www.google.com/?q", - "http://www.google.com/A", - "http://www.google.com/index.html", - "http://www.google.com/test", - "http://www.google.com/test?query", - "http://www.google.com/test?r=3", - "http://www.google.com/test#hash", - "http://www.google.com/test/?query", - "http://www.google.com/test/#hash", - "http://www.google.com/test/zzzzz", - "http://www.google.com/test$dollar", - "http://www.google.com/test%E9%9B%80", - "http://www.google.com/test-case", - "http://www.google.com:80/", - "https://www.google.com", - }; - for (size_t i = 0; i < arraysize(sorted_list); ++i) { - EXPECT_FALSE(CanonicalURLStringCompare(sorted_list[i], sorted_list[i])) - << " for \"" << sorted_list[i] << "\" < \"" << sorted_list[i] << "\""; - // Every disjoint pair-wise comparison. 
- for (size_t j = i + 1; j < arraysize(sorted_list); ++j) { - EXPECT_TRUE(CanonicalURLStringCompare(sorted_list[i], sorted_list[j])) - << " for \"" << sorted_list[i] << "\" < \"" << sorted_list[j] << "\""; - EXPECT_FALSE(CanonicalURLStringCompare(sorted_list[j], sorted_list[i])) - << " for \"" << sorted_list[j] << "\" < \"" << sorted_list[i] << "\""; - } - } -} - -TEST(HistoryUrlUtilsTest, HaveSameSchemeHostAndPort) { - struct { - const char* s1; - const char* s2; - } true_cases[] = { - {"http://www.google.com", "http://www.google.com"}, - {"http://www.google.com/a/b", "http://www.google.com/a/b"}, - {"http://www.google.com?test=3", "http://www.google.com/"}, - {"http://www.google.com/#hash", "http://www.google.com/?q"}, - {"http://www.google.com/", "http://www.google.com/test/with/dir/"}, - {"http://www.google.com:360", "http://www.google.com:360/?q=1234"}, - {"http://www.google.com:80", "http://www.google.com/gurl/is/smart"}, - {"http://www.google.com/test", "http://www.google.com/test/with/dir/"}, - {"http://www.google.com/test?", "http://www.google.com/test/with/dir/"}, - }; - for (size_t i = 0; i < arraysize(true_cases); ++i) { - EXPECT_TRUE(HaveSameSchemeHostAndPort(GURL(true_cases[i].s1), - GURL(true_cases[i].s2))) - << " for true_cases[" << i << "]"; - } - struct { - const char* s1; - const char* s2; - } false_cases[] = { - {"http://www.google.co", "http://www.google.com"}, - {"http://google.com", "http://www.google.com"}, - {"http://www.google.com", "https://www.google.com"}, - {"http://www.google.com/path", "http://www.google.com:137/path"}, - {"http://www.google.com/same/dir", "http://www.youtube.com/same/dir"}, - }; - for (size_t i = 0; i < arraysize(false_cases); ++i) { - EXPECT_FALSE(HaveSameSchemeHostAndPort(GURL(false_cases[i].s1), - GURL(false_cases[i].s2))) - << " for false_cases[" << i << "]"; - } -} - -TEST(HistoryUrlUtilsTest, IsPathPrefix) { - struct { - const char* p1; - const char* p2; - } true_cases[] = { - {"", ""}, - {"", "/"}, - {"/", 
"/"}, - {"/a/b", "/a/b"}, - {"/", "/test/with/dir/"}, - {"/test", "/test/with/dir/"}, - {"/test/", "/test/with/dir"}, - }; - for (size_t i = 0; i < arraysize(true_cases); ++i) { - EXPECT_TRUE(IsPathPrefix(true_cases[i].p1, true_cases[i].p2)) - << " for true_cases[" << i << "]"; - } - struct { - const char* p1; - const char* p2; - } false_cases[] = { - {"/test", ""}, - {"/", ""}, // Arguable. - {"/a/b/", "/a/b"}, // Arguable. - {"/te", "/test"}, - {"/test", "/test-bed"}, - {"/test-", "/test"}, - }; - for (size_t i = 0; i < arraysize(false_cases); ++i) { - EXPECT_FALSE(IsPathPrefix(false_cases[i].p1, false_cases[i].p2)) - << " for false_cases[" << i << "]"; - } -} - -TEST(HistoryUrlUtilsTest, ToggleHTTPAndHTTPS) { - EXPECT_EQ(GURL("http://www.google.com/test?q#r"), - ToggleHTTPAndHTTPS(GURL("https://www.google.com/test?q#r"))); - EXPECT_EQ(GURL("https://www.google.com:137/"), - ToggleHTTPAndHTTPS(GURL("http://www.google.com:137/"))); - EXPECT_EQ(GURL::EmptyGURL(), - ToggleHTTPAndHTTPS(GURL("ftp://www.google.com/"))); -} - -} // namespace - -} // namespace history diff --git a/chrome/browser/history/visit_database.cc b/chrome/browser/history/visit_database.cc deleted file mode 100644 index 3891d70..0000000 --- a/chrome/browser/history/visit_database.cc +++ /dev/null @@ -1,625 +0,0 @@ -// Copyright (c) 2012 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#include "chrome/browser/history/visit_database.h" - -#include -#include -#include -#include - -#include "base/logging.h" -#include "base/strings/string_number_conversions.h" -#include "chrome/browser/history/visit_filter.h" -#include "chrome/common/url_constants.h" -#include "components/history/core/browser/url_database.h" -#include "sql/statement.h" -#include "ui/base/page_transition_types.h" - -namespace history { - -VisitDatabase::VisitDatabase() { -} - -VisitDatabase::~VisitDatabase() { -} - -bool VisitDatabase::InitVisitTable() { - if (!GetDB().DoesTableExist("visits")) { - if (!GetDB().Execute("CREATE TABLE visits(" - "id INTEGER PRIMARY KEY," - "url INTEGER NOT NULL," // key of the URL this corresponds to - "visit_time INTEGER NOT NULL," - "from_visit INTEGER," - "transition INTEGER DEFAULT 0 NOT NULL," - "segment_id INTEGER," - // Some old DBs may have an "is_indexed" field here, but this is no - // longer used and should NOT be read or written from any longer. - "visit_duration INTEGER DEFAULT 0 NOT NULL)")) - return false; - } - - // Visit source table contains the source information for all the visits. To - // save space, we do not record those user browsed visits which would be the - // majority in this table. Only other sources are recorded. - // Due to the tight relationship between visit_source and visits table, they - // should be created and dropped at the same time. - if (!GetDB().DoesTableExist("visit_source")) { - if (!GetDB().Execute("CREATE TABLE visit_source(" - "id INTEGER PRIMARY KEY,source INTEGER NOT NULL)")) - return false; - } - - // Index over url so we can quickly find visits for a page. - if (!GetDB().Execute( - "CREATE INDEX IF NOT EXISTS visits_url_index ON visits (url)")) - return false; - - // Create an index over from visits so that we can efficiently find - // referrers and redirects. 
- if (!GetDB().Execute( - "CREATE INDEX IF NOT EXISTS visits_from_index ON " - "visits (from_visit)")) - return false; - - // Create an index over time so that we can efficiently find the visits in a - // given time range (most history views are time-based). - if (!GetDB().Execute( - "CREATE INDEX IF NOT EXISTS visits_time_index ON " - "visits (visit_time)")) - return false; - - return true; -} - -bool VisitDatabase::DropVisitTable() { - // This will also drop the indices over the table. - return - GetDB().Execute("DROP TABLE IF EXISTS visit_source") && - GetDB().Execute("DROP TABLE visits"); -} - -// Must be in sync with HISTORY_VISIT_ROW_FIELDS. -// static -void VisitDatabase::FillVisitRow(sql::Statement& statement, VisitRow* visit) { - visit->visit_id = statement.ColumnInt64(0); - visit->url_id = statement.ColumnInt64(1); - visit->visit_time = base::Time::FromInternalValue(statement.ColumnInt64(2)); - visit->referring_visit = statement.ColumnInt64(3); - visit->transition = ui::PageTransitionFromInt(statement.ColumnInt(4)); - visit->segment_id = statement.ColumnInt64(5); - visit->visit_duration = - base::TimeDelta::FromInternalValue(statement.ColumnInt64(6)); -} - -// static -bool VisitDatabase::FillVisitVector(sql::Statement& statement, - VisitVector* visits) { - if (!statement.is_valid()) - return false; - - while (statement.Step()) { - history::VisitRow visit; - FillVisitRow(statement, &visit); - visits->push_back(visit); - } - - return statement.Succeeded(); -} - -// static -bool VisitDatabase::FillVisitVectorWithOptions(sql::Statement& statement, - const QueryOptions& options, - VisitVector* visits) { - std::set found_urls; - - // Keeps track of the day that |found_urls| is holding the URLs for, in order - // to handle removing per-day duplicates. 
- base::Time found_urls_midnight; - - while (statement.Step()) { - VisitRow visit; - FillVisitRow(statement, &visit); - - if (options.duplicate_policy != QueryOptions::KEEP_ALL_DUPLICATES) { - if (options.duplicate_policy == QueryOptions::REMOVE_DUPLICATES_PER_DAY && - found_urls_midnight != visit.visit_time.LocalMidnight()) { - found_urls.clear(); - found_urls_midnight = visit.visit_time.LocalMidnight(); - } - // Make sure the URL this visit corresponds to is unique. - if (found_urls.find(visit.url_id) != found_urls.end()) - continue; - found_urls.insert(visit.url_id); - } - - if (static_cast(visits->size()) >= options.EffectiveMaxCount()) - return true; - visits->push_back(visit); - } - return false; -} - -VisitID VisitDatabase::AddVisit(VisitRow* visit, VisitSource source) { - sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, - "INSERT INTO visits " - "(url, visit_time, from_visit, transition, segment_id, " - "visit_duration) VALUES (?,?,?,?,?,?)")); - statement.BindInt64(0, visit->url_id); - statement.BindInt64(1, visit->visit_time.ToInternalValue()); - statement.BindInt64(2, visit->referring_visit); - statement.BindInt64(3, visit->transition); - statement.BindInt64(4, visit->segment_id); - statement.BindInt64(5, visit->visit_duration.ToInternalValue()); - - if (!statement.Run()) { - DVLOG(0) << "Failed to execute visit insert statement: " - << "url_id = " << visit->url_id; - return 0; - } - - visit->visit_id = GetDB().GetLastInsertRowId(); - - if (source != SOURCE_BROWSED) { - // Record the source of this visit when it is not browsed. 
- sql::Statement statement1(GetDB().GetCachedStatement(SQL_FROM_HERE, - "INSERT INTO visit_source (id, source) VALUES (?,?)")); - statement1.BindInt64(0, visit->visit_id); - statement1.BindInt64(1, source); - - if (!statement1.Run()) { - DVLOG(0) << "Failed to execute visit_source insert statement: " - << "id = " << visit->visit_id; - return 0; - } - } - - return visit->visit_id; -} - -void VisitDatabase::DeleteVisit(const VisitRow& visit) { - // Patch around this visit. Any visits that this went to will now have their - // "source" be the deleted visit's source. - sql::Statement update_chain(GetDB().GetCachedStatement(SQL_FROM_HERE, - "UPDATE visits SET from_visit=? WHERE from_visit=?")); - update_chain.BindInt64(0, visit.referring_visit); - update_chain.BindInt64(1, visit.visit_id); - if (!update_chain.Run()) - return; - - // Now delete the actual visit. - sql::Statement del(GetDB().GetCachedStatement(SQL_FROM_HERE, - "DELETE FROM visits WHERE id=?")); - del.BindInt64(0, visit.visit_id); - if (!del.Run()) - return; - - // Try to delete the entry in visit_source table as well. - // If the visit was browsed, there is no corresponding entry in visit_source - // table, and nothing will be deleted. - del.Assign(GetDB().GetCachedStatement(SQL_FROM_HERE, - "DELETE FROM visit_source WHERE id=?")); - del.BindInt64(0, visit.visit_id); - del.Run(); -} - -bool VisitDatabase::GetRowForVisit(VisitID visit_id, VisitRow* out_visit) { - sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, - "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits WHERE id=?")); - statement.BindInt64(0, visit_id); - - if (!statement.Step()) - return false; - - FillVisitRow(statement, out_visit); - - // We got a different visit than we asked for, something is wrong. 
- DCHECK_EQ(visit_id, out_visit->visit_id); - if (visit_id != out_visit->visit_id) - return false; - - return true; -} - -bool VisitDatabase::UpdateVisitRow(const VisitRow& visit) { - // Don't store inconsistent data to the database. - DCHECK_NE(visit.visit_id, visit.referring_visit); - if (visit.visit_id == visit.referring_visit) - return false; - - sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, - "UPDATE visits SET " - "url=?,visit_time=?,from_visit=?,transition=?,segment_id=?," - "visit_duration=? WHERE id=?")); - statement.BindInt64(0, visit.url_id); - statement.BindInt64(1, visit.visit_time.ToInternalValue()); - statement.BindInt64(2, visit.referring_visit); - statement.BindInt64(3, visit.transition); - statement.BindInt64(4, visit.segment_id); - statement.BindInt64(5, visit.visit_duration.ToInternalValue()); - statement.BindInt64(6, visit.visit_id); - - return statement.Run(); -} - -bool VisitDatabase::GetVisitsForURL(URLID url_id, VisitVector* visits) { - visits->clear(); - - sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, - "SELECT" HISTORY_VISIT_ROW_FIELDS - "FROM visits " - "WHERE url=? " - "ORDER BY visit_time ASC")); - statement.BindInt64(0, url_id); - return FillVisitVector(statement, visits); -} - -bool VisitDatabase::GetVisibleVisitsForURL(URLID url_id, - const QueryOptions& options, - VisitVector* visits) { - visits->clear(); - - sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, - "SELECT" HISTORY_VISIT_ROW_FIELDS - "FROM visits " - "WHERE url=? AND visit_time >= ? AND visit_time < ? " - "AND (transition & ?) != 0 " // CHAIN_END - "AND (transition & ?) NOT IN (?, ?, ?) 
" // NO SUBFRAME or - // KEYWORD_GENERATED - "ORDER BY visit_time DESC")); - statement.BindInt64(0, url_id); - statement.BindInt64(1, options.EffectiveBeginTime()); - statement.BindInt64(2, options.EffectiveEndTime()); - statement.BindInt(3, ui::PAGE_TRANSITION_CHAIN_END); - statement.BindInt(4, ui::PAGE_TRANSITION_CORE_MASK); - statement.BindInt(5, ui::PAGE_TRANSITION_AUTO_SUBFRAME); - statement.BindInt(6, ui::PAGE_TRANSITION_MANUAL_SUBFRAME); - statement.BindInt(7, ui::PAGE_TRANSITION_KEYWORD_GENERATED); - - return FillVisitVectorWithOptions(statement, options, visits); -} - -bool VisitDatabase::GetVisitsForTimes(const std::vector& times, - VisitVector* visits) { - visits->clear(); - - for (std::vector::const_iterator it = times.begin(); - it != times.end(); ++it) { - sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, - "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits " - "WHERE visit_time == ?")); - - statement.BindInt64(0, it->ToInternalValue()); - - if (!FillVisitVector(statement, visits)) - return false; - } - return true; -} - -bool VisitDatabase::GetAllVisitsInRange(base::Time begin_time, - base::Time end_time, - int max_results, - VisitVector* visits) { - visits->clear(); - - sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, - "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits " - "WHERE visit_time >= ? AND visit_time < ?" - "ORDER BY visit_time LIMIT ?")); - - // See GetVisibleVisitsInRange for more info on how these times are bound. - int64 end = end_time.ToInternalValue(); - statement.BindInt64(0, begin_time.ToInternalValue()); - statement.BindInt64(1, end ? end : std::numeric_limits::max()); - statement.BindInt64(2, - max_results ? 
max_results : std::numeric_limits::max()); - - return FillVisitVector(statement, visits); -} - -bool VisitDatabase::GetVisitsInRangeForTransition( - base::Time begin_time, - base::Time end_time, - int max_results, - ui::PageTransition transition, - VisitVector* visits) { - DCHECK(visits); - visits->clear(); - - sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, - "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits " - "WHERE visit_time >= ? AND visit_time < ? " - "AND (transition & ?) == ?" - "ORDER BY visit_time LIMIT ?")); - - // See GetVisibleVisitsInRange for more info on how these times are bound. - int64 end = end_time.ToInternalValue(); - statement.BindInt64(0, begin_time.ToInternalValue()); - statement.BindInt64(1, end ? end : std::numeric_limits::max()); - statement.BindInt(2, ui::PAGE_TRANSITION_CORE_MASK); - statement.BindInt(3, transition); - statement.BindInt64(4, - max_results ? max_results : std::numeric_limits::max()); - - return FillVisitVector(statement, visits); -} - -bool VisitDatabase::GetVisibleVisitsInRange(const QueryOptions& options, - VisitVector* visits) { - visits->clear(); - // The visit_time values can be duplicated in a redirect chain, so we sort - // by id too, to ensure a consistent ordering just in case. - sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, - "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits " - "WHERE visit_time >= ? AND visit_time < ? " - "AND (transition & ?) != 0 " // CHAIN_END - "AND (transition & ?) NOT IN (?, ?, ?) 
" // NO SUBFRAME or - // KEYWORD_GENERATED - "ORDER BY visit_time DESC, id DESC")); - - statement.BindInt64(0, options.EffectiveBeginTime()); - statement.BindInt64(1, options.EffectiveEndTime()); - statement.BindInt(2, ui::PAGE_TRANSITION_CHAIN_END); - statement.BindInt(3, ui::PAGE_TRANSITION_CORE_MASK); - statement.BindInt(4, ui::PAGE_TRANSITION_AUTO_SUBFRAME); - statement.BindInt(5, ui::PAGE_TRANSITION_MANUAL_SUBFRAME); - statement.BindInt(6, ui::PAGE_TRANSITION_KEYWORD_GENERATED); - - return FillVisitVectorWithOptions(statement, options, visits); -} - -void VisitDatabase::GetDirectVisitsDuringTimes(const VisitFilter& time_filter, - int max_results, - VisitVector* visits) { - visits->clear(); - if (max_results) - visits->reserve(max_results); - for (VisitFilter::TimeVector::const_iterator it = time_filter.times().begin(); - it != time_filter.times().end(); ++it) { - sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, - "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits " - "WHERE visit_time >= ? AND visit_time < ? " - "AND (transition & ?) != 0 " // CHAIN_START - "AND (transition & ?) IN (?, ?) " // TYPED or AUTO_BOOKMARK only - "ORDER BY visit_time DESC, id DESC")); - - statement.BindInt64(0, it->first.ToInternalValue()); - statement.BindInt64(1, it->second.ToInternalValue()); - statement.BindInt(2, ui::PAGE_TRANSITION_CHAIN_START); - statement.BindInt(3, ui::PAGE_TRANSITION_CORE_MASK); - statement.BindInt(4, ui::PAGE_TRANSITION_TYPED); - statement.BindInt(5, ui::PAGE_TRANSITION_AUTO_BOOKMARK); - - while (statement.Step()) { - VisitRow visit; - FillVisitRow(statement, &visit); - visits->push_back(visit); - - if (max_results > 0 && static_cast(visits->size()) >= max_results) - return; - } - } -} - -VisitID VisitDatabase::GetMostRecentVisitForURL(URLID url_id, - VisitRow* visit_row) { - // The visit_time values can be duplicated in a redirect chain, so we sort - // by id too, to ensure a consistent ordering just in case. 
- sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, - "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits " - "WHERE url=? " - "ORDER BY visit_time DESC, id DESC " - "LIMIT 1")); - statement.BindInt64(0, url_id); - if (!statement.Step()) - return 0; // No visits for this URL. - - if (visit_row) { - FillVisitRow(statement, visit_row); - return visit_row->visit_id; - } - return statement.ColumnInt64(0); -} - -bool VisitDatabase::GetMostRecentVisitsForURL(URLID url_id, - int max_results, - VisitVector* visits) { - visits->clear(); - - // The visit_time values can be duplicated in a redirect chain, so we sort - // by id too, to ensure a consistent ordering just in case. - sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, - "SELECT" HISTORY_VISIT_ROW_FIELDS - "FROM visits " - "WHERE url=? " - "ORDER BY visit_time DESC, id DESC " - "LIMIT ?")); - statement.BindInt64(0, url_id); - statement.BindInt(1, max_results); - - return FillVisitVector(statement, visits); -} - -bool VisitDatabase::GetRedirectFromVisit(VisitID from_visit, - VisitID* to_visit, - GURL* to_url) { - sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, - "SELECT v.id,u.url " - "FROM visits v JOIN urls u ON v.url = u.id " - "WHERE v.from_visit = ? " - "AND (v.transition & ?) != 0")); // IS_REDIRECT_MASK - statement.BindInt64(0, from_visit); - statement.BindInt(1, ui::PAGE_TRANSITION_IS_REDIRECT_MASK); - - if (!statement.Step()) - return false; // No redirect from this visit. 
(Or SQL error) - if (to_visit) - *to_visit = statement.ColumnInt64(0); - if (to_url) - *to_url = GURL(statement.ColumnString(1)); - return true; -} - -bool VisitDatabase::GetRedirectToVisit(VisitID to_visit, - VisitID* from_visit, - GURL* from_url) { - VisitRow row; - if (!GetRowForVisit(to_visit, &row)) - return false; - - if (from_visit) - *from_visit = row.referring_visit; - - if (from_url) { - sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, - "SELECT u.url " - "FROM visits v JOIN urls u ON v.url = u.id " - "WHERE v.id = ?")); - statement.BindInt64(0, row.referring_visit); - - if (!statement.Step()) - return false; - - *from_url = GURL(statement.ColumnString(0)); - } - return true; -} - -bool VisitDatabase::GetVisibleVisitCountToHost(const GURL& url, - int* count, - base::Time* first_visit) { - if (!url.SchemeIs(url::kHttpScheme) && - !url.SchemeIs(url::kHttpsScheme)) - return false; - - // We need to search for URLs with a matching host/port. One way to query for - // this is to use the LIKE operator, eg 'url LIKE http://google.com/%'. This - // is inefficient though in that it doesn't use the index and each entry must - // be visited. The same query can be executed by using >= and < operator. - // The query becomes: - // 'url >= http://google.com/' and url < http://google.com0'. - // 0 is used as it is one character greater than '/'. - const std::string host_query_min = url.GetOrigin().spec(); - if (host_query_min.empty()) - return false; - - // We also want to restrict ourselves to main frame navigations that are not - // in the middle of redirect chains, hence the transition checks. - sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, - "SELECT MIN(v.visit_time), COUNT(*) " - "FROM visits v INNER JOIN urls u ON v.url = u.id " - "WHERE u.url >= ? AND u.url < ? " - "AND (transition & ?) != 0 " - "AND (transition & ?) 
NOT IN (?, ?, ?)")); - statement.BindString(0, host_query_min); - statement.BindString(1, - host_query_min.substr(0, host_query_min.size() - 1) + '0'); - statement.BindInt(2, ui::PAGE_TRANSITION_CHAIN_END); - statement.BindInt(3, ui::PAGE_TRANSITION_CORE_MASK); - statement.BindInt(4, ui::PAGE_TRANSITION_AUTO_SUBFRAME); - statement.BindInt(5, ui::PAGE_TRANSITION_MANUAL_SUBFRAME); - statement.BindInt(6, ui::PAGE_TRANSITION_KEYWORD_GENERATED); - - if (!statement.Step()) { - // We've never been to this page before. - *count = 0; - return true; - } - - if (!statement.Succeeded()) - return false; - - *first_visit = base::Time::FromInternalValue(statement.ColumnInt64(0)); - *count = statement.ColumnInt(1); - return true; -} - -bool VisitDatabase::GetStartDate(base::Time* first_visit) { - sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, - "SELECT MIN(visit_time) FROM visits WHERE visit_time != 0")); - if (!statement.Step() || statement.ColumnInt64(0) == 0) { - *first_visit = base::Time::Now(); - return false; - } - *first_visit = base::Time::FromInternalValue(statement.ColumnInt64(0)); - return true; -} - -void VisitDatabase::GetVisitsSource(const VisitVector& visits, - VisitSourceMap* sources) { - DCHECK(sources); - sources->clear(); - - // We query the source in batch. Here defines the batch size. - const size_t batch_size = 500; - size_t visits_size = visits.size(); - - size_t start_index = 0, end_index = 0; - while (end_index < visits_size) { - start_index = end_index; - end_index = end_index + batch_size < visits_size ? end_index + batch_size - : visits_size; - - // Compose the sql statement with a list of ids. - std::string sql = "SELECT id,source FROM visit_source "; - sql.append("WHERE id IN ("); - // Append all the ids in the statement. 
- for (size_t j = start_index; j < end_index; j++) { - if (j != start_index) - sql.push_back(','); - sql.append(base::Int64ToString(visits[j].visit_id)); - } - sql.append(") ORDER BY id"); - sql::Statement statement(GetDB().GetUniqueStatement(sql.c_str())); - - // Get the source entries out of the query result. - while (statement.Step()) { - std::pair source_entry(statement.ColumnInt64(0), - static_cast(statement.ColumnInt(1))); - sources->insert(source_entry); - } - } -} - -bool VisitDatabase::MigrateVisitsWithoutDuration() { - if (!GetDB().DoesTableExist("visits")) { - NOTREACHED() << " Visits table should exist before migration"; - return false; - } - - if (!GetDB().DoesColumnExist("visits", "visit_duration")) { - // Old versions don't have the visit_duration column, we modify the table - // to add that field. - if (!GetDB().Execute("ALTER TABLE visits " - "ADD COLUMN visit_duration INTEGER DEFAULT 0 NOT NULL")) - return false; - } - return true; -} - -void VisitDatabase::GetBriefVisitInfoOfMostRecentVisits( - int max_visits, - std::vector* result_vector) { - result_vector->clear(); - - sql::Statement statement(GetDB().GetUniqueStatement( - "SELECT url,visit_time,transition FROM visits " - "ORDER BY id DESC LIMIT ?")); - - statement.BindInt64(0, max_visits); - - if (!statement.is_valid()) - return; - - while (statement.Step()) { - BriefVisitInfo info; - info.url_id = statement.ColumnInt64(0); - info.time = base::Time::FromInternalValue(statement.ColumnInt64(1)); - info.transition = ui::PageTransitionFromInt(statement.ColumnInt(2)); - result_vector->push_back(info); - } -} - -} // namespace history diff --git a/chrome/browser/history/visit_database.h b/chrome/browser/history/visit_database.h deleted file mode 100644 index 8f50848..0000000 --- a/chrome/browser/history/visit_database.h +++ /dev/null @@ -1,233 +0,0 @@ -// Copyright (c) 2012 The Chromium Authors. All rights reserved. 
-// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef CHROME_BROWSER_HISTORY_VISIT_DATABASE_H_ -#define CHROME_BROWSER_HISTORY_VISIT_DATABASE_H_ - -#include - -#include "components/history/core/browser/history_types.h" - -namespace sql { -class Connection; -class Statement; -} - -namespace history { - -class VisitFilter; - -// A visit database is one which stores visits for URLs, that is, times and -// linking information. A visit database must also be a URLDatabase, as this -// modifies tables used by URLs directly and could be thought of as inheriting -// from URLDatabase. However, this inheritance is not explicit as things would -// get too complicated and have multiple inheritance. -class VisitDatabase { - public: - // Must call InitVisitTable() before using to make sure the database is - // initialized. - VisitDatabase(); - virtual ~VisitDatabase(); - - // Deletes the visit table. Used for rapidly clearing all visits. In this - // case, InitVisitTable would be called immediately afterward to re-create it. - // Returns true on success. - bool DropVisitTable(); - - // Adds a line to the visit database with the given information, returning - // the added row ID on success, 0 on failure. The given visit is updated with - // the new row ID on success. In addition, adds its source into visit_source - // table. - VisitID AddVisit(VisitRow* visit, VisitSource source); - - // Deletes the given visit from the database. If a visit with the given ID - // doesn't exist, it will not do anything. - void DeleteVisit(const VisitRow& visit); - - // Query a VisitInfo giving an visit id, filling the given VisitRow. - // Returns true on success. - bool GetRowForVisit(VisitID visit_id, VisitRow* out_visit); - - // Updates an existing row. The new information is set on the row, using the - // VisitID as the key. The visit must exist. Returns true on success. 
- bool UpdateVisitRow(const VisitRow& visit); - - // Fills in the given vector with all of the visits for the given page ID, - // sorted in ascending order of date. Returns true on success (although there - // may still be no matches). - bool GetVisitsForURL(URLID url_id, VisitVector* visits); - - // Fills in the given vector with the visits for the given page ID which - // should be user-visible, which excludes things like redirects and subframes, - // and match the set of options passed, sorted in ascending order of date. - // - // Returns true if there are more results available, i.e. if the number of - // results was restricted by |options.max_count|. - bool GetVisibleVisitsForURL(URLID url_id, - const QueryOptions& options, - VisitVector* visits); - - // Fills the vector with all visits with times in the given list. - // - // The results will be in no particular order. Also, no duplicate - // detection is performed, so if |times| has duplicate times, - // |visits| may have duplicate visits. - bool GetVisitsForTimes(const std::vector& times, - VisitVector* visits); - - // Fills all visits in the time range [begin, end) to the given vector. Either - // time can be is_null(), in which case the times in that direction are - // unbounded. - // - // If |max_results| is non-zero, up to that many results will be returned. If - // there are more results than that, the oldest ones will be returned. (This - // is used for history expiration.) - // - // The results will be in increasing order of date. - bool GetAllVisitsInRange(base::Time begin_time, base::Time end_time, - int max_results, VisitVector* visits); - - // Fills all visits with specified transition in the time range [begin, end) - // to the given vector. Either time can be is_null(), in which case the times - // in that direction are unbounded. - // - // If |max_results| is non-zero, up to that many results will be returned. If - // there are more results than that, the oldest ones will be returned. 
(This - // is used for history expiration.) - // - // The results will be in increasing order of date. - bool GetVisitsInRangeForTransition(base::Time begin_time, - base::Time end_time, - int max_results, - ui::PageTransition transition, - VisitVector* visits); - - // Fills all visits in the given time range into the given vector that should - // be user-visible, which excludes things like redirects and subframes. The - // begin time is inclusive, the end time is exclusive. Either time can be - // is_null(), in which case the times in that direction are unbounded. - // - // Up to |max_count| visits will be returned. If there are more visits than - // that, the most recent |max_count| will be returned. If 0, all visits in the - // range will be computed. - // - // Only one visit for each URL will be returned, and it will be the most - // recent one in the time range. - // - // Returns true if there are more results available, i.e. if the number of - // results was restricted by |options.max_count|. - bool GetVisibleVisitsInRange(const QueryOptions& options, - VisitVector* visits); - - // Fills all visits in the given time ranges into the given vector that are - // visits made directly by the user (typed or bookmarked visits only). The - // begin time is inclusive, the end time is exclusive. - // - // Up to |max_count| visits will be returned. If there are more visits than - // that, the most recent |max_count| will be returned. If 0, all visits in the - // range will be computed. - void GetDirectVisitsDuringTimes(const VisitFilter& time_filter, - int max_count, - VisitVector* visits); - - // Returns the visit ID for the most recent visit of the given URL ID, or 0 - // if there is no visit for the URL. - // - // If non-NULL, the given visit row will be filled with the information of - // the found visit. When no visit is found, the row will be unchanged. 
- VisitID GetMostRecentVisitForURL(URLID url_id, - VisitRow* visit_row); - - // Returns the |max_results| most recent visit sessions for |url_id|. - // - // Returns false if there's a failure preparing the statement. True - // otherwise. (No results are indicated with an empty |visits| - // vector.) - bool GetMostRecentVisitsForURL(URLID url_id, - int max_results, - VisitVector* visits); - - // Finds a redirect coming from the given |from_visit|. If a redirect is - // found, it fills the visit ID and URL into the out variables and returns - // true. If there is no redirect from the given visit, returns false. - // - // If there is more than one redirect, this will compute a random one. But - // duplicates should be very rare, and we don't actually care which one we - // get in most cases. These will occur when the user goes back and gets - // redirected again. - // - // to_visit and to_url can be NULL in which case they are ignored. - bool GetRedirectFromVisit(VisitID from_visit, - VisitID* to_visit, - GURL* to_url); - - // Similar to the above function except finds a redirect going to a given - // |to_visit|. - bool GetRedirectToVisit(VisitID to_visit, - VisitID* from_visit, - GURL* from_url); - - // Gets the number of user-visible visits to all URLs on the same - // scheme/host/port as |url|, as well as the time of the earliest visit. - // "User-visible" is defined as in GetVisibleVisitsInRange() above, i.e. - // excluding redirects and subframes. - // This function is only valid for HTTP and HTTPS URLs; all other schemes - // cause the function to return false. - bool GetVisibleVisitCountToHost(const GURL& url, - int* count, - base::Time* first_visit); - - // Get the time of the first item in our database. - bool GetStartDate(base::Time* first_visit); - - // Get the source information about the given visits. 
- void GetVisitsSource(const VisitVector& visits, - VisitSourceMap* sources); - - // Obtains BriefVisitInfo for the specified number of most recent visits - // from the visit database. - void GetBriefVisitInfoOfMostRecentVisits( - int max_visits, - std::vector* result_vector); - - protected: - // Returns the database for the functions in this interface. - virtual sql::Connection& GetDB() = 0; - - // Called by the derived classes on initialization to make sure the tables - // and indices are properly set up. Must be called before anything else. - bool InitVisitTable(); - - // Convenience to fill a VisitRow. Assumes the visit values are bound starting - // at index 0. - static void FillVisitRow(sql::Statement& statement, VisitRow* visit); - - // Convenience to fill a VisitVector. Assumes that statement.step() - // hasn't happened yet. - static bool FillVisitVector(sql::Statement& statement, VisitVector* visits); - - // Convenience to fill a VisitVector while respecting the set of options. - // |statement| should order the query decending by visit_time to ensure - // correct duplicate management behavior. Assumes that statement.step() - // hasn't happened yet. - static bool FillVisitVectorWithOptions(sql::Statement& statement, - const QueryOptions& options, - VisitVector* visits); - - // Called by the derived classes to migrate the older visits table which - // don't have visit_duration column yet. - bool MigrateVisitsWithoutDuration(); - - private: - - DISALLOW_COPY_AND_ASSIGN(VisitDatabase); -}; - -// Rows, in order, of the visit table. 
-#define HISTORY_VISIT_ROW_FIELDS \ - " id,url,visit_time,from_visit,transition,segment_id,visit_duration " - -} // namespace history - -#endif // CHROME_BROWSER_HISTORY_VISIT_DATABASE_H_ diff --git a/chrome/browser/history/visit_database_unittest.cc b/chrome/browser/history/visit_database_unittest.cc deleted file mode 100644 index 98943f3..0000000 --- a/chrome/browser/history/visit_database_unittest.cc +++ /dev/null @@ -1,419 +0,0 @@ -// Copyright (c) 2012 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include -#include - -#include "base/files/file_path.h" -#include "base/files/scoped_temp_dir.h" -#include "base/strings/string_util.h" -#include "base/time/time.h" -#include "chrome/browser/history/visit_database.h" -#include "components/history/core/browser/url_database.h" -#include "sql/connection.h" -#include "testing/gtest/include/gtest/gtest.h" -#include "testing/platform_test.h" - -using base::Time; -using base::TimeDelta; - -namespace history { - -namespace { - -bool IsVisitInfoEqual(const VisitRow& a, - const VisitRow& b) { - return a.visit_id == b.visit_id && - a.url_id == b.url_id && - a.visit_time == b.visit_time && - a.referring_visit == b.referring_visit && - a.transition == b.transition; -} - -} // namespace - -class VisitDatabaseTest : public PlatformTest, - public URLDatabase, - public VisitDatabase { - public: - VisitDatabaseTest() { - } - - private: - // Test setup. - void SetUp() override { - PlatformTest::SetUp(); - ASSERT_TRUE(temp_dir_.CreateUniqueTempDir()); - base::FilePath db_file = temp_dir_.path().AppendASCII("VisitTest.db"); - - EXPECT_TRUE(db_.Open(db_file)); - - // Initialize the tables for this test. - CreateURLTable(false); - CreateMainURLIndex(); - InitVisitTable(); - } - void TearDown() override { - db_.Close(); - PlatformTest::TearDown(); - } - - // Provided for URL/VisitDatabase. 
- sql::Connection& GetDB() override { return db_; } - - base::ScopedTempDir temp_dir_; - sql::Connection db_; -}; - -TEST_F(VisitDatabaseTest, Add) { - // Add one visit. - VisitRow visit_info1(1, Time::Now(), 0, ui::PAGE_TRANSITION_LINK, 0); - EXPECT_TRUE(AddVisit(&visit_info1, SOURCE_BROWSED)); - - // Add second visit for the same page. - VisitRow visit_info2(visit_info1.url_id, - visit_info1.visit_time + TimeDelta::FromSeconds(1), 1, - ui::PAGE_TRANSITION_TYPED, 0); - EXPECT_TRUE(AddVisit(&visit_info2, SOURCE_BROWSED)); - - // Add third visit for a different page. - VisitRow visit_info3(2, - visit_info1.visit_time + TimeDelta::FromSeconds(2), 0, - ui::PAGE_TRANSITION_LINK, 0); - EXPECT_TRUE(AddVisit(&visit_info3, SOURCE_BROWSED)); - - // Query the first two. - std::vector matches; - EXPECT_TRUE(GetVisitsForURL(visit_info1.url_id, &matches)); - EXPECT_EQ(static_cast(2), matches.size()); - - // Make sure we got both (order in result set is visit time). - EXPECT_TRUE(IsVisitInfoEqual(matches[0], visit_info1) && - IsVisitInfoEqual(matches[1], visit_info2)); -} - -TEST_F(VisitDatabaseTest, Delete) { - // Add three visits that form a chain of navigation, and then delete the - // middle one. We should be left with the outer two visits, and the chain - // should link them. - static const int kTime1 = 1000; - VisitRow visit_info1(1, Time::FromInternalValue(kTime1), 0, - ui::PAGE_TRANSITION_LINK, 0); - EXPECT_TRUE(AddVisit(&visit_info1, SOURCE_BROWSED)); - - static const int kTime2 = kTime1 + 1; - VisitRow visit_info2(1, Time::FromInternalValue(kTime2), - visit_info1.visit_id, ui::PAGE_TRANSITION_LINK, 0); - EXPECT_TRUE(AddVisit(&visit_info2, SOURCE_BROWSED)); - - static const int kTime3 = kTime2 + 1; - VisitRow visit_info3(1, Time::FromInternalValue(kTime3), - visit_info2.visit_id, ui::PAGE_TRANSITION_LINK, 0); - EXPECT_TRUE(AddVisit(&visit_info3, SOURCE_BROWSED)); - - // First make sure all the visits are there. 
- std::vector matches; - EXPECT_TRUE(GetVisitsForURL(visit_info1.url_id, &matches)); - EXPECT_EQ(static_cast(3), matches.size()); - EXPECT_TRUE(IsVisitInfoEqual(matches[0], visit_info1) && - IsVisitInfoEqual(matches[1], visit_info2) && - IsVisitInfoEqual(matches[2], visit_info3)); - - // Delete the middle one. - DeleteVisit(visit_info2); - - // The outer two should be left, and the last one should have the first as - // the referrer. - visit_info3.referring_visit = visit_info1.visit_id; - matches.clear(); - EXPECT_TRUE(GetVisitsForURL(visit_info1.url_id, &matches)); - EXPECT_EQ(static_cast(2), matches.size()); - EXPECT_TRUE(IsVisitInfoEqual(matches[0], visit_info1) && - IsVisitInfoEqual(matches[1], visit_info3)); -} - -TEST_F(VisitDatabaseTest, Update) { - // Make something in the database. - VisitRow original(1, Time::Now(), 23, ui::PageTransitionFromInt(0), 19); - AddVisit(&original, SOURCE_BROWSED); - - // Mutate that row. - VisitRow modification(original); - modification.url_id = 2; - modification.transition = ui::PAGE_TRANSITION_TYPED; - modification.visit_time = Time::Now() + TimeDelta::FromDays(1); - modification.referring_visit = 9292; - UpdateVisitRow(modification); - - // Check that the mutated version was written. - VisitRow final; - GetRowForVisit(original.visit_id, &final); - EXPECT_TRUE(IsVisitInfoEqual(modification, final)); -} - -// TODO(brettw) write test for GetMostRecentVisitForURL! - -namespace { - -std::vector GetTestVisitRows() { - // Tests can be sensitive to the local timezone, so use a local time as the - // basis for all visit times. - base::Time base_time = Time::UnixEpoch().LocalMidnight(); - - // Add one visit. - VisitRow visit_info1(1, base_time + TimeDelta::FromMinutes(1), 0, - ui::PageTransitionFromInt( - ui::PAGE_TRANSITION_LINK | - ui::PAGE_TRANSITION_CHAIN_START | - ui::PAGE_TRANSITION_CHAIN_END), - 0); - visit_info1.visit_id = 1; - - // Add second visit for the same page. 
- VisitRow visit_info2(visit_info1.url_id, - visit_info1.visit_time + TimeDelta::FromSeconds(1), 1, - ui::PageTransitionFromInt( - ui::PAGE_TRANSITION_TYPED | - ui::PAGE_TRANSITION_CHAIN_START | - ui::PAGE_TRANSITION_CHAIN_END), - 0); - visit_info2.visit_id = 2; - - // Add third visit for a different page. - VisitRow visit_info3(2, - visit_info1.visit_time + TimeDelta::FromSeconds(2), 0, - ui::PageTransitionFromInt( - ui::PAGE_TRANSITION_LINK | - ui::PAGE_TRANSITION_CHAIN_START), - 0); - visit_info3.visit_id = 3; - - // Add a redirect visit from the last page. - VisitRow visit_info4(3, - visit_info1.visit_time + TimeDelta::FromSeconds(3), visit_info3.visit_id, - ui::PageTransitionFromInt( - ui::PAGE_TRANSITION_SERVER_REDIRECT | - ui::PAGE_TRANSITION_CHAIN_END), - 0); - visit_info4.visit_id = 4; - - // Add a subframe visit. - VisitRow visit_info5(4, - visit_info1.visit_time + TimeDelta::FromSeconds(4), visit_info4.visit_id, - ui::PageTransitionFromInt( - ui::PAGE_TRANSITION_AUTO_SUBFRAME | - ui::PAGE_TRANSITION_CHAIN_START | - ui::PAGE_TRANSITION_CHAIN_END), - 0); - visit_info5.visit_id = 5; - - // Add third visit for the same URL as visit 1 and 2, but exactly a day - // later than visit 2. 
- VisitRow visit_info6(visit_info1.url_id, - visit_info2.visit_time + TimeDelta::FromDays(1), 1, - ui::PageTransitionFromInt( - ui::PAGE_TRANSITION_TYPED | - ui::PAGE_TRANSITION_CHAIN_START | - ui::PAGE_TRANSITION_CHAIN_END), - 0); - visit_info6.visit_id = 6; - - std::vector test_visit_rows; - test_visit_rows.push_back(visit_info1); - test_visit_rows.push_back(visit_info2); - test_visit_rows.push_back(visit_info3); - test_visit_rows.push_back(visit_info4); - test_visit_rows.push_back(visit_info5); - test_visit_rows.push_back(visit_info6); - return test_visit_rows; -} - -} // namespace - -TEST_F(VisitDatabaseTest, GetVisitsForTimes) { - std::vector test_visit_rows = GetTestVisitRows(); - - for (size_t i = 0; i < test_visit_rows.size(); ++i) { - EXPECT_TRUE(AddVisit(&test_visit_rows[i], SOURCE_BROWSED)); - } - - // Query the visits for all our times. We should get all visits. - { - std::vector times; - for (size_t i = 0; i < test_visit_rows.size(); ++i) { - times.push_back(test_visit_rows[i].visit_time); - } - VisitVector results; - GetVisitsForTimes(times, &results); - EXPECT_EQ(test_visit_rows.size(), results.size()); - } - - // Query the visits for a single time. - for (size_t i = 0; i < test_visit_rows.size(); ++i) { - std::vector times; - times.push_back(test_visit_rows[i].visit_time); - VisitVector results; - GetVisitsForTimes(times, &results); - ASSERT_EQ(static_cast(1), results.size()); - EXPECT_TRUE(IsVisitInfoEqual(results[0], test_visit_rows[i])); - } -} - -TEST_F(VisitDatabaseTest, GetAllVisitsInRange) { - std::vector test_visit_rows = GetTestVisitRows(); - - for (size_t i = 0; i < test_visit_rows.size(); ++i) { - EXPECT_TRUE(AddVisit(&test_visit_rows[i], SOURCE_BROWSED)); - } - - // Query the visits for all time. We should get all visits. 
- VisitVector results; - GetAllVisitsInRange(Time(), Time(), 0, &results); - ASSERT_EQ(test_visit_rows.size(), results.size()); - for (size_t i = 0; i < test_visit_rows.size(); ++i) { - EXPECT_TRUE(IsVisitInfoEqual(results[i], test_visit_rows[i])); - } - - // Query a time range and make sure beginning is inclusive and ending is - // exclusive. - GetAllVisitsInRange(test_visit_rows[1].visit_time, - test_visit_rows[3].visit_time, 0, - &results); - ASSERT_EQ(static_cast(2), results.size()); - EXPECT_TRUE(IsVisitInfoEqual(results[0], test_visit_rows[1])); - EXPECT_TRUE(IsVisitInfoEqual(results[1], test_visit_rows[2])); - - // Query for a max count and make sure we get only that number. - GetAllVisitsInRange(Time(), Time(), 1, &results); - ASSERT_EQ(static_cast(1), results.size()); - EXPECT_TRUE(IsVisitInfoEqual(results[0], test_visit_rows[0])); -} - -TEST_F(VisitDatabaseTest, GetVisibleVisitsInRange) { - std::vector test_visit_rows = GetTestVisitRows(); - - for (size_t i = 0; i < test_visit_rows.size(); ++i) { - EXPECT_TRUE(AddVisit(&test_visit_rows[i], SOURCE_BROWSED)); - } - - // Query the visits for all time. We should not get the first or the second - // visit (duplicates of the sixth) or the redirect or subframe visits. - VisitVector results; - QueryOptions options; - GetVisibleVisitsInRange(options, &results); - ASSERT_EQ(static_cast(2), results.size()); - EXPECT_TRUE(IsVisitInfoEqual(results[0], test_visit_rows[5])); - EXPECT_TRUE(IsVisitInfoEqual(results[1], test_visit_rows[3])); - - // Now try with only per-day de-duping -- the second visit should appear, - // since it's a duplicate of visit6 but on a different day. 
- options.duplicate_policy = QueryOptions::REMOVE_DUPLICATES_PER_DAY; - GetVisibleVisitsInRange(options, &results); - ASSERT_EQ(static_cast(3), results.size()); - EXPECT_TRUE(IsVisitInfoEqual(results[0], test_visit_rows[5])); - EXPECT_TRUE(IsVisitInfoEqual(results[1], test_visit_rows[3])); - EXPECT_TRUE(IsVisitInfoEqual(results[2], test_visit_rows[1])); - - // Now try without de-duping, expect to see all visible visits. - options.duplicate_policy = QueryOptions::KEEP_ALL_DUPLICATES; - GetVisibleVisitsInRange(options, &results); - ASSERT_EQ(static_cast(4), results.size()); - EXPECT_TRUE(IsVisitInfoEqual(results[0], test_visit_rows[5])); - EXPECT_TRUE(IsVisitInfoEqual(results[1], test_visit_rows[3])); - EXPECT_TRUE(IsVisitInfoEqual(results[2], test_visit_rows[1])); - EXPECT_TRUE(IsVisitInfoEqual(results[3], test_visit_rows[0])); - - // Set the end time to exclude the second visit. The first visit should be - // returned. Even though the second is a more recent visit, it's not in the - // query range. - options.end_time = test_visit_rows[1].visit_time; - GetVisibleVisitsInRange(options, &results); - ASSERT_EQ(static_cast(1), results.size()); - EXPECT_TRUE(IsVisitInfoEqual(results[0], test_visit_rows[0])); - - options = QueryOptions(); // Reset to options to default. - - // Query for a max count and make sure we get only that number. - options.max_count = 1; - GetVisibleVisitsInRange(options, &results); - ASSERT_EQ(static_cast(1), results.size()); - EXPECT_TRUE(IsVisitInfoEqual(results[0], test_visit_rows[5])); - - // Query a time range and make sure beginning is inclusive and ending is - // exclusive. - options.begin_time = test_visit_rows[1].visit_time; - options.end_time = test_visit_rows[3].visit_time; - options.max_count = 0; - GetVisibleVisitsInRange(options, &results); - ASSERT_EQ(static_cast(1), results.size()); - EXPECT_TRUE(IsVisitInfoEqual(results[0], test_visit_rows[1])); -} - -TEST_F(VisitDatabaseTest, VisitSource) { - // Add visits. 
- VisitRow visit_info1(111, Time::Now(), 0, ui::PAGE_TRANSITION_LINK, 0); - ASSERT_TRUE(AddVisit(&visit_info1, SOURCE_BROWSED)); - - VisitRow visit_info2(112, Time::Now(), 1, ui::PAGE_TRANSITION_TYPED, 0); - ASSERT_TRUE(AddVisit(&visit_info2, SOURCE_SYNCED)); - - VisitRow visit_info3(113, Time::Now(), 0, ui::PAGE_TRANSITION_TYPED, 0); - ASSERT_TRUE(AddVisit(&visit_info3, SOURCE_EXTENSION)); - - // Query each visit. - std::vector matches; - ASSERT_TRUE(GetVisitsForURL(111, &matches)); - ASSERT_EQ(1U, matches.size()); - VisitSourceMap sources; - GetVisitsSource(matches, &sources); - EXPECT_EQ(0U, sources.size()); - - ASSERT_TRUE(GetVisitsForURL(112, &matches)); - ASSERT_EQ(1U, matches.size()); - GetVisitsSource(matches, &sources); - ASSERT_EQ(1U, sources.size()); - EXPECT_EQ(SOURCE_SYNCED, sources[matches[0].visit_id]); - - ASSERT_TRUE(GetVisitsForURL(113, &matches)); - ASSERT_EQ(1U, matches.size()); - GetVisitsSource(matches, &sources); - ASSERT_EQ(1U, sources.size()); - EXPECT_EQ(SOURCE_EXTENSION, sources[matches[0].visit_id]); -} - -TEST_F(VisitDatabaseTest, GetVisibleVisitsForURL) { - std::vector test_visit_rows = GetTestVisitRows(); - - for (size_t i = 0; i < test_visit_rows.size(); ++i) { - EXPECT_TRUE(AddVisit(&test_visit_rows[i], SOURCE_BROWSED)); - } - - // Query the visits for the first url id. We should not get the first or the - // second visit (duplicates of the sixth) or any other urls, redirects or - // subframe visits. - VisitVector results; - QueryOptions options; - int url_id = test_visit_rows[0].url_id; - GetVisibleVisitsForURL(url_id, options, &results); - ASSERT_EQ(static_cast(1), results.size()); - EXPECT_TRUE(IsVisitInfoEqual(results[0], test_visit_rows[5])); - - // Now try with only per-day de-duping -- the second visit should appear, - // since it's a duplicate of visit6 but on a different day. 
- options.duplicate_policy = QueryOptions::REMOVE_DUPLICATES_PER_DAY; - GetVisibleVisitsForURL(url_id, options, &results); - ASSERT_EQ(static_cast(2), results.size()); - EXPECT_TRUE(IsVisitInfoEqual(results[0], test_visit_rows[5])); - EXPECT_TRUE(IsVisitInfoEqual(results[1], test_visit_rows[1])); - - // Now try without de-duping, expect to see all visible visits to url id 1. - options.duplicate_policy = QueryOptions::KEEP_ALL_DUPLICATES; - GetVisibleVisitsForURL(url_id, options, &results); - ASSERT_EQ(static_cast(3), results.size()); - EXPECT_TRUE(IsVisitInfoEqual(results[0], test_visit_rows[5])); - EXPECT_TRUE(IsVisitInfoEqual(results[1], test_visit_rows[1])); - EXPECT_TRUE(IsVisitInfoEqual(results[2], test_visit_rows[0])); -} - -} // namespace history diff --git a/chrome/browser/history/visit_filter.cc b/chrome/browser/history/visit_filter.cc deleted file mode 100644 index 9147dd73..0000000 --- a/chrome/browser/history/visit_filter.cc +++ /dev/null @@ -1,358 +0,0 @@ -// Copyright (c) 2012 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "chrome/browser/history/visit_filter.h" - -#include - -#include - -#include "base/logging.h" -#include "base/time/time.h" -#include "components/history/core/browser/history_types.h" - -namespace history { - -const double kLn2 = 0.6931471805599453; - -VisitFilter::VisitFilter() - : day_(DAY_UNDEFINED), - max_results_(0), - sorting_order_(ORDER_BY_RECENCY) { -} - -VisitFilter::~VisitFilter() { -} - -void VisitFilter::SetFilterTime(const base::Time& filter_time) { - filter_time_ = filter_time; - UpdateTimeVector(); -} - -void VisitFilter::SetFilterWidth(const base::TimeDelta& filter_width) { - filter_width_ = filter_width; - UpdateTimeVector(); -} - -void VisitFilter::SetDayOfTheWeekFilter(int day) { - day_ = day; - UpdateTimeVector(); -} - -void VisitFilter::SetDayTypeFilter(bool workday) { - day_ = workday ? 
WORKDAY : HOLIDAY; - UpdateTimeVector(); -} - -void VisitFilter::ClearFilters() { - filter_time_ = base::Time(); - filter_width_ = base::TimeDelta::FromHours(0); - day_ = DAY_UNDEFINED; - UpdateTimeVector(); -} - -bool VisitFilter::UpdateTimeVector() { - - TimeVector days_of_the_week; - if (day_ >= 0 && day_ <= 6) { - GetTimesOnTheDayOfTheWeek(day_, filter_time_, max_results_, - &days_of_the_week); - } else if (day_ == WORKDAY || day_ == HOLIDAY) { - GetTimesOnTheSameDayType( - (day_ == WORKDAY), filter_time_, max_results_, &days_of_the_week); - } - - TimeVector times_of_the_day; - if (filter_width_ != base::TimeDelta::FromSeconds(0)) { - if (sorting_order_ == ORDER_BY_TIME_GAUSSIAN) { - // Limit queries to 5 standard deviations. - GetTimesInRange(filter_time_ - 5 * filter_width_, - filter_time_ + 5 * filter_width_, - max_results_, ×_of_the_day); - } else { - GetTimesInRange(filter_time_ - filter_width_, - filter_time_ + filter_width_, - max_results_, ×_of_the_day); - } - } - - if (times_of_the_day.empty()) { - if (days_of_the_week.empty()) - times_.clear(); - else - times_.swap(days_of_the_week); - } else { - if (days_of_the_week.empty()) - times_.swap(times_of_the_day); - else - IntersectTimeVectors(times_of_the_day, days_of_the_week, ×_); - } - - return !times_.empty(); -} - -// static -void VisitFilter::GetTimesInRange(base::Time begin_time_of_the_day, - base::Time end_time_of_the_day, - size_t max_results, - TimeVector* times) { - DCHECK(times); - times->clear(); - times->reserve(max_results); - const size_t kMaxReturnedResults = 62; // 2 months (<= 62 days). - - if (!max_results) - max_results = kMaxReturnedResults; - - // If range is more than 24 hours, return a contiguous interval covering - // |max_results| days. 
- base::TimeDelta one_day = base::TimeDelta::FromDays(1); - if (end_time_of_the_day - begin_time_of_the_day >= one_day) { - times->push_back( - std::make_pair(begin_time_of_the_day - one_day * (max_results - 1), - end_time_of_the_day)); - return; - } - - for (size_t i = 0; i < max_results; ++i) { - times->push_back( - std::make_pair(begin_time_of_the_day - base::TimeDelta::FromDays(i), - end_time_of_the_day - base::TimeDelta::FromDays(i))); - } -} - -double VisitFilter::GetVisitScore(const VisitRow& visit) const { - // Decay score by half each week. - base::TimeDelta time_passed = filter_time_ - visit.visit_time; - // Clamp to 0 in case time jumps backwards (e.g. due to DST). - double decay_exponent = std::max(0.0, kLn2 * static_cast( - time_passed.InMicroseconds()) / base::Time::kMicrosecondsPerWeek); - double staleness = 1.0 / exp(decay_exponent); - - double score = 0; - switch (sorting_order()) { - case ORDER_BY_RECENCY: - score = 1.0; // Let the staleness factor take care of it. - break; - case ORDER_BY_VISIT_COUNT: - score = 1.0; // Every visit counts the same. - staleness = 1.0; // No decay on this one. - break; - case ORDER_BY_TIME_GAUSSIAN: { - double offset = - GetTimeOfDayDifference(filter_time_, - visit.visit_time).InMicroseconds(); - double sd = filter_width_.InMicroseconds(); - - // Calculate score using the normal distribution density function. 
- score = exp(-(offset * offset) / (2 * sd * sd)); - break; - } - case ORDER_BY_TIME_LINEAR: { - base::TimeDelta offset = GetTimeOfDayDifference(filter_time_, - visit.visit_time); - if (offset > filter_width_) { - score = 0; - } else { - score = 1 - offset.InMicroseconds() / static_cast( - filter_width_.InMicroseconds()); - } - break; - } - case ORDER_BY_DURATION_SPENT: - default: - NOTREACHED() << "Not implemented!"; - } - return staleness * score; -} - -base::TimeDelta -VisitFilter::GetTimeOfDayDifference(base::Time t1, base::Time t2) { - base::TimeDelta time_of_day1 = t1 - t1.LocalMidnight(); - base::TimeDelta time_of_day2 = t2 - t2.LocalMidnight(); - - base::TimeDelta difference; - if (time_of_day1 < time_of_day2) - difference = time_of_day2 - time_of_day1; - else - difference = time_of_day1 - time_of_day2; - - // If the difference is more than 12 hours, we'll get closer by 'wrapping' - // around the day barrier. - if (difference > base::TimeDelta::FromHours(12)) - difference = base::TimeDelta::FromHours(24) - difference; - - return difference; -} - -// static -void VisitFilter::GetTimesOnTheDayOfTheWeek(int day, - base::Time week, - size_t max_results, - TimeVector* times) { - DCHECK(times); - - base::Time::Exploded exploded_time; - if (week.is_null()) - week = base::Time::Now(); - week.LocalExplode(&exploded_time); - base::TimeDelta shift = base::TimeDelta::FromDays( - exploded_time.day_of_week - day); - - base::Time day_base = week.LocalMidnight(); - day_base -= shift; - - times->clear(); - times->reserve(max_results); - - base::TimeDelta one_day = base::TimeDelta::FromDays(1); - - const size_t kMaxReturnedResults = 9; // 2 months (<= 9 weeks). 
- - if (!max_results) - max_results = kMaxReturnedResults; - - for (size_t i = 0; i < max_results; ++i) { - times->push_back( - std::make_pair(day_base - base::TimeDelta::FromDays(i * 7), - day_base + one_day - base::TimeDelta::FromDays(i * 7))); - } -} - -// static -void VisitFilter::GetTimesOnTheSameDayType(bool workday, - base::Time week, - size_t max_results, - TimeVector* times) { - DCHECK(times); - if (week.is_null()) - week = base::Time::Now(); - // TODO(georgey): internationalize workdays/weekends/holidays. - if (!workday) { - TimeVector sunday; - TimeVector saturday; - base::Time::Exploded exploded_time; - week.LocalExplode(&exploded_time); - - GetTimesOnTheDayOfTheWeek(exploded_time.day_of_week ? 7 : 0, week, - max_results, &sunday); - GetTimesOnTheDayOfTheWeek(exploded_time.day_of_week ? 6 : -1, week, - max_results, &saturday); - UniteTimeVectors(sunday, saturday, times); - if (max_results && times->size() > max_results) - times->resize(max_results); - } else { - TimeVector vectors[3]; - GetTimesOnTheDayOfTheWeek(1, week, max_results, &vectors[0]); - for (size_t i = 2; i <= 5; ++i) { - GetTimesOnTheDayOfTheWeek(i, week, max_results, &vectors[(i - 1) % 3]); - UniteTimeVectors(vectors[(i - 2) % 3], vectors[(i - 1) % 3], - &vectors[i % 3]); - if (max_results && vectors[i % 3].size() > max_results) - vectors[i % 3].resize(max_results); - vectors[i % 3].swap(vectors[(i - 1) % 3]); - } - // 1 == 5 - 1 % 3 - times->swap(vectors[1]); - } -} - -// static -bool VisitFilter::UniteTimeVectors(const TimeVector& vector1, - const TimeVector& vector2, - TimeVector* result) { - // The vectors are sorted going back in time, but each pair has |first| as the - // beginning of time period and |second| as the end, for example: - // { 19:20, 20:00 } { 17:00, 18:10 } { 11:33, 11:35 }... - // The pairs in one vector are guaranteed not to intersect. 
- DCHECK(result); - result->clear(); - result->reserve(vector1.size() + vector2.size()); - - size_t vi[2]; - const TimeVector* vectors[2] = { &vector1, &vector2 }; - for (vi[0] = 0, vi[1] = 0; - vi[0] < vectors[0]->size() && vi[1] < vectors[1]->size();) { - std::pair united_timeslot; - // Check which element occurs later (for the following diagrams time is - // increasing to the right, 'f' means first, 's' means second). - // after the folowing 2 statements: - // vectors[iterator_index][vi[iterator_index]] f---s - // vectors[1 - iterator_index][vi[1 - iterator_index]] f---s - // united_timeslot f---s - // or - // vectors[iterator_index][vi[iterator_index]] f---s - // vectors[1 - iterator_index][vi[1 - iterator_index]] f-s - // united_timeslot f---s - size_t iterator_index = - ((*vectors[0])[vi[0]].second >= (*vectors[1])[vi[1]].second) ? 0 : 1; - united_timeslot = (*vectors[iterator_index])[vi[iterator_index]]; - ++vi[iterator_index]; - bool added_timeslot; - // Merge all timeslots intersecting with |united_timeslot|. 
- do { - added_timeslot = false; - for (size_t i = 0; i <= 1; ++i) { - if (vi[i] < vectors[i]->size() && - (*vectors[i])[vi[i]].second >= united_timeslot.first) { - // vectors[i][vi[i]] f---s - // united_timeslot f---s - // or - // united_timeslot f------s - added_timeslot = true; - if ((*vectors[i])[vi[i]].first < united_timeslot.first) { - // vectors[i][vi[i]] f---s - // united_timeslot f---s - // results in: - // united_timeslot f-----s - united_timeslot.first = (*vectors[i])[vi[i]].first; - } - ++vi[i]; - } - } - } while (added_timeslot); - result->push_back(united_timeslot); - } - for (size_t i = 0; i <= 1; ++i) { - for (; vi[i] < vectors[i]->size(); ++vi[i]) - result->push_back((*vectors[i])[vi[i]]); - } - return !result->empty(); -} - -// static -bool VisitFilter::IntersectTimeVectors(const TimeVector& vector1, - const TimeVector& vector2, - TimeVector* result) { - DCHECK(result); - result->clear(); - result->reserve(std::max(vector1.size(), vector2.size())); - - TimeVector::const_iterator vi[2]; - for (vi[0] = vector1.begin(), vi[1] = vector2.begin(); - vi[0] != vector1.end() && vi[1] != vector2.end();) { - size_t it_index = (vi[0]->second >= vi[1]->second) ? 0 : 1; - if (vi[it_index]->first >= vi[1 - it_index]->second) { - // vector 1 ++++ - // vector 2 ++ - ++vi[it_index]; - } else if (vi[it_index]->first >= vi[1 - it_index]->first) { - // vector 1 ++++ - // vector 2 +++++ - result->push_back(std::make_pair(vi[it_index]->first, - vi[1 - it_index]->second)); - ++vi[it_index]; - } else { - // vector 1 ++++ - // vector 2 ++ - result->push_back(std::make_pair(vi[1 - it_index]->first, - vi[1 - it_index]->second)); - ++vi[1 - it_index]; - } - } - - return !result->empty(); -} - -} // namespace history diff --git a/chrome/browser/history/visit_filter.h b/chrome/browser/history/visit_filter.h deleted file mode 100644 index deeacf3..0000000 --- a/chrome/browser/history/visit_filter.h +++ /dev/null @@ -1,165 +0,0 @@ -// Copyright (c) 2012 The Chromium Authors. 
All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef CHROME_BROWSER_HISTORY_VISIT_FILTER_H_ -#define CHROME_BROWSER_HISTORY_VISIT_FILTER_H_ - -#include - -#include "base/gtest_prod_util.h" -#include "base/time/time.h" - -namespace history { - -class VisitRow; - -// Helper class for creation of filters for VisitDatabase that is used to filter -// out visits by time of the day, day of the week, workdays, holidays, duration -// of the visit, location and the combinations of that. -// It also stores sorting order of the returned resilts. -class VisitFilter { - public: - VisitFilter(); - virtual ~VisitFilter(); - - // Vector of time intervals [begin time, end time]. All of the following - // functions produce vectors that are sorted in order from most recent to - // least recent and have intervals that do not intersect. - // |first| always points to the beginning of the time period, |second| - to - // the end. - typedef std::vector > TimeVector; - - // Returns time vector associated with the object. - const TimeVector& times() const { - return times_; - } - - // Sets |max_results| of the results to be returned. 0 means "return results - // for the two months prior to passed time". - void set_max_results(size_t max_results) { - max_results_ = max_results; - if (times_.size() > max_results_) - times_.resize(max_results_); - } - - // Sets the time that should be used as a basis for the filter. Normally this - // is the time that a query is made. - void SetFilterTime(const base::Time& filter_time); - - // Sets the amount of time around the filter time to take into account. This - // only applies to the filter time's time-of-day, restrictions on how long - // back in time to look should be controlled by changing |max_results|. - // - // How the filter width is used depends on the sorting order. 
For - // |ORDER_BY_TIME_LINEAR| it is the distance to the cutoff point, while for - // |ORDER_BY_TIME_GAUSSIAN| it is the standard deviation. - void SetFilterWidth(const base::TimeDelta& filter_width); - - // The following two filters are exclusive - setting one, clears the other - // one. - - // Sets the filter to use only visits that happened on the specified day of - // the week. - // |day| - day of the week: 0 - sunday, 1 - monday, etc. - void SetDayOfTheWeekFilter(int day); - - // Sets the filter to use only visits that happened on a holiday/workday. - // |workday| - if true means Monday-Friday, if false means Saturday-Sunday. - // TODO(georgey) - internationalize it. - void SetDayTypeFilter(bool workday); - - // Sorting order that results after applying this filter are sorted by. - enum SortingOrder { - ORDER_BY_RECENCY, // Most recent visits are most relevant ones. (default) - ORDER_BY_VISIT_COUNT, // Most visited are listed first. - ORDER_BY_DURATION_SPENT, // The sites that user spents more time in are - // sorted first. - ORDER_BY_TIME_GAUSSIAN, // Visits that happened closer to the filter time's - // time-of-day are scored higher. The dropoff in - // score follows a normal distribution curve with - // the filter width as the standard deviation. - ORDER_BY_TIME_LINEAR, // Visits that happened closer to the filter time's - // time-of-day are score higher. The dropoff in score - // is a linear function, with filter width being the - // point where a visit does not count at all anymore. - }; - - double GetVisitScore(const VisitRow& visit) const; - - void set_sorting_order(SortingOrder order) { - sorting_order_ = order; - UpdateTimeVector(); - } - - SortingOrder sorting_order() const { - return sorting_order_; - } - - // Clears all of the filters. 
- void ClearFilters(); - - private: - FRIEND_TEST_ALL_PREFIXES(VisitFilterTest, CheckFilters); - FRIEND_TEST_ALL_PREFIXES(VisitFilterTest, GetTimesInRange); - FRIEND_TEST_ALL_PREFIXES(VisitFilterTest, GetTimesOnTheDayOfTheWeek); - FRIEND_TEST_ALL_PREFIXES(VisitFilterTest, GetTimesOnTheSameDayType); - FRIEND_TEST_ALL_PREFIXES(VisitFilterTest, UniteTimeVectors); - FRIEND_TEST_ALL_PREFIXES(VisitFilterTest, IntersectTimeVectors); - - // Internal helper for the update. - bool UpdateTimeVector(); - - // Internal helper for getting the times in range. See SetTimeInRangeFilter(). - static void GetTimesInRange(base::Time begin_time_of_the_day, - base::Time end_time_of_the_day, - size_t max_results, - TimeVector* times); - - // Internal helper for getting the days in range. See SetDayOfTheWeekFilter(). - // |day| could be outside of the range: -4 (3 - 7) means Wednesday last week, - // 17 (3 + 2 * 7) means Wednesday in two weeks. - static void GetTimesOnTheDayOfTheWeek(int day, - base::Time week, - size_t max_results, - TimeVector* times); - - // Internal helper for getting the days in range. See SetDayTypeFilter(). - static void GetTimesOnTheSameDayType(bool workday, - base::Time week, - size_t max_results, - TimeVector* times); - - // Unites two vectors, so the new vector has non-intersecting union of the - // original ranges. Returns true if the result is non-empty, false otherwise. - static bool UniteTimeVectors(const TimeVector& vector1, - const TimeVector& vector2, - TimeVector* result); - - // Intersects two vectors, so the new vector has ranges that are covered by - // both of the original ranges. Returns true if the result is non-empty, false - // otherwise. - static bool IntersectTimeVectors(const TimeVector& vector1, - const TimeVector& vector2, - TimeVector* result); - - // Returns the time-of-day difference between the two times. The result will - // always represent a value between 0 and 12 hours inclusive. 
- static base::TimeDelta GetTimeOfDayDifference(base::Time t1, base::Time t2); - - base::Time filter_time_; - base::TimeDelta filter_width_; - enum { - DAY_UNDEFINED = -1, - WORKDAY = 7, - HOLIDAY = 8, - }; - int day_; - TimeVector times_; - size_t max_results_; - SortingOrder sorting_order_; -}; - -} // history - -#endif // CHROME_BROWSER_HISTORY_VISIT_FILTER_H_ diff --git a/chrome/browser/history/visit_filter_unittest.cc b/chrome/browser/history/visit_filter_unittest.cc deleted file mode 100644 index 051a09e..0000000 --- a/chrome/browser/history/visit_filter_unittest.cc +++ /dev/null @@ -1,314 +0,0 @@ -// Copyright (c) 2012 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "chrome/browser/history/visit_filter.h" - -#include - -#include "base/logging.h" -#include "base/time/time.h" -#include "components/history/core/browser/history_types.h" -#include "testing/gtest/include/gtest/gtest.h" - -namespace { - -// So the tests won't go into the other day +/- several hours, return midday of -// today. 
-base::Time GetClosestMidday() { - return base::Time::Now().LocalMidnight() + base::TimeDelta::FromHours(12); -} - -} // namespace - -namespace history { - -class VisitFilterTest : public testing::Test { - public: - VisitFilterTest(); - - protected: - void SetUp() override; - void TearDown() override; -}; - -VisitFilterTest::VisitFilterTest() { -} - -void VisitFilterTest::SetUp() { -} - -void VisitFilterTest::TearDown() { -} - -TEST_F(VisitFilterTest, CheckFilters) { - base::Time t(GetClosestMidday()); - base::TimeDelta two_hours(base::TimeDelta::FromHours(2)); - VisitFilter f; - f.set_max_results(21U); - f.SetFilterTime(t); - f.SetFilterWidth(two_hours); - EXPECT_EQ(21U, f.times().size()); - for (size_t i = 0; i < f.times().size(); ++i) { - base::Time t_interval(t); - t_interval -= base::TimeDelta::FromDays(i); - EXPECT_EQ(t_interval - two_hours, f.times()[i].first) << - "Fails at index:" << i; - EXPECT_EQ(t_interval + two_hours, f.times()[i].second) << - "Fails at index:" << i; - } - base::Time::Exploded et; - t.LocalExplode(&et); - f.SetDayOfTheWeekFilter(et.day_of_week); - // 3 weeks in 21 days. 
- ASSERT_EQ(3U, f.times().size()); - for (size_t i = 1; i < f.times().size(); ++i) { - base::Time t_interval(t); - t_interval -= base::TimeDelta::FromDays(i); - EXPECT_EQ(f.times()[i].first + base::TimeDelta::FromDays(7), - f.times()[i - 1].first) << - "Fails at index:" << i; - EXPECT_EQ(f.times()[i].second + base::TimeDelta::FromDays(7), - f.times()[i - 1].second) << - "Fails at index:" << i; - EXPECT_EQ(two_hours * 2, - f.times()[i].second - f.times()[i].first) << - "Fails at index:" << i; - } -} - -TEST_F(VisitFilterTest, GetTimesInRange) { - base::Time::Exploded et = { 2011, 7, 0, 19, 22, 15, 11, 0 }; - base::Time t(base::Time::FromLocalExploded(et)); - base::TimeDelta two_hours(base::TimeDelta::FromHours(2)); - VisitFilter::TimeVector times; - VisitFilter::GetTimesInRange(t - two_hours, t + two_hours, 10U, ×); - EXPECT_GT(11U, times.size()); - for (size_t i = 0; i < times.size(); ++i) { - base::Time t_interval(t); - t_interval -= base::TimeDelta::FromDays(i); - EXPECT_EQ(t_interval - two_hours, times[i].first) << "Fails at index:" << i; - EXPECT_EQ(t_interval + two_hours, times[i].second) << - "Fails at index:" << i; - } -} - -TEST_F(VisitFilterTest, GetTimesOnTheDayOfTheWeek) { - base::Time t(GetClosestMidday()); - VisitFilter::TimeVector times; - base::Time::Exploded et; - t.LocalExplode(&et); - VisitFilter::GetTimesOnTheDayOfTheWeek(et.day_of_week, t, 10U, ×); - EXPECT_GT(11U, times.size()); - et.hour = 0; - et.minute = 0; - et.second = 0; - et.millisecond = 0; - for (size_t i = 0; i < times.size(); ++i) { - base::Time t_interval(base::Time::FromLocalExploded(et)); - t_interval -= base::TimeDelta::FromDays(7 * i); - EXPECT_EQ(t_interval, times[i].first) << "Fails at index:" << i; - EXPECT_EQ(t_interval + base::TimeDelta::FromDays(1), times[i].second) << - "Fails at index:" << i; - } -} - -TEST_F(VisitFilterTest, GetTimesOnTheSameDayType) { - base::Time::Exploded et = { 2011, 7, 0, 19, 22, 15, 11, 0 }; - base::Time t(base::Time::FromLocalExploded(et)); - 
VisitFilter::TimeVector times; - t.LocalExplode(&et); - VisitFilter::GetTimesOnTheSameDayType(et.day_of_week, t, 10U, ×); - EXPECT_GT(11U, times.size()); - et.hour = 0; - et.minute = 0; - et.second = 0; - et.millisecond = 0; - base::Time t_start(base::Time::FromLocalExploded(et)); - base::TimeDelta t_length; - if (et.day_of_week == 0 || et.day_of_week == 6) { - // Sunday and Saturday. - t_length = base::TimeDelta::FromDays(2); - if (et.day_of_week == 0) - t_start -= base::TimeDelta::FromDays(1); - } else { - t_length = base::TimeDelta::FromDays(5); - if (et.day_of_week != 1) - t_start -= base::TimeDelta::FromDays(et.day_of_week - 1); - } - for (size_t i = 0; i < times.size(); ++i) { - base::Time t_interval(t_start); - t_interval -= base::TimeDelta::FromDays(7 * i); - EXPECT_EQ(t_interval, times[i].first) << "Fails at index:" << i; - EXPECT_EQ(t_interval + t_length, times[i].second) << "Fails at index:" << i; - } -} - -TEST_F(VisitFilterTest, UniteTimeVectors) { - base::Time t(base::Time::Now()); - base::TimeDelta one_hour(base::TimeDelta::FromHours(1)); - base::TimeDelta one_day(base::TimeDelta::FromDays(1)); - VisitFilter::TimeVector times1; - times1.push_back(std::make_pair(t - one_hour, t + one_hour)); - times1.push_back(std::make_pair(t - one_hour - one_day, - t + one_hour - one_day)); - times1.push_back(std::make_pair(t - one_hour - one_day * 2, - t + one_hour - one_day * 2)); - times1.push_back(std::make_pair(t - one_hour - one_day * 3, - t + one_hour - one_day * 3)); - - VisitFilter::TimeVector times2; - // Should lie completely within times1[0]. - times2.push_back(std::make_pair(t - one_hour / 2, t + one_hour / 2)); - // Should lie just before times1[1]. - times2.push_back(std::make_pair(t + one_hour * 2 - one_day, - t + one_hour * 3 - one_day)); - // Should intersect with times1. 
- times2.push_back(std::make_pair(t - one_day * 2, - t + one_hour * 2 - one_day * 2)); - times2.push_back(std::make_pair(t - one_hour * 2 - one_day * 3, - t - one_day * 3)); - - VisitFilter::TimeVector result; - EXPECT_TRUE(VisitFilter::UniteTimeVectors(times1, times2, &result)); - ASSERT_EQ(5U, result.size()); - EXPECT_EQ(t - one_hour, result[0].first); - EXPECT_EQ(t + one_hour, result[0].second); - EXPECT_EQ(t + one_hour * 2 - one_day, result[1].first); - EXPECT_EQ(t + one_hour * 3 - one_day, result[1].second); - EXPECT_EQ(t - one_hour - one_day, result[2].first); - EXPECT_EQ(t + one_hour - one_day, result[2].second); - EXPECT_EQ(t - one_hour - one_day * 2, result[3].first); - EXPECT_EQ(t + one_hour * 2 - one_day * 2, result[3].second); - EXPECT_EQ(t - one_hour * 2 - one_day * 3, result[4].first); - EXPECT_EQ(t + one_hour - one_day * 3, result[4].second); - - EXPECT_FALSE(VisitFilter::UniteTimeVectors(VisitFilter::TimeVector(), - VisitFilter::TimeVector(), - &result)); - EXPECT_TRUE(result.empty()); -} - -TEST_F(VisitFilterTest, IntersectTimeVectors) { - base::Time t(base::Time::Now()); - base::TimeDelta one_hour(base::TimeDelta::FromHours(1)); - base::TimeDelta one_day(base::TimeDelta::FromDays(1)); - VisitFilter::TimeVector times1; - times1.push_back(std::make_pair(t - one_hour, t + one_hour)); - - VisitFilter::TimeVector times2; - // Should lie just before times1[0]. - times2.push_back(std::make_pair(t + one_hour * 2, - t + one_hour * 3)); - - VisitFilter::TimeVector result; - EXPECT_FALSE(VisitFilter::IntersectTimeVectors(times1, times2, &result)); - EXPECT_TRUE(result.empty()); - - times1.push_back(std::make_pair(t - one_hour - one_day, - t + one_hour - one_day)); - times1.push_back(std::make_pair(t - one_hour - one_day * 2, - t + one_hour - one_day * 2)); - times1.push_back(std::make_pair(t - one_hour - one_day * 3, - t + one_hour - one_day * 3)); - - // Should lie completely within times1[1]. 
- times2.push_back(std::make_pair(t - one_hour / 2 - one_day, - t + one_hour / 2 - one_day)); - // Should intersect with times1. - times2.push_back(std::make_pair(t - one_day * 2, - t + one_hour * 2 - one_day * 2)); - times2.push_back(std::make_pair(t - one_hour * 2 - one_day * 3, - t - one_day * 3)); - - EXPECT_TRUE(VisitFilter::IntersectTimeVectors(times1, times2, &result)); - ASSERT_EQ(3U, result.size()); - EXPECT_EQ(t - one_hour / 2 - one_day, result[0].first); - EXPECT_EQ(t + one_hour / 2 - one_day, result[0].second); - EXPECT_EQ(t - one_day * 2, result[1].first); - EXPECT_EQ(t + one_hour - one_day * 2, result[1].second); - EXPECT_EQ(t - one_hour - one_day * 3, result[2].first); - EXPECT_EQ(t - one_day * 3, result[2].second); - - // Check that touching ranges do not intersect. - times1.clear(); - times1.push_back(std::make_pair(t - one_hour, t)); - times2.clear(); - times2.push_back(std::make_pair(t, t + one_hour)); - EXPECT_FALSE(VisitFilter::IntersectTimeVectors(times1, times2, &result)); - EXPECT_TRUE(result.empty()); -} - -TEST_F(VisitFilterTest, GetVisitScore) { - base::Time filter_time; - ASSERT_TRUE(base::Time::FromString("Tue, 24 Apr 2012, 12:00:00", - &filter_time)); - VisitFilter filter; - VisitRow visit; - - filter.set_sorting_order(VisitFilter::ORDER_BY_RECENCY); - filter.SetFilterTime(filter_time); - filter.SetFilterWidth(base::TimeDelta::FromHours(1)); - - double one_week_one_hour_staleness = pow(2, -(24.0 * 7.0 + 1.0) / - (24.0 * 7.0)); - - // No decay on current visit. - visit.visit_time = filter_time; - EXPECT_DOUBLE_EQ(1.0, filter.GetVisitScore(visit)); - // Half score after a week. - visit.visit_time = filter_time - base::TimeDelta::FromDays(7); - EXPECT_DOUBLE_EQ(0.5, filter.GetVisitScore(visit)); - // Future visits should be treated as current. 
- visit.visit_time = filter_time + base::TimeDelta::FromDays(1); - EXPECT_DOUBLE_EQ(1.0, filter.GetVisitScore(visit)); - - filter.set_sorting_order(VisitFilter::ORDER_BY_VISIT_COUNT); - // Every visit should score 1 with this filter. - visit.visit_time = filter_time; - EXPECT_DOUBLE_EQ(1.0, filter.GetVisitScore(visit)); - visit.visit_time = filter_time - base::TimeDelta::FromDays(7); - EXPECT_DOUBLE_EQ(1.0, filter.GetVisitScore(visit)); - visit.visit_time = filter_time + base::TimeDelta::FromDays(7); - EXPECT_DOUBLE_EQ(1.0, filter.GetVisitScore(visit)); - - filter.set_sorting_order(VisitFilter::ORDER_BY_TIME_LINEAR); - visit.visit_time = filter_time; - EXPECT_DOUBLE_EQ(1.0, filter.GetVisitScore(visit)); - // Half the filter width forward in time should get half the score for the - // time difference, but no staleness decay. - visit.visit_time = filter_time + base::TimeDelta::FromMinutes(30); - EXPECT_DOUBLE_EQ(0.5, filter.GetVisitScore(visit)); - // One week back in time gets full time difference score, but a staleness - // factor of 0.5 - visit.visit_time = filter_time - base::TimeDelta::FromDays(7); - EXPECT_DOUBLE_EQ(0.5, filter.GetVisitScore(visit)); - // One week plus half a filter width should have it's score halved before - // the staleness factor. - filter.SetFilterWidth(base::TimeDelta::FromHours(2)); - visit.visit_time = filter_time - base::TimeDelta::FromDays(7) - - base::TimeDelta::FromHours(1); - EXPECT_DOUBLE_EQ(0.5 * one_week_one_hour_staleness, - filter.GetVisitScore(visit)); - filter.SetFilterWidth(base::TimeDelta::FromHours(1)); - - filter.set_sorting_order(VisitFilter::ORDER_BY_TIME_GAUSSIAN); - visit.visit_time = filter_time; - EXPECT_DOUBLE_EQ(1.0, filter.GetVisitScore(visit)); - // Going forward in time to test the normal distribution function. 
- visit.visit_time = filter_time + base::TimeDelta::FromHours(1); - EXPECT_DOUBLE_EQ(exp(-0.5), filter.GetVisitScore(visit)); - visit.visit_time = filter_time + base::TimeDelta::FromMinutes(30); - EXPECT_DOUBLE_EQ(exp(-0.125), filter.GetVisitScore(visit)); - // One week back in time gets full time difference score, but a staleness - // factor of 0.5 - visit.visit_time = filter_time - base::TimeDelta::FromDays(7); - EXPECT_DOUBLE_EQ(0.5, filter.GetVisitScore(visit)); - // One standard deviation of decay, plus the staleness factor. - visit.visit_time = filter_time - base::TimeDelta::FromDays(7) - - base::TimeDelta::FromHours(1); - EXPECT_DOUBLE_EQ(exp(-0.5) * one_week_one_hour_staleness, - filter.GetVisitScore(visit)); -} - -} // namespace history diff --git a/chrome/browser/history/visit_tracker.cc b/chrome/browser/history/visit_tracker.cc deleted file mode 100644 index c541d87..0000000 --- a/chrome/browser/history/visit_tracker.cc +++ /dev/null @@ -1,106 +0,0 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "chrome/browser/history/visit_tracker.h" - -#include "base/stl_util.h" - -namespace history { - -// When the list gets longer than 'MaxItems', CleanupTransitionList will resize -// the list down to 'ResizeTo' size. This is so we only do few block moves of -// the data rather than constantly shuffle stuff around in the vector. 
-static const size_t kMaxItemsInTransitionList = 96; -static const size_t kResizeBigTransitionListTo = 64; -static_assert(kResizeBigTransitionListTo < kMaxItemsInTransitionList, - "maxium number of items must be larger than we are resizing to"); - -VisitTracker::VisitTracker() { -} - -VisitTracker::~VisitTracker() { - STLDeleteContainerPairSecondPointers(contexts_.begin(), contexts_.end()); -} - -// This function is potentially slow because it may do up to two brute-force -// searches of the transitions list. This transitions list is kept to a -// relatively small number by CleanupTransitionList so it shouldn't be a big -// deal. However, if this ends up being noticable for performance, we may want -// to optimize lookup. -VisitID VisitTracker::GetLastVisit(ContextID context_id, - int32 page_id, - const GURL& referrer) { - if (referrer.is_empty() || !context_id) - return 0; - - ContextList::iterator i = contexts_.find(context_id); - if (i == contexts_.end()) - return 0; // We don't have any entries for this context. - TransitionList& transitions = *i->second; - - // Recall that a page ID is associated with a single session history entry. - // In the case of automatically loaded iframes, many visits/URLs can have the - // same page ID. - // - // We search backwards, starting at the current page ID, for the referring - // URL. This won't always be correct. For example, if a render process has - // the same page open in two different tabs, or even in two different frames, - // we can get confused about which was which. We can have the renderer - // report more precise referrer information in the future, but this is a - // hard problem and doesn't affect much in terms of real-world issues. - // - // We assume that the page IDs are increasing over time, so larger IDs than - // the current input ID happened in the future (this will occur if the user - // goes back). 
We can ignore future transitions because if you navigate, go - // back, and navigate some more, we'd like to have one node with two out - // edges in our visit graph. - for (int i = static_cast(transitions.size()) - 1; i >= 0; i--) { - if (transitions[i].page_id <= page_id && transitions[i].url == referrer) { - // Found it. - return transitions[i].visit_id; - } - } - - // We can't find the referrer. - return 0; -} - -void VisitTracker::AddVisit(ContextID context_id, - int32 page_id, - const GURL& url, - VisitID visit_id) { - TransitionList* transitions = contexts_[context_id]; - if (!transitions) { - transitions = new TransitionList; - contexts_[context_id] = transitions; - } - - Transition t; - t.url = url; - t.page_id = page_id; - t.visit_id = visit_id; - transitions->push_back(t); - - CleanupTransitionList(transitions); -} - -void VisitTracker::ClearCachedDataForContextID(ContextID context_id) { - ContextList::iterator i = contexts_.find(context_id); - if (i == contexts_.end()) - return; // We don't have any entries for this context. - - delete i->second; - contexts_.erase(i); -} - - -void VisitTracker::CleanupTransitionList(TransitionList* transitions) { - if (transitions->size() <= kMaxItemsInTransitionList) - return; // Nothing to do. - - transitions->erase(transitions->begin(), - transitions->begin() + kResizeBigTransitionListTo); -} - -} // namespace history diff --git a/chrome/browser/history/visit_tracker.h b/chrome/browser/history/visit_tracker.h deleted file mode 100644 index 890a889..0000000 --- a/chrome/browser/history/visit_tracker.h +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#ifndef CHROME_BROWSER_HISTORY_VISIT_TRACKER_H__ -#define CHROME_BROWSER_HISTORY_VISIT_TRACKER_H__ - -#include -#include - -#include "base/basictypes.h" -#include "components/history/core/browser/history_types.h" - -namespace history { - -// Tracks history transitions between pages. The history backend uses this to -// link up page transitions to form a chain of page visits, and to set the -// transition type properly. -// -// This class is not thread safe. -class VisitTracker { - public: - VisitTracker(); - ~VisitTracker(); - - // Notifications ------------------------------------------------------------- - - void AddVisit(ContextID context_id, - int32 page_id, - const GURL& url, - VisitID visit_id); - - // When a RenderProcessHost is destroyed, we want to clear out our saved - // transitions/visit IDs for it. - void ClearCachedDataForContextID(ContextID context_id); - - // Querying ------------------------------------------------------------------ - - // Returns the visit ID for the transition given information about the visit - // supplied by the renderer. We will return 0 if there is no appropriate - // referring visit. - VisitID GetLastVisit(ContextID context_id, int32 page_id, const GURL& url); - - private: - struct Transition { - GURL url; // URL that the event happened to. - int32 page_id; // ID generated by the render process host. - VisitID visit_id; // Visit ID generated by history. - }; - typedef std::vector TransitionList; - typedef std::map ContextList; - - // Expires oldish items in the given transition list. This keeps the list - // size small by removing items that are unlikely to be needed, which is - // important for GetReferrer which does brute-force searches of this list. - void CleanupTransitionList(TransitionList* transitions); - - // Maps render view hosts to lists of recent transitions. 
- ContextList contexts_; - - DISALLOW_COPY_AND_ASSIGN(VisitTracker); -}; - -} // namespace history - -#endif // CHROME_BROWSER_HISTORY_VISIT_TRACKER_H__ diff --git a/chrome/browser/history/visit_tracker_unittest.cc b/chrome/browser/history/visit_tracker_unittest.cc deleted file mode 100644 index 4ed609c..0000000 --- a/chrome/browser/history/visit_tracker_unittest.cc +++ /dev/null @@ -1,129 +0,0 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "chrome/browser/history/visit_tracker.h" -#include "base/basictypes.h" -#include "testing/gtest/include/gtest/gtest.h" - -using history::ContextID; -using history::VisitTracker; - -namespace { - -struct VisitToTest { - // Identifies the context. - int context_id_int; - int32 page_id; - - // Used when adding this to the tracker - const char* url; - const history::VisitID visit_id; - - // Used when finding the referrer - const char* referrer; - - // the correct referring visit ID to compare to the computed one - history::VisitID referring_visit_id; -}; - -void RunTest(VisitTracker* tracker, VisitToTest* test, int test_count) { - for (int i = 0; i < test_count; i++) { - // Our host pointer is actually just an int, convert it (it will not get - // dereferenced). - ContextID context_id = reinterpret_cast(test[i].context_id_int); - - // Check the referrer for this visit. - history::VisitID ref_visit = tracker->GetLastVisit( - context_id, test[i].page_id, GURL(test[i].referrer)); - EXPECT_EQ(test[i].referring_visit_id, ref_visit); - - // Now add this visit. - tracker->AddVisit( - context_id, test[i].page_id, GURL(test[i].url), test[i].visit_id); - } -} - -} // namespace - -// A simple test that makes sure we transition between main pages in the -// presence of back/forward. 
-TEST(VisitTracker, SimpleTransitions) { - VisitToTest test_simple[] = { - // Started here: - {1, 1, "http://www.google.com/", 1, "", 0}, - // Clicked a link: - {1, 2, "http://images.google.com/", 2, "http://www.google.com/", 1}, - // Went back, then clicked a link: - {1, 3, "http://video.google.com/", 3, "http://www.google.com/", 1}, - }; - - VisitTracker tracker; - RunTest(&tracker, test_simple, arraysize(test_simple)); -} - -// Test that referrer is properly computed when there are different frame -// navigations happening. -TEST(VisitTracker, Frames) { - VisitToTest test_frames[] = { - // Started here: - {1, 1, "http://foo.com/", 1, "", 0}, - // Which had an auto-loaded subframe: - {1, 1, "http://foo.com/ad.html", 2, "http://foo.com/", 1}, - // ...and another auto-loaded subframe: - {1, 1, "http://foo.com/ad2.html", 3, "http://foo.com/", 1}, - // ...and the user navigated the first subframe to somwhere else - {1, 2, "http://bar.com/", 4, "http://foo.com/ad.html", 2}, - // ...and then the second subframe somewhere else - {1, 3, "http://fud.com/", 5, "http://foo.com/ad2.html", 3}, - // ...and then the main frame somewhere else. - {1, 4, "http://www.google.com/", 6, "http://foo.com/", 1}, - }; - - VisitTracker tracker; - RunTest(&tracker, test_frames, arraysize(test_frames)); -} - -// Test frame navigation to make sure that the referrer is properly computed -// when there are multiple processes navigating the same pages. 
-TEST(VisitTracker, MultiProcess) { - VisitToTest test_processes[] = { - // Process 1 and 2 start here: - {1, 1, "http://foo.com/", 1, "", 0}, - {2, 1, "http://foo.com/", 2, "", 0}, - // They have some subframes: - {1, 1, "http://foo.com/ad.html", 3, "http://foo.com/", 1}, - {2, 1, "http://foo.com/ad.html", 4, "http://foo.com/", 2}, - // Subframes are navigated: - {1, 2, "http://bar.com/", 5, "http://foo.com/ad.html", 3}, - {2, 2, "http://bar.com/", 6, "http://foo.com/ad.html", 4}, - // Main frame is navigated: - {1, 3, "http://www.google.com/", 7, "http://foo.com/", 1}, - {2, 3, "http://www.google.com/", 8, "http://foo.com/", 2}, - }; - - VisitTracker tracker; - RunTest(&tracker, test_processes, arraysize(test_processes)); -} - -// Test that processes get removed properly. -TEST(VisitTracker, ProcessRemove) { - // Simple navigation from one process. - VisitToTest part1[] = { - {1, 1, "http://www.google.com/", 1, "", 0}, - {1, 2, "http://images.google.com/", 2, "http://www.google.com/", 1}, - }; - - VisitTracker tracker; - RunTest(&tracker, part1, arraysize(part1)); - - // Say that context has been invalidated. - tracker.ClearCachedDataForContextID(reinterpret_cast(1)); - - // Simple navigation from a new process with the same ID, it should not find - // a referrer. - VisitToTest part2[] = { - {1, 1, "http://images.google.com/", 2, "http://www.google.com/", 0}, - }; - RunTest(&tracker, part2, arraysize(part2)); -} diff --git a/chrome/browser/history/visitsegment_database.cc b/chrome/browser/history/visitsegment_database.cc deleted file mode 100644 index 3a54c3d..0000000 --- a/chrome/browser/history/visitsegment_database.cc +++ /dev/null @@ -1,327 +0,0 @@ -// Copyright (c) 2012 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#include "chrome/browser/history/visitsegment_database.h" - -#include - -#include -#include -#include - -#include "base/logging.h" -#include "base/stl_util.h" -#include "base/strings/string_util.h" -#include "base/strings/utf_string_conversions.h" -#include "components/history/core/browser/page_usage_data.h" -#include "sql/statement.h" -#include "sql/transaction.h" - -// The following tables are used to store url segment information. -// -// segments -// id Primary key -// name A unique string to represent that segment. (URL derived) -// url_id ID of the url currently used to represent this segment. -// -// segment_usage -// id Primary key -// segment_id Corresponding segment id -// time_slot time stamp identifying for what day this entry is about -// visit_count Number of visit in the segment -// - -namespace history { - -VisitSegmentDatabase::VisitSegmentDatabase() { -} - -VisitSegmentDatabase::~VisitSegmentDatabase() { -} - -bool VisitSegmentDatabase::InitSegmentTables() { - // Segments table. - if (!GetDB().DoesTableExist("segments")) { - if (!GetDB().Execute("CREATE TABLE segments (" - "id INTEGER PRIMARY KEY," - "name VARCHAR," - "url_id INTEGER NON NULL)")) { - return false; - } - - if (!GetDB().Execute( - "CREATE INDEX segments_name ON segments(name)")) { - return false; - } - } - - // This was added later, so we need to try to create it even if the table - // already exists. - if (!GetDB().Execute("CREATE INDEX IF NOT EXISTS segments_url_id ON " - "segments(url_id)")) - return false; - - // Segment usage table. 
- if (!GetDB().DoesTableExist("segment_usage")) { - if (!GetDB().Execute("CREATE TABLE segment_usage (" - "id INTEGER PRIMARY KEY," - "segment_id INTEGER NOT NULL," - "time_slot INTEGER NOT NULL," - "visit_count INTEGER DEFAULT 0 NOT NULL)")) { - return false; - } - if (!GetDB().Execute( - "CREATE INDEX segment_usage_time_slot_segment_id ON " - "segment_usage(time_slot, segment_id)")) { - return false; - } - } - - // Added in a later version, so we always need to try to creat this index. - if (!GetDB().Execute("CREATE INDEX IF NOT EXISTS segments_usage_seg_id " - "ON segment_usage(segment_id)")) - return false; - - return true; -} - -bool VisitSegmentDatabase::DropSegmentTables() { - // Dropping the tables will implicitly delete the indices. - return GetDB().Execute("DROP TABLE segments") && - GetDB().Execute("DROP TABLE segment_usage"); -} - -// Note: the segment name is derived from the URL but is not a URL. It is -// a string that can be easily recreated from various URLS. Maybe this should -// be an MD5 to limit the length. -// -// static -std::string VisitSegmentDatabase::ComputeSegmentName(const GURL& url) { - // TODO(brettw) this should probably use the registry controlled - // domains service. - GURL::Replacements r; - const char kWWWDot[] = "www."; - const int kWWWDotLen = arraysize(kWWWDot) - 1; - - std::string host = url.host(); - const char* host_c = host.c_str(); - // Remove www. to avoid some dups. - if (static_cast(host.size()) > kWWWDotLen && - LowerCaseEqualsASCII(host_c, host_c + kWWWDotLen, kWWWDot)) { - r.SetHost(host.c_str(), - url::Component(kWWWDotLen, - static_cast(host.size()) - kWWWDotLen)); - } - // Remove other stuff we don't want. 
- r.ClearUsername(); - r.ClearPassword(); - r.ClearQuery(); - r.ClearRef(); - r.ClearPort(); - - return url.ReplaceComponents(r).spec(); -} - -SegmentID VisitSegmentDatabase::GetSegmentNamed( - const std::string& segment_name) { - sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, - "SELECT id FROM segments WHERE name = ?")); - statement.BindString(0, segment_name); - - if (statement.Step()) - return statement.ColumnInt64(0); - return 0; -} - -bool VisitSegmentDatabase::UpdateSegmentRepresentationURL(SegmentID segment_id, - URLID url_id) { - sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, - "UPDATE segments SET url_id = ? WHERE id = ?")); - statement.BindInt64(0, url_id); - statement.BindInt64(1, segment_id); - - return statement.Run(); -} - -URLID VisitSegmentDatabase::GetSegmentRepresentationURL(SegmentID segment_id) { - sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, - "SELECT url_id FROM segments WHERE id = ?")); - statement.BindInt64(0, segment_id); - - if (statement.Step()) - return statement.ColumnInt64(0); - return 0; -} - -SegmentID VisitSegmentDatabase::CreateSegment(URLID url_id, - const std::string& segment_name) { - sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, - "INSERT INTO segments (name, url_id) VALUES (?,?)")); - statement.BindString(0, segment_name); - statement.BindInt64(1, url_id); - - if (statement.Run()) - return GetDB().GetLastInsertRowId(); - return 0; -} - -bool VisitSegmentDatabase::IncreaseSegmentVisitCount(SegmentID segment_id, - base::Time ts, - int amount) { - base::Time t = ts.LocalMidnight(); - - sql::Statement select(GetDB().GetCachedStatement(SQL_FROM_HERE, - "SELECT id, visit_count FROM segment_usage " - "WHERE time_slot = ? 
AND segment_id = ?")); - select.BindInt64(0, t.ToInternalValue()); - select.BindInt64(1, segment_id); - - if (!select.is_valid()) - return false; - - if (select.Step()) { - sql::Statement update(GetDB().GetCachedStatement(SQL_FROM_HERE, - "UPDATE segment_usage SET visit_count = ? WHERE id = ?")); - update.BindInt64(0, select.ColumnInt64(1) + static_cast(amount)); - update.BindInt64(1, select.ColumnInt64(0)); - - return update.Run(); - } else { - sql::Statement insert(GetDB().GetCachedStatement(SQL_FROM_HERE, - "INSERT INTO segment_usage " - "(segment_id, time_slot, visit_count) VALUES (?, ?, ?)")); - insert.BindInt64(0, segment_id); - insert.BindInt64(1, t.ToInternalValue()); - insert.BindInt64(2, static_cast(amount)); - - return insert.Run(); - } -} - -void VisitSegmentDatabase::QuerySegmentUsage( - base::Time from_time, - int max_result_count, - std::vector* results) { - // This function gathers the highest-ranked segments in two queries. - // The first gathers scores for all segments. - // The second gathers segment data (url, title, etc.) for the highest-ranked - // segments. - - // Gather all the segment scores. - sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, - "SELECT segment_id, time_slot, visit_count " - "FROM segment_usage WHERE time_slot >= ? 
" - "ORDER BY segment_id")); - if (!statement.is_valid()) - return; - - base::Time ts = from_time.LocalMidnight(); - statement.BindInt64(0, ts.ToInternalValue()); - - base::Time now = base::Time::Now(); - SegmentID last_segment_id = 0; - PageUsageData* pud = NULL; - float score = 0; - while (statement.Step()) { - SegmentID segment_id = statement.ColumnInt64(0); - if (segment_id != last_segment_id) { - if (pud) { - pud->SetScore(score); - results->push_back(pud); - } - - pud = new PageUsageData(segment_id); - score = 0; - last_segment_id = segment_id; - } - - base::Time timeslot = - base::Time::FromInternalValue(statement.ColumnInt64(1)); - int visit_count = statement.ColumnInt(2); - int days_ago = (now - timeslot).InDays(); - - // Score for this day in isolation. - float day_visits_score = 1.0f + log(static_cast(visit_count)); - // Recent visits count more than historical ones, so we multiply in a boost - // related to how long ago this day was. - // This boost is a curve that smoothly goes through these values: - // Today gets 3x, a week ago 2x, three weeks ago 1.5x, falling off to 1x - // at the limit of how far we reach into the past. - float recency_boost = 1.0f + (2.0f * (1.0f / (1.0f + days_ago/7.0f))); - score += recency_boost * day_visits_score; - } - - if (pud) { - pud->SetScore(score); - results->push_back(pud); - } - - // Limit to the top kResultCount results. - std::sort(results->begin(), results->end(), PageUsageData::Predicate); - if (static_cast(results->size()) > max_result_count) { - STLDeleteContainerPointers(results->begin() + max_result_count, - results->end()); - results->resize(max_result_count); - } - - // Now fetch the details about the entries we care about. 
- sql::Statement statement2(GetDB().GetCachedStatement(SQL_FROM_HERE, - "SELECT urls.url, urls.title FROM urls " - "JOIN segments ON segments.url_id = urls.id " - "WHERE segments.id = ?")); - - if (!statement2.is_valid()) - return; - - for (size_t i = 0; i < results->size(); ++i) { - PageUsageData* pud = (*results)[i]; - statement2.BindInt64(0, pud->GetID()); - if (statement2.Step()) { - pud->SetURL(GURL(statement2.ColumnString(0))); - pud->SetTitle(statement2.ColumnString16(1)); - } - statement2.Reset(true); - } -} - -bool VisitSegmentDatabase::DeleteSegmentData(base::Time older_than) { - sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, - "DELETE FROM segment_usage WHERE time_slot < ?")); - statement.BindInt64(0, older_than.LocalMidnight().ToInternalValue()); - - return statement.Run(); -} - -bool VisitSegmentDatabase::DeleteSegmentForURL(URLID url_id) { - sql::Statement delete_usage(GetDB().GetCachedStatement(SQL_FROM_HERE, - "DELETE FROM segment_usage WHERE segment_id IN " - "(SELECT id FROM segments WHERE url_id = ?)")); - delete_usage.BindInt64(0, url_id); - - if (!delete_usage.Run()) - return false; - - sql::Statement delete_seg(GetDB().GetCachedStatement(SQL_FROM_HERE, - "DELETE FROM segments WHERE url_id = ?")); - delete_seg.BindInt64(0, url_id); - - return delete_seg.Run(); -} - -bool VisitSegmentDatabase::MigratePresentationIndex() { - sql::Transaction transaction(&GetDB()); - return transaction.Begin() && - GetDB().Execute("DROP TABLE presentation") && - GetDB().Execute("CREATE TABLE segments_tmp (" - "id INTEGER PRIMARY KEY," - "name VARCHAR," - "url_id INTEGER NON NULL)") && - GetDB().Execute("INSERT INTO segments_tmp SELECT " - "id, name, url_id FROM segments") && - GetDB().Execute("DROP TABLE segments") && - GetDB().Execute("ALTER TABLE segments_tmp RENAME TO segments") && - transaction.Commit(); -} - -} // namespace history diff --git a/chrome/browser/history/visitsegment_database.h b/chrome/browser/history/visitsegment_database.h 
deleted file mode 100644 index 2302978..0000000 --- a/chrome/browser/history/visitsegment_database.h +++ /dev/null @@ -1,92 +0,0 @@ -// Copyright (c) 2012 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef CHROME_BROWSER_HISTORY_VISITSEGMENT_DATABASE_H_ -#define CHROME_BROWSER_HISTORY_VISITSEGMENT_DATABASE_H_ - -#include "base/basictypes.h" -#include "components/history/core/browser/history_types.h" - -class PageUsageData; - -namespace sql { -class Connection; -} - -namespace history { - -// Tracks pages used for the most visited view. -class VisitSegmentDatabase { - public: - // Must call InitSegmentTables before using any other part of this class. - VisitSegmentDatabase(); - virtual ~VisitSegmentDatabase(); - - // Compute a segment name given a URL. The segment name is currently the - // source url spec less some information such as query strings. - static std::string ComputeSegmentName(const GURL& url); - - // Returns the ID of the segment with the corresponding name, or 0 if there - // is no segment with that name. - SegmentID GetSegmentNamed(const std::string& segment_name); - - // Update the segment identified by |out_segment_id| with the provided URL ID. - // The URL identifies the page that will now represent the segment. If url_id - // is non zero, it is assumed to be the row id of |url|. - bool UpdateSegmentRepresentationURL(SegmentID segment_id, - URLID url_id); - - // Return the ID of the URL currently used to represent this segment or 0 if - // an error occured. - URLID GetSegmentRepresentationURL(SegmentID segment_id); - - // Create a segment for the provided URL ID with the given name. Returns the - // ID of the newly created segment, or 0 on failure. - SegmentID CreateSegment(URLID url_id, const std::string& segment_name); - - // Increase the segment visit count by the provided amount. Return true on - // success. 
- bool IncreaseSegmentVisitCount(SegmentID segment_id, base::Time ts, - int amount); - - // Compute the segment usage since |from_time| using the provided aggregator. - // A PageUsageData is added in |result| for the highest-scored segments up to - // |max_result_count|. - void QuerySegmentUsage(base::Time from_time, - int max_result_count, - std::vector* result); - - // Delete all the segment usage data which is older than the provided time - // stamp. - bool DeleteSegmentData(base::Time older_than); - - // Change the presentation id for the segment identified by |segment_id| - bool SetSegmentPresentationIndex(SegmentID segment_id, int index); - - // Delete the segment currently using the provided url for representation. - // This will also delete any associated segment usage data. - bool DeleteSegmentForURL(URLID url_id); - - protected: - // Returns the database for the functions in this interface. - virtual sql::Connection& GetDB() = 0; - - // Creates the tables used by this class if necessary. Returns true on - // success. - bool InitSegmentTables(); - - // Deletes all the segment tables, returning true on success. - bool DropSegmentTables(); - - // Removes the 'pres_index' column from the segments table and the - // presentation table is removed entirely. 
- bool MigratePresentationIndex(); - - private: - DISALLOW_COPY_AND_ASSIGN(VisitSegmentDatabase); -}; - -} // namespace history - -#endif // CHROME_BROWSER_HISTORY_VISITSEGMENT_DATABASE_H_ diff --git a/chrome/browser/prerender/prerender_local_predictor.h b/chrome/browser/prerender/prerender_local_predictor.h index e91ba31..8734d69 100644 --- a/chrome/browser/prerender/prerender_local_predictor.h +++ b/chrome/browser/prerender/prerender_local_predictor.h @@ -14,8 +14,8 @@ #include "base/scoped_observer.h" #include "base/task/cancelable_task_tracker.h" #include "base/timer/timer.h" -#include "chrome/browser/history/visit_database.h" #include "components/history/core/browser/history_service_observer.h" +#include "components/history/core/browser/visit_database.h" #include "content/public/browser/session_storage_namespace.h" #include "net/url_request/url_fetcher_delegate.h" #include "url/gurl.h" diff --git a/chrome/browser/ui/BUILD.gn b/chrome/browser/ui/BUILD.gn index 41e2328..6ca4613 100644 --- a/chrome/browser/ui/BUILD.gn +++ b/chrome/browser/ui/BUILD.gn @@ -41,7 +41,6 @@ static_library("ui") { "//chrome:strings", "//chrome/app/resources:platform_locale_settings", "//chrome/app/theme:theme_resources", - "//chrome/browser/history:in_memory_url_index_cache_proto", "//chrome/browser/net:cert_logger_proto", "//chrome/common", "//chrome/common/net", @@ -49,6 +48,7 @@ static_library("ui") { "//components/auto_login_parser", "//components/dom_distiller/webui", "//components/feedback/proto", + "//components/history/core/browser:proto", "//components/invalidation", "//components/omaha_client", "//components/onc", diff --git a/chrome/browser/ui/webui/ntp/suggestions_source_top_sites.cc b/chrome/browser/ui/webui/ntp/suggestions_source_top_sites.cc index 93b5204..49fb628 100644 --- a/chrome/browser/ui/webui/ntp/suggestions_source_top_sites.cc +++ b/chrome/browser/ui/webui/ntp/suggestions_source_top_sites.cc @@ -10,13 +10,14 @@ #include "base/stl_util.h" #include 
"base/strings/string_number_conversions.h" #include "base/values.h" +#include "chrome/browser/history/history_service.h" #include "chrome/browser/history/history_service_factory.h" #include "chrome/browser/history/top_sites.h" -#include "chrome/browser/history/visit_filter.h" #include "chrome/browser/profiles/profile.h" #include "chrome/browser/ui/webui/ntp/new_tab_ui.h" #include "chrome/browser/ui/webui/ntp/suggestions_combiner.h" #include "chrome/common/chrome_switches.h" +#include "components/history/core/browser/visit_filter.h" namespace { diff --git a/chrome/browser/ui/webui/ntp/suggestions_source_top_sites.h b/chrome/browser/ui/webui/ntp/suggestions_source_top_sites.h index 818397c..3d22043 100644 --- a/chrome/browser/ui/webui/ntp/suggestions_source_top_sites.h +++ b/chrome/browser/ui/webui/ntp/suggestions_source_top_sites.h @@ -9,9 +9,9 @@ #include "base/basictypes.h" #include "base/task/cancelable_task_tracker.h" -#include "chrome/browser/history/visit_filter.h" #include "chrome/browser/ui/webui/ntp/suggestions_source.h" #include "components/history/core/browser/history_types.h" +#include "components/history/core/browser/visit_filter.h" class SuggestionsCombiner; class Profile; diff --git a/chrome/chrome_browser.gypi b/chrome/chrome_browser.gypi index 2d04f84..77978be 100644 --- a/chrome/chrome_browser.gypi +++ b/chrome/chrome_browser.gypi @@ -1530,20 +1530,14 @@ 'browser/history/in_memory_history_backend.h', 'browser/history/in_memory_url_index.cc', 'browser/history/in_memory_url_index.h', - 'browser/history/in_memory_url_index_types.cc', - 'browser/history/in_memory_url_index_types.h', 'browser/history/scored_history_match.cc', 'browser/history/scored_history_match.h', 'browser/history/shortcuts_database.cc', 'browser/history/shortcuts_database.h', - 'browser/history/thumbnail_database.cc', - 'browser/history/thumbnail_database.h', 'browser/history/top_sites.cc', 'browser/history/top_sites.h', 'browser/history/top_sites_backend.cc', 
'browser/history/top_sites_backend.h', - 'browser/history/top_sites_cache.cc', - 'browser/history/top_sites_cache.h', 'browser/history/top_sites_database.cc', 'browser/history/top_sites_database.h', 'browser/history/top_sites_impl.cc', @@ -1552,16 +1546,6 @@ 'browser/history/typed_url_syncable_service.h', 'browser/history/url_index_private_data.cc', 'browser/history/url_index_private_data.h', - 'browser/history/url_utils.cc', - 'browser/history/url_utils.h', - 'browser/history/visit_database.cc', - 'browser/history/visit_database.h', - 'browser/history/visit_filter.cc', - 'browser/history/visit_filter.h', - 'browser/history/visit_tracker.cc', - 'browser/history/visit_tracker.h', - 'browser/history/visitsegment_database.cc', - 'browser/history/visitsegment_database.h', 'browser/history/web_history_service.cc', 'browser/history/web_history_service.h', 'browser/history/web_history_service_factory.cc', @@ -2895,7 +2879,6 @@ 'chrome_resources.gyp:theme_resources', 'common', 'common_net', - 'in_memory_url_index_cache_proto', 'probe_message_proto', '../components/components.gyp:autofill_core_browser', '../components/components.gyp:bookmarks_browser', @@ -3628,19 +3611,6 @@ }, 'includes': [ '../build/protoc.gypi' ] }, - { - # Protobuf compiler / generator for the InMemoryURLIndex caching - # protocol buffer. 
- # GN version: //chrome/browser/history:in_memory_url_index_cache_proto - 'target_name': 'in_memory_url_index_cache_proto', - 'type': 'static_library', - 'sources': [ 'browser/history/in_memory_url_index_cache.proto' ], - 'variables': { - 'proto_in_dir': 'browser/history', - 'proto_out_dir': 'chrome/browser/history', - }, - 'includes': [ '../build/protoc.gypi' ] - }, ], 'conditions': [ ['OS=="android"', { diff --git a/chrome/chrome_browser_chromeos.gypi b/chrome/chrome_browser_chromeos.gypi index b187589..163695a 100644 --- a/chrome/chrome_browser_chromeos.gypi +++ b/chrome/chrome_browser_chromeos.gypi @@ -1065,7 +1065,6 @@ 'debugger', 'device_policy_proto', 'drive_proto', - 'in_memory_url_index_cache_proto', 'installer_util', 'safe_browsing_chunk_proto', 'safe_browsing_proto', @@ -1080,6 +1079,7 @@ '../chromeos/chromeos.gyp:power_manager_proto', '../chromeos/ime/input_method.gyp:gencode', '../components/components.gyp:cloud_policy_proto', + '../components/components.gyp:history_core_browser_proto', '../components/components.gyp:login', '../components/components.gyp:onc_component', '../components/components.gyp:ownership', diff --git a/chrome/chrome_browser_extensions.gypi b/chrome/chrome_browser_extensions.gypi index 0140298..4cb3be8 100644 --- a/chrome/chrome_browser_extensions.gypi +++ b/chrome/chrome_browser_extensions.gypi @@ -920,10 +920,10 @@ 'common/extensions/api/api.gyp:chrome_api', 'common_net', 'debugger', - 'in_memory_url_index_cache_proto', 'installer_util', 'safe_browsing_proto', '../components/components.gyp:copresence', + '../components/components.gyp:history_core_browser_proto', '../components/components.gyp:omaha_client', '../components/components.gyp:onc_component', '../components/components.gyp:proximity_auth', diff --git a/chrome/chrome_browser_ui.gypi b/chrome/chrome_browser_ui.gypi index 3db256d8..918eccf 100644 --- a/chrome/chrome_browser_ui.gypi +++ b/chrome/chrome_browser_ui.gypi @@ -2664,11 +2664,11 @@ 
'chrome_resources.gyp:theme_resources', 'common', 'common_net', - 'in_memory_url_index_cache_proto', '../components/components.gyp:auto_login_parser', '../components/components.gyp:dom_distiller_core', '../components/components.gyp:dom_distiller_webui', '../components/components.gyp:feedback_proto', + '../components/components.gyp:history_core_browser_proto', '../components/components.gyp:invalidation', '../components/components.gyp:omaha_client', '../components/components.gyp:onc_component', diff --git a/chrome/chrome_tests_unit.gypi b/chrome/chrome_tests_unit.gypi index 33108f6..0df0d0b 100644 --- a/chrome/chrome_tests_unit.gypi +++ b/chrome/chrome_tests_unit.gypi @@ -465,20 +465,14 @@ 'browser/history/history_unittest.cc', 'browser/history/history_unittest_base.cc', 'browser/history/history_unittest_base.h', - 'browser/history/in_memory_url_index_types_unittest.cc', 'browser/history/in_memory_url_index_unittest.cc', 'browser/history/scored_history_match_unittest.cc', 'browser/history/select_favicon_frames_unittest.cc', 'browser/history/shortcuts_database_unittest.cc', 'browser/history/thumbnail_database_unittest.cc', - 'browser/history/top_sites_cache_unittest.cc', 'browser/history/top_sites_database_unittest.cc', 'browser/history/top_sites_impl_unittest.cc', 'browser/history/typed_url_syncable_service_unittest.cc', - 'browser/history/url_utils_unittest.cc', - 'browser/history/visit_database_unittest.cc', - 'browser/history/visit_filter_unittest.cc', - 'browser/history/visit_tracker_unittest.cc', 'browser/history/web_history_service_unittest.cc', 'browser/image_holder_unittest.cc', 'browser/importer/firefox_profile_lock_unittest.cc', diff --git a/components/components_tests.gyp b/components/components_tests.gyp index 9392238..fba5778 100644 --- a/components/components_tests.gyp +++ b/components/components_tests.gyp @@ -146,7 +146,13 @@ 'google/core/browser/google_util_unittest.cc', 'history/core/android/android_history_types_unittest.cc', 
'history/core/browser/history_types_unittest.cc', + 'history/core/browser/in_memory_url_index_types_unittest.cc', + 'history/core/browser/top_sites_cache_unittest.cc', 'history/core/browser/url_database_unittest.cc', + 'history/core/browser/url_utils_unittest.cc', + 'history/core/browser/visit_database_unittest.cc', + 'history/core/browser/visit_filter_unittest.cc', + 'history/core/browser/visit_tracker_unittest.cc', 'history/core/common/thumbnail_score_unittest.cc', 'invalidation/invalidation_logger_unittest.cc', 'json_schema/json_schema_validator_unittest.cc', diff --git a/components/history.gypi b/components/history.gypi index 622a540..f91843d 100644 --- a/components/history.gypi +++ b/components/history.gypi @@ -15,10 +15,12 @@ '../base/base.gyp:base', '../net/net.gyp:net', '../sql/sql.gyp:sql', + '../third_party/sqlite/sqlite.gyp:sqlite', '../ui/base/ui_base.gyp:ui_base', '../ui/gfx/gfx.gyp:gfx', '../url/url.gyp:url_lib', 'favicon_base', + 'history_core_browser_proto', 'keyed_service_core', 'query_parser', ], @@ -38,19 +40,50 @@ 'history/core/browser/history_types.h', 'history/core/browser/in_memory_database.cc', 'history/core/browser/in_memory_database.h', + 'history/core/browser/in_memory_url_index_types.cc', + 'history/core/browser/in_memory_url_index_types.h', 'history/core/browser/keyword_id.h', 'history/core/browser/keyword_search_term.cc', 'history/core/browser/keyword_search_term.h', 'history/core/browser/page_usage_data.cc', 'history/core/browser/page_usage_data.h', + 'history/core/browser/thumbnail_database.cc', + 'history/core/browser/thumbnail_database.h', + 'history/core/browser/top_sites_cache.cc', + 'history/core/browser/top_sites_cache.h', 'history/core/browser/top_sites_observer.h', 'history/core/browser/url_database.cc', 'history/core/browser/url_database.h', 'history/core/browser/url_row.cc', 'history/core/browser/url_row.h', + 'history/core/browser/url_utils.cc', + 'history/core/browser/url_utils.h', + 
'history/core/browser/visit_database.cc', + 'history/core/browser/visit_database.h', + 'history/core/browser/visit_filter.cc', + 'history/core/browser/visit_filter.h', + 'history/core/browser/visit_tracker.cc', + 'history/core/browser/visit_tracker.h', + 'history/core/browser/visitsegment_database.cc', + 'history/core/browser/visitsegment_database.h', ], }, { + # GN version: //components/history/core/browser:proto + # Protobuf compiler / generator for the InMemoryURLIndex caching + # protocol buffer. + 'target_name': 'history_core_browser_proto', + 'type': 'static_library', + 'sources': [ + 'history/core/browser/in_memory_url_index_cache.proto', + ], + 'variables': { + 'proto_in_dir': 'history/core/browser', + 'proto_out_dir': 'components/history/core/browser', + }, + 'includes': [ '../build/protoc.gypi' ] + }, + { # GN version: //components/history/core/common 'target_name': 'history_core_common', 'type': 'static_library', diff --git a/components/history/DEPS b/components/history/DEPS index 28b1e19..5bd6d8d 100644 --- a/components/history/DEPS +++ b/components/history/DEPS @@ -4,6 +4,7 @@ include_rules = [ "+components/query_parser", "+net", "+sql", + "+third_party/sqlite", "+ui/base", "+ui/gfx", ] diff --git a/components/history/core/browser/BUILD.gn b/components/history/core/browser/BUILD.gn index ca03f0b..977ea49 100644 --- a/components/history/core/browser/BUILD.gn +++ b/components/history/core/browser/BUILD.gn @@ -2,6 +2,8 @@ # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. 
+import("//third_party/protobuf/proto_library.gni") + static_library("browser") { sources = [ "history_backend_notifier.h", @@ -18,16 +20,32 @@ static_library("browser") { "history_types.h", "in_memory_database.cc", "in_memory_database.h", + "in_memory_url_index_types.cc", + "in_memory_url_index_types.h", "keyword_id.h", "keyword_search_term.cc", "keyword_search_term.h", "page_usage_data.cc", "page_usage_data.h", + "thumbnail_database.cc", + "thumbnail_database.h", + "top_sites_cache.cc", + "top_sites_cache.h", "top_sites_observer.h", "url_database.cc", "url_database.h", "url_row.cc", "url_row.h", + "url_utils.cc", + "url_utils.h", + "visit_database.cc", + "visit_database.h", + "visit_filter.cc", + "visit_filter.h", + "visit_tracker.cc", + "visit_tracker.h", + "visitsegment_database.cc", + "visitsegment_database.h", ] deps = [ @@ -37,7 +55,15 @@ static_library("browser") { "//components/query_parser", "//net", "//sql", + "//third_party/sqlite", "//ui/base", "//ui/gfx", + "//url", + ] +} + +proto_library("proto") { + sources = [ + "in_memory_url_index_cache.proto", ] } diff --git a/components/history/core/browser/in_memory_url_index_cache.proto b/components/history/core/browser/in_memory_url_index_cache.proto new file mode 100644 index 0000000..df2de1b --- /dev/null +++ b/components/history/core/browser/in_memory_url_index_cache.proto @@ -0,0 +1,103 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// InMemoryURLIndex caching protocol buffers. +// +// At certain times during browser operation, the indexes from the +// InMemoryURLIndex are written to a disk-based cache using the +// following protobuf description. 
+ +syntax = "proto2"; + +option optimize_for = LITE_RUNTIME; + +package in_memory_url_index; + +message InMemoryURLIndexCacheItem { + + message WordListItem { + required uint32 word_count = 1; + repeated string word = 2; + } + + message WordMapItem { + message WordMapEntry { + required string word = 1; + required int32 word_id = 2; + } + + required uint32 item_count = 1; + repeated WordMapEntry word_map_entry = 2; + } + + message CharWordMapItem { + message CharWordMapEntry { + required uint32 item_count = 1; + required int32 char_16 = 2; + repeated int32 word_id = 3 [packed=true]; + } + + required uint32 item_count = 1; + repeated CharWordMapEntry char_word_map_entry = 2; + } + + message WordIDHistoryMapItem { + message WordIDHistoryMapEntry { + required uint32 item_count = 1; + required int32 word_id = 2; + repeated int64 history_id = 3 [packed=true]; + } + + required uint32 item_count = 1; + repeated WordIDHistoryMapEntry word_id_history_map_entry = 2; + } + + message HistoryInfoMapItem { + message HistoryInfoMapEntry { + message VisitInfo { + required int64 visit_time = 1; + // Corresponds to ui::PageTransition. + required uint64 transition_type = 2; + } + required int64 history_id = 1; + required int32 visit_count = 2; + required int32 typed_count = 3; + required int64 last_visit = 4; + required string url = 5; + optional string title = 6; + repeated VisitInfo visits = 7; + } + + required uint32 item_count = 1; + repeated HistoryInfoMapEntry history_info_map_entry = 2; + } + + message WordStartsMapItem { + message WordStartsMapEntry { + required int64 history_id = 1; + repeated int32 url_word_starts = 2 [packed=true]; + repeated int32 title_word_starts = 3 [packed=true]; + } + + required uint32 item_count = 1; + repeated WordStartsMapEntry word_starts_map_entry = 2; + } + + // The date that the cache was last rebuilt from history. Note that + // this cache may include items that were visited after this date if + // the InMemoryURLIndex was updated on the fly. 
This timestamp is meant + // to indicate the last date the index was rebuilt from the ground truth: + // the history database on disk. + required int64 last_rebuild_timestamp = 1; + // If there is no version we'll assume version 0. + optional int32 version = 2; + required int32 history_item_count = 3; + + optional WordListItem word_list = 4; + optional WordMapItem word_map = 5; + optional CharWordMapItem char_word_map = 6; + optional WordIDHistoryMapItem word_id_history_map = 7; + optional HistoryInfoMapItem history_info_map = 8; + optional WordStartsMapItem word_starts_map = 9; +} diff --git a/components/history/core/browser/in_memory_url_index_types.cc b/components/history/core/browser/in_memory_url_index_types.cc new file mode 100644 index 0000000..25907b3 --- /dev/null +++ b/components/history/core/browser/in_memory_url_index_types.cc @@ -0,0 +1,163 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/history/core/browser/in_memory_url_index_types.h" + +#include +#include +#include +#include +#include + +#include "base/i18n/break_iterator.h" +#include "base/i18n/case_conversion.h" +#include "base/strings/string_util.h" +#include "net/base/escape.h" +#include "net/base/net_util.h" + +namespace history { + +// Matches within URL and Title Strings ---------------------------------------- + +TermMatches MatchTermInString(const base::string16& term, + const base::string16& cleaned_string, + int term_num) { + const size_t kMaxCompareLength = 2048; + const base::string16& short_string = + (cleaned_string.length() > kMaxCompareLength) ? 
+ cleaned_string.substr(0, kMaxCompareLength) : cleaned_string; + TermMatches matches; + for (size_t location = short_string.find(term); + location != base::string16::npos; + location = short_string.find(term, location + 1)) + matches.push_back(TermMatch(term_num, location, term.length())); + return matches; +} + +// Comparison function for sorting TermMatches by their offsets. +bool MatchOffsetLess(const TermMatch& m1, const TermMatch& m2) { + return m1.offset < m2.offset; +} + +TermMatches SortAndDeoverlapMatches(const TermMatches& matches) { + if (matches.empty()) + return matches; + TermMatches sorted_matches = matches; + std::sort(sorted_matches.begin(), sorted_matches.end(), MatchOffsetLess); + TermMatches clean_matches; + TermMatch last_match; + for (TermMatches::const_iterator iter = sorted_matches.begin(); + iter != sorted_matches.end(); ++iter) { + if (iter->offset >= last_match.offset + last_match.length) { + last_match = *iter; + clean_matches.push_back(last_match); + } + } + return clean_matches; +} + +std::vector OffsetsFromTermMatches(const TermMatches& matches) { + std::vector offsets; + for (TermMatches::const_iterator i = matches.begin(); i != matches.end(); + ++i) { + offsets.push_back(i->offset); + offsets.push_back(i->offset + i->length); + } + return offsets; +} + +TermMatches ReplaceOffsetsInTermMatches(const TermMatches& matches, + const std::vector& offsets) { + DCHECK_EQ(2 * matches.size(), offsets.size()); + TermMatches new_matches; + std::vector::const_iterator offset_iter = offsets.begin(); + for (TermMatches::const_iterator term_iter = matches.begin(); + term_iter != matches.end(); ++term_iter, ++offset_iter) { + const size_t starting_offset = *offset_iter; + ++offset_iter; + const size_t ending_offset = *offset_iter; + if ((starting_offset != base::string16::npos) && + (ending_offset != base::string16::npos) && + (starting_offset != ending_offset)) { + TermMatch new_match(*term_iter); + new_match.offset = starting_offset; + 
new_match.length = ending_offset - starting_offset; + new_matches.push_back(new_match); + } + } + return new_matches; +} + +// Utility Functions ----------------------------------------------------------- + +String16Set String16SetFromString16(const base::string16& cleaned_uni_string, + WordStarts* word_starts) { + String16Vector words = + String16VectorFromString16(cleaned_uni_string, false, word_starts); + String16Set word_set; + for (String16Vector::const_iterator iter = words.begin(); iter != words.end(); + ++iter) + word_set.insert(base::i18n::ToLower(*iter).substr(0, kMaxSignificantChars)); + return word_set; +} + +String16Vector String16VectorFromString16( + const base::string16& cleaned_uni_string, + bool break_on_space, + WordStarts* word_starts) { + if (word_starts) + word_starts->clear(); + base::i18n::BreakIterator iter(cleaned_uni_string, + break_on_space ? base::i18n::BreakIterator::BREAK_SPACE : + base::i18n::BreakIterator::BREAK_WORD); + String16Vector words; + if (!iter.Init()) + return words; + while (iter.Advance()) { + if (break_on_space || iter.IsWord()) { + base::string16 word(iter.GetString()); + size_t initial_whitespace = 0; + if (break_on_space) { + base::string16 trimmed_word; + base::TrimWhitespace(word, base::TRIM_LEADING, &trimmed_word); + initial_whitespace = word.length() - trimmed_word.length(); + base::TrimWhitespace(trimmed_word, base::TRIM_TRAILING, &word); + } + if (word.empty()) + continue; + words.push_back(word); + if (!word_starts) + continue; + size_t word_start = iter.prev() + initial_whitespace; + if (word_start < kMaxSignificantChars) + word_starts->push_back(word_start); + } + } + return words; +} + +Char16Set Char16SetFromString16(const base::string16& term) { + Char16Set characters; + for (base::string16::const_iterator iter = term.begin(); iter != term.end(); + ++iter) + characters.insert(*iter); + return characters; +} + +// HistoryInfoMapValue --------------------------------------------------------- + 
+HistoryInfoMapValue::HistoryInfoMapValue() {} +HistoryInfoMapValue::~HistoryInfoMapValue() {} + +// RowWordStarts --------------------------------------------------------------- + +RowWordStarts::RowWordStarts() {} +RowWordStarts::~RowWordStarts() {} + +void RowWordStarts::Clear() { + url_word_starts_.clear(); + title_word_starts_.clear(); +} + +} // namespace history diff --git a/components/history/core/browser/in_memory_url_index_types.h b/components/history/core/browser/in_memory_url_index_types.h new file mode 100644 index 0000000..9390c26 --- /dev/null +++ b/components/history/core/browser/in_memory_url_index_types.h @@ -0,0 +1,179 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_HISTORY_CORE_BROWSER_IN_MEMORY_URL_INDEX_TYPES_H_ +#define COMPONENTS_HISTORY_CORE_BROWSER_IN_MEMORY_URL_INDEX_TYPES_H_ + +#include +#include +#include + +#include "base/strings/string16.h" +#include "components/history/core/browser/history_types.h" +#include "url/gurl.h" + +namespace history { + +// The maximum number of characters to consider from an URL and page title +// while matching user-typed terms. +const size_t kMaxSignificantChars = 200; + +// Matches within URL and Title Strings ---------------------------------------- + +// Specifies where an omnibox term occurs within a string. Used for specifying +// highlights in AutocompleteMatches (ACMatchClassifications) and to assist in +// scoring a result. +struct TermMatch { + TermMatch() : term_num(0), offset(0), length(0) {} + TermMatch(int term_num, size_t offset, size_t length) + : term_num(term_num), + offset(offset), + length(length) {} + + int term_num; // The index of the term in the original search string. + size_t offset; // The starting offset of the substring match. + size_t length; // The length of the substring match. 
+}; +typedef std::vector TermMatches; + +// Returns a TermMatches which has an entry for each occurrence of the +// string |term| found in the string |cleaned_string|. Use +// CleanUpUrlForMatching() or CleanUpUrlTitleMatching() before passing +// |cleaned_string| to this function. The function marks each match +// with |term_num| so that the resulting TermMatches can be merged +// with other TermMatches for other terms. Note that only the first +// 2,048 characters of |cleaned_string| are considered during the match +// operation. +TermMatches MatchTermInString(const base::string16& term, + const base::string16& cleaned_string, + int term_num); + +// Sorts and removes overlapping substring matches from |matches| and +// returns the cleaned up matches. +TermMatches SortAndDeoverlapMatches(const TermMatches& matches); + +// Extracts and returns the offsets from |matches|. This includes both +// the offsets corresponding to the beginning of a match and the offsets +// corresponding to the end of a match (i.e., offset+length for that match). +std::vector OffsetsFromTermMatches(const TermMatches& matches); + +// Replaces the offsets and lengths in |matches| with those given in |offsets|. +// |offsets| gives beginning and ending offsets for each match; this function +// translates (beginning, ending) offset into (beginning offset, length of +// match). It deletes any matches for which an endpoint is npos or whose +// endpoints coincide, and returns the updated list of matches. +TermMatches ReplaceOffsetsInTermMatches(const TermMatches& matches, + const std::vector& offsets); + +// Convenience Types ----------------------------------------------------------- + +typedef std::vector String16Vector; +typedef std::set String16Set; +typedef std::set Char16Set; +typedef std::vector Char16Vector; + +// A vector that contains the offsets at which each word starts within a string. 
+typedef std::vector WordStarts; + +// Utility Functions ----------------------------------------------------------- + +// Breaks the string |cleaned_uni_string| down into individual words. +// Use CleanUpUrlForMatching() or CleanUpUrlTitleMatching() before +// passing |cleaned_uni_string| to this function. If |word_starts| is +// not NULL then clears and pushes the offsets within +// |cleaned_uni_string| at which each word starts onto +// |word_starts|. These offsets are collected only up to the first +// kMaxSignificantChars of |cleaned_uni_string|. +String16Set String16SetFromString16(const base::string16& cleaned_uni_string, + WordStarts* word_starts); + +// Breaks the |cleaned_uni_string| string down into individual words +// and return a vector with the individual words in their original +// order. Use CleanUpUrlForMatching() or CleanUpUrlTitleMatching() +// before passing |cleaned_uni_string| to this function. If +// |break_on_space| is false then the resulting list will contain only +// words containing alpha-numeric characters. If |break_on_space| is +// true then the resulting list will contain strings broken at +// whitespace. (|break_on_space| indicates that the +// BreakIterator::BREAK_SPACE (equivalent to BREAK_LINE) approach is +// to be used. For a complete description of this algorithm refer to +// the comments in base/i18n/break_iterator.h.) If |word_starts| is +// not NULL then clears and pushes the word starts onto |word_starts|. +// +// Example: +// Given: |cleaned_uni_string|: "http://www.google.com/ harry the rabbit." +// With |break_on_space| false the returned list will contain: +// "http", "www", "google", "com", "harry", "the", "rabbit" +// With |break_on_space| true the returned list will contain: +// "http://", "www.google.com/", "harry", "the", "rabbit." 
+String16Vector String16VectorFromString16( + const base::string16& cleaned_uni_string, + bool break_on_space, + WordStarts* word_starts); + +// Breaks the |uni_word| string down into its individual characters. +// Note that this is temporarily intended to work on a single word, but +// _will_ work on a string of words, perhaps with unexpected results. +// TODO(mrossetti): Lots of optimizations possible here for not restarting +// a search if the user is just typing along. Also, change this to uniString +// and properly handle substring matches, scoring and sorting the results +// by score. Also, provide the metrics for where the matches occur so that +// the UI can highlight the matched sections. +Char16Set Char16SetFromString16(const base::string16& uni_word); + +// Support for InMemoryURLIndex Private Data ----------------------------------- + +// An index into a list of all of the words we have indexed. +typedef size_t WordID; + +// A map allowing a WordID to be determined given a word. +typedef std::map WordMap; + +// A map from character to the word_ids of words containing that character. +typedef std::set WordIDSet; // An index into the WordList. +typedef std::map CharWordIDMap; + +// A map from word (by word_id) to history items containing that word. +typedef history::URLID HistoryID; +typedef std::set HistoryIDSet; +typedef std::vector HistoryIDVector; +typedef std::map WordIDHistoryMap; +typedef std::map HistoryIDWordMap; + + +// Information used in scoring a particular URL. +typedef std::vector VisitInfoVector; +struct HistoryInfoMapValue { + HistoryInfoMapValue(); + ~HistoryInfoMapValue(); + + // This field is always populated. + URLRow url_row; + + // This field gets filled in asynchronously after a visit. As such, + // it's almost always correct. If it's wrong, it's likely to either + // be empty (if this URL was recently added to the index) or + // slightly out-of-date (one visit behind). 
+ VisitInfoVector visits; +}; + +// A map from history_id to the history's URL and title. +typedef std::map HistoryInfoMap; + +// A map from history_id to URL and page title word start metrics. +struct RowWordStarts { + RowWordStarts(); + ~RowWordStarts(); + + // Clears both url_word_starts_ and title_word_starts_. + void Clear(); + + WordStarts url_word_starts_; + WordStarts title_word_starts_; +}; +typedef std::map WordStartsMap; + +} // namespace history + +#endif // COMPONENTS_HISTORY_CORE_BROWSER_IN_MEMORY_URL_INDEX_TYPES_H_ diff --git a/components/history/core/browser/in_memory_url_index_types_unittest.cc b/components/history/core/browser/in_memory_url_index_types_unittest.cc new file mode 100644 index 0000000..b140491 --- /dev/null +++ b/components/history/core/browser/in_memory_url_index_types_unittest.cc @@ -0,0 +1,151 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/history/core/browser/in_memory_url_index_types.h" + +#include + +#include "base/strings/string16.h" +#include "base/strings/utf_string_conversions.h" +#include "testing/gtest/include/gtest/gtest.h" + +using base::UTF8ToUTF16; + +namespace history { + +// Helper function for verifying that the contents of a C++ iterable container +// of ints matches a C array of ints. 
+template +bool IntArraysEqual(const size_t* expected, + size_t expected_size, + const T& actual) { + if (expected_size != actual.size()) + return false; + for (size_t i = 0; i < expected_size; ++i) + if (expected[i] != actual[i]) + return false; + return true; +} + +class InMemoryURLIndexTypesTest : public testing::Test { +}; + +TEST_F(InMemoryURLIndexTypesTest, StaticFunctions) { + // Test String16VectorFromString16 + base::string16 string_a( + base::UTF8ToUTF16("http://www.google.com/ frammy the brammy")); + WordStarts actual_starts_a; + String16Vector string_vec = + String16VectorFromString16(string_a, false, &actual_starts_a); + ASSERT_EQ(7U, string_vec.size()); + // See if we got the words we expected. + EXPECT_EQ(UTF8ToUTF16("http"), string_vec[0]); + EXPECT_EQ(UTF8ToUTF16("www"), string_vec[1]); + EXPECT_EQ(UTF8ToUTF16("google"), string_vec[2]); + EXPECT_EQ(UTF8ToUTF16("com"), string_vec[3]); + EXPECT_EQ(UTF8ToUTF16("frammy"), string_vec[4]); + EXPECT_EQ(UTF8ToUTF16("the"), string_vec[5]); + EXPECT_EQ(UTF8ToUTF16("brammy"), string_vec[6]); + // Verify the word starts. 
+ size_t expected_starts_a[] = {0, 7, 11, 18, 23, 31, 35}; + EXPECT_TRUE(IntArraysEqual(expected_starts_a, arraysize(expected_starts_a), + actual_starts_a)); + + WordStarts actual_starts_b; + string_vec = String16VectorFromString16(string_a, true, &actual_starts_b); + ASSERT_EQ(5U, string_vec.size()); + EXPECT_EQ(UTF8ToUTF16("http://"), string_vec[0]); + EXPECT_EQ(UTF8ToUTF16("www.google.com/"), string_vec[1]); + EXPECT_EQ(UTF8ToUTF16("frammy"), string_vec[2]); + EXPECT_EQ(UTF8ToUTF16("the"), string_vec[3]); + EXPECT_EQ(UTF8ToUTF16("brammy"), string_vec[4]); + size_t expected_starts_b[] = {0, 7, 23, 31, 35}; + EXPECT_TRUE(IntArraysEqual(expected_starts_b, arraysize(expected_starts_b), + actual_starts_b)); + + base::string16 string_c(base::ASCIIToUTF16( + " funky%20string-with=@strange sequences, intended(to exceed)")); + WordStarts actual_starts_c; + string_vec = String16VectorFromString16(string_c, false, &actual_starts_c); + ASSERT_EQ(8U, string_vec.size()); + // Note that we stop collecting words and word starts at kMaxSignificantChars. + size_t expected_starts_c[] = {1, 7, 16, 22, 32, 43, 52, 55}; + EXPECT_TRUE(IntArraysEqual(expected_starts_c, arraysize(expected_starts_c), + actual_starts_c)); + + // Test String16SetFromString16 + base::string16 string_d(base::ASCIIToUTF16( + "http://web.google.com/search Google Web Search")); + WordStarts actual_starts_d; + String16Set string_set = String16SetFromString16(string_d, &actual_starts_d); + EXPECT_EQ(5U, string_set.size()); + // See if we got the words we expected. 
+ EXPECT_TRUE(string_set.find(UTF8ToUTF16("com")) != string_set.end()); + EXPECT_TRUE(string_set.find(UTF8ToUTF16("google")) != string_set.end()); + EXPECT_TRUE(string_set.find(UTF8ToUTF16("http")) != string_set.end()); + EXPECT_TRUE(string_set.find(UTF8ToUTF16("search")) != string_set.end()); + EXPECT_TRUE(string_set.find(UTF8ToUTF16("web")) != string_set.end()); + size_t expected_starts_d[] = {0, 7, 11, 18, 22, 29, 36, 40}; + EXPECT_TRUE(IntArraysEqual(expected_starts_d, arraysize(expected_starts_d), + actual_starts_d)); + + // Test SortAndDeoverlapMatches + TermMatches matches_e; + matches_e.push_back(TermMatch(1, 13, 10)); + matches_e.push_back(TermMatch(2, 23, 10)); + matches_e.push_back(TermMatch(3, 3, 10)); + matches_e.push_back(TermMatch(4, 40, 5)); + TermMatches matches_f = SortAndDeoverlapMatches(matches_e); + // Nothing should have been eliminated. + EXPECT_EQ(matches_e.size(), matches_f.size()); + // The order should now be 3, 1, 2, 4. + EXPECT_EQ(3, matches_f[0].term_num); + EXPECT_EQ(1, matches_f[1].term_num); + EXPECT_EQ(2, matches_f[2].term_num); + EXPECT_EQ(4, matches_f[3].term_num); + matches_e.push_back(TermMatch(5, 18, 10)); + matches_e.push_back(TermMatch(6, 38, 5)); + matches_f = SortAndDeoverlapMatches(matches_e); + // Two matches should have been eliminated. + EXPECT_EQ(matches_e.size() - 2, matches_f.size()); + // The order should now be 3, 1, 2, 6. 
+ EXPECT_EQ(3, matches_f[0].term_num); + EXPECT_EQ(1, matches_f[1].term_num); + EXPECT_EQ(2, matches_f[2].term_num); + EXPECT_EQ(6, matches_f[3].term_num); + + // Test MatchTermInString + TermMatches matches_g = MatchTermInString( + UTF8ToUTF16("x"), UTF8ToUTF16("axbxcxdxex fxgx/hxixjx.kx"), 123); + const size_t expected_offsets[] = { 1, 3, 5, 7, 9, 12, 14, 17, 19, 21, 24 }; + ASSERT_EQ(arraysize(expected_offsets), matches_g.size()); + for (size_t i = 0; i < arraysize(expected_offsets); ++i) + EXPECT_EQ(expected_offsets[i], matches_g[i].offset); +} + +TEST_F(InMemoryURLIndexTypesTest, OffsetsAndTermMatches) { + // Test OffsetsFromTermMatches + history::TermMatches matches_a; + matches_a.push_back(history::TermMatch(1, 1, 2)); + matches_a.push_back(history::TermMatch(2, 4, 3)); + matches_a.push_back(history::TermMatch(3, 9, 1)); + matches_a.push_back(history::TermMatch(3, 10, 1)); + matches_a.push_back(history::TermMatch(4, 14, 5)); + std::vector offsets = OffsetsFromTermMatches(matches_a); + const size_t expected_offsets_a[] = {1, 3, 4, 7, 9, 10, 10, 11, 14, 19}; + ASSERT_EQ(offsets.size(), arraysize(expected_offsets_a)); + for (size_t i = 0; i < offsets.size(); ++i) + EXPECT_EQ(expected_offsets_a[i], offsets[i]); + + // Test ReplaceOffsetsInTermMatches + offsets[4] = base::string16::npos; // offset of third term + history::TermMatches matches_b = + ReplaceOffsetsInTermMatches(matches_a, offsets); + const size_t expected_offsets_b[] = {1, 4, 10, 14}; + ASSERT_EQ(arraysize(expected_offsets_b), matches_b.size()); + for (size_t i = 0; i < matches_b.size(); ++i) + EXPECT_EQ(expected_offsets_b[i], matches_b[i].offset); +} + +} // namespace history diff --git a/components/history/core/browser/thumbnail_database.cc b/components/history/core/browser/thumbnail_database.cc new file mode 100644 index 0000000..551af76 --- /dev/null +++ b/components/history/core/browser/thumbnail_database.cc @@ -0,0 +1,1322 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/history/core/browser/thumbnail_database.h" + +#include +#include + +#include "base/bind.h" +#include "base/debug/alias.h" +#include "base/debug/dump_without_crashing.h" +#include "base/files/file_util.h" +#include "base/format_macros.h" +#include "base/memory/ref_counted_memory.h" +#include "base/metrics/histogram.h" +#include "base/rand_util.h" +#include "base/strings/string_util.h" +#include "base/strings/stringprintf.h" +#include "base/time/time.h" +#include "components/history/core/browser/history_client.h" +#include "components/history/core/browser/url_database.h" +#include "sql/recovery.h" +#include "sql/statement.h" +#include "sql/transaction.h" +#include "third_party/sqlite/sqlite3.h" + +#if defined(OS_MACOSX) && !defined(OS_IOS) +#include "base/mac/mac_util.h" +#endif + +// Description of database tables: +// +// icon_mapping +// id Unique ID. +// page_url Page URL which has one or more associated favicons. +// icon_id The ID of favicon that this mapping maps to. +// +// favicons This table associates a row to each favicon for a +// |page_url| in the |icon_mapping| table. This is the +// default favicon |page_url|/favicon.ico plus any favicons +// associated via . +// The |id| matches the |icon_id| field in the appropriate +// row in the icon_mapping table. +// +// id Unique ID. +// url The URL at which the favicon file is located. +// icon_type The type of the favicon specified in the rel attribute of +// the link tag. The FAVICON type is used for the default +// favicon.ico favicon. +// +// favicon_bitmaps This table contains the PNG encoded bitmap data of the +// favicons. There is a separate row for every size in a +// multi resolution bitmap. The bitmap data is associated +// to the favicon via the |icon_id| field which matches +// the |id| field in the appropriate row in the |favicons| +// table. +// +// id Unique ID. 
+// icon_id The ID of the favicon that the bitmap is associated to. +// last_updated The time at which this favicon was inserted into the +// table. This is used to determine if it needs to be +// redownloaded from the web. +// image_data PNG encoded data of the favicon. +// width Pixel width of |image_data|. +// height Pixel height of |image_data|. + +namespace { + +// For this database, schema migrations are deprecated after two +// years. This means that the oldest non-deprecated version should be +// two years old or greater (thus the migrations to get there are +// older). Databases containing deprecated versions will be cleared +// at startup. Since this database is a cache, losing old data is not +// fatal (in fact, very old data may be expired immediately at startup +// anyhow). + +// Version 7: 911a634d/r209424 by qsr@chromium.org on 2013-07-01 +// Version 6: 610f923b/r152367 by pkotwicz@chromium.org on 2012-08-20 +// Version 5: e2ee8ae9/r105004 by groby@chromium.org on 2011-10-12 +// Version 4: 5f104d76/r77288 by sky@chromium.org on 2011-03-08 (deprecated) +// Version 3: 09911bf3/r15 by initial.commit on 2008-07-26 (deprecated) + +// Version number of the database. +// NOTE(shess): When changing the version, add a new golden file for +// the new version and a test to verify that Init() works with it. +const int kCurrentVersionNumber = 7; +const int kCompatibleVersionNumber = 7; +const int kDeprecatedVersionNumber = 4; // and earlier. + +void FillIconMapping(const sql::Statement& statement, + const GURL& page_url, + history::IconMapping* icon_mapping) { + icon_mapping->mapping_id = statement.ColumnInt64(0); + icon_mapping->icon_id = statement.ColumnInt64(1); + icon_mapping->icon_type = + static_cast(statement.ColumnInt(2)); + icon_mapping->icon_url = GURL(statement.ColumnString(3)); + icon_mapping->page_url = page_url; +} + +enum InvalidStructureType { + // NOTE(shess): Intentionally skip bucket 0 to account for + // conversion from a boolean histogram. 
+ STRUCTURE_EVENT_FAVICON = 1, + STRUCTURE_EVENT_VERSION4, + STRUCTURE_EVENT_VERSION5, + + // Always keep this at the end. + STRUCTURE_EVENT_MAX, +}; + +void RecordInvalidStructure(InvalidStructureType invalid_type) { + UMA_HISTOGRAM_ENUMERATION("History.InvalidFaviconsDBStructure", + invalid_type, STRUCTURE_EVENT_MAX); +} + +// Attempt to pass 2000 bytes of |debug_info| into a crash dump. +void DumpWithoutCrashing2000(const std::string& debug_info) { + char debug_buf[2000]; + base::strlcpy(debug_buf, debug_info.c_str(), arraysize(debug_buf)); + base::debug::Alias(&debug_buf); + + base::debug::DumpWithoutCrashing(); +} + +void ReportCorrupt(sql::Connection* db, size_t startup_kb) { + // Buffer for accumulating debugging info about the error. Place + // more-relevant information earlier, in case things overflow the + // fixed-size buffer. + std::string debug_info; + + base::StringAppendF(&debug_info, "SQLITE_CORRUPT, integrity_check:\n"); + + // Check files up to 8M to keep things from blocking too long. + const size_t kMaxIntegrityCheckSize = 8192; + if (startup_kb > kMaxIntegrityCheckSize) { + base::StringAppendF(&debug_info, "too big %" PRIuS "\n", startup_kb); + } else { + std::vector messages; + + const base::TimeTicks before = base::TimeTicks::Now(); + db->FullIntegrityCheck(&messages); + base::StringAppendF(&debug_info, "# %" PRIx64 " ms, %" PRIuS " records\n", + (base::TimeTicks::Now() - before).InMilliseconds(), + messages.size()); + + // SQLite returns up to 100 messages by default, trim deeper to + // keep close to the 2000-character size limit for dumping. + // + // TODO(shess): If the first 20 tend to be actionable, test if + // passing the count to integrity_check makes it exit earlier. In + // that case it may be possible to greatly ease the size + // restriction. 
+ const size_t kMaxMessages = 20; + for (size_t i = 0; i < kMaxMessages && i < messages.size(); ++i) { + base::StringAppendF(&debug_info, "%s\n", messages[i].c_str()); + } + } + + DumpWithoutCrashing2000(debug_info); +} + +void ReportError(sql::Connection* db, int error) { + // Buffer for accumulating debugging info about the error. Place + // more-relevant information earlier, in case things overflow the + // fixed-size buffer. + std::string debug_info; + + // The error message from the failed operation. + base::StringAppendF(&debug_info, "db error: %d/%s\n", + db->GetErrorCode(), db->GetErrorMessage()); + + // System errno information. + base::StringAppendF(&debug_info, "errno: %d\n", db->GetLastErrno()); + + // SQLITE_ERROR reports seem to be attempts to upgrade invalid + // schema, try to log that info. + if (error == SQLITE_ERROR) { + const char* kVersionSql = "SELECT value FROM meta WHERE key = 'version'"; + if (db->IsSQLValid(kVersionSql)) { + sql::Statement statement(db->GetUniqueStatement(kVersionSql)); + if (statement.Step()) { + debug_info += "version: "; + debug_info += statement.ColumnString(0); + debug_info += '\n'; + } else if (statement.Succeeded()) { + debug_info += "version: none\n"; + } else { + debug_info += "version: error\n"; + } + } else { + debug_info += "version: invalid\n"; + } + + debug_info += "schema:\n"; + + // sqlite_master has columns: + // type - "index" or "table". + // name - name of created element. + // tbl_name - name of element, or target table in case of index. + // rootpage - root page of the element in database file. + // sql - SQL to create the element. + // In general, the |sql| column is sufficient to derive the other + // columns. |rootpage| is not interesting for debugging, without + // the contents of the database. 
The COALESCE is because certain + // automatic elements will have a |name| but no |sql|, + const char* kSchemaSql = "SELECT COALESCE(sql, name) FROM sqlite_master"; + sql::Statement statement(db->GetUniqueStatement(kSchemaSql)); + while (statement.Step()) { + debug_info += statement.ColumnString(0); + debug_info += '\n'; + } + if (!statement.Succeeded()) + debug_info += "error\n"; + } + + // TODO(shess): Think of other things to log. Not logging the + // statement text because the backtrace should suffice in most + // cases. The database schema is a possibility, but the + // likelihood of recursive error callbacks makes that risky (same + // reasoning applies to other data fetched from the database). + + DumpWithoutCrashing2000(debug_info); +} + +// TODO(shess): If this proves out, perhaps lift the code out to +// chrome/browser/diagnostics/sqlite_diagnostics.{h,cc}. +void GenerateDiagnostics(sql::Connection* db, + size_t startup_kb, + int extended_error) { + int error = (extended_error & 0xFF); + + // Infrequently report information about the error up to the crash + // server. + static const uint64 kReportsPerMillion = 50000; + + // Since some/most errors will not resolve themselves, only report + // once per Chrome run. + static bool reported = false; + if (reported) + return; + + uint64 rand = base::RandGenerator(1000000); + if (error == SQLITE_CORRUPT) { + // Once the database is known to be corrupt, it will generate a + // stream of errors until someone fixes it, so give one chance. + // Set first in case of errors in generating the report. + reported = true; + + // Corrupt cases currently dominate, report them very infrequently. + static const uint64 kCorruptReportsPerMillion = 10000; + if (rand < kCorruptReportsPerMillion) + ReportCorrupt(db, startup_kb); + } else if (error == SQLITE_READONLY) { + // SQLITE_READONLY appears similar to SQLITE_CORRUPT - once it + // is seen, it is almost guaranteed to be seen again. 
+ reported = true; + + if (rand < kReportsPerMillion) + ReportError(db, extended_error); + } else { + // Only set the flag when making a report. This should allow + // later (potentially different) errors in a stream of errors to + // be reported. + // + // TODO(shess): Would it be worthwhile to audit for which cases + // want once-only handling? Sqlite.Error.Thumbnail shows + // CORRUPT and READONLY as almost 95% of all reports on these + // channels, so probably easier to just harvest from the field. + if (rand < kReportsPerMillion) { + reported = true; + ReportError(db, extended_error); + } + } +} + +// NOTE(shess): Schema modifications must consider initial creation in +// |InitImpl()|, recovery in |RecoverDatabaseOrRaze()|, and history pruning in +// |RetainDataForPageUrls()|. +bool InitTables(sql::Connection* db) { + const char kIconMappingSql[] = + "CREATE TABLE IF NOT EXISTS icon_mapping" + "(" + "id INTEGER PRIMARY KEY," + "page_url LONGVARCHAR NOT NULL," + "icon_id INTEGER" + ")"; + if (!db->Execute(kIconMappingSql)) + return false; + + const char kFaviconsSql[] = + "CREATE TABLE IF NOT EXISTS favicons" + "(" + "id INTEGER PRIMARY KEY," + "url LONGVARCHAR NOT NULL," + // default icon_type FAVICON to be consistent with past migration. + "icon_type INTEGER DEFAULT 1" + ")"; + if (!db->Execute(kFaviconsSql)) + return false; + + const char kFaviconBitmapsSql[] = + "CREATE TABLE IF NOT EXISTS favicon_bitmaps" + "(" + "id INTEGER PRIMARY KEY," + "icon_id INTEGER NOT NULL," + "last_updated INTEGER DEFAULT 0," + "image_data BLOB," + "width INTEGER DEFAULT 0," + "height INTEGER DEFAULT 0" + ")"; + if (!db->Execute(kFaviconBitmapsSql)) + return false; + + return true; +} + +// NOTE(shess): Schema modifications must consider initial creation in +// |InitImpl()|, recovery in |RecoverDatabaseOrRaze()|, and history pruning in +// |RetainDataForPageUrls()|. 
+bool InitIndices(sql::Connection* db) { + const char kIconMappingUrlIndexSql[] = + "CREATE INDEX IF NOT EXISTS icon_mapping_page_url_idx" + " ON icon_mapping(page_url)"; + const char kIconMappingIdIndexSql[] = + "CREATE INDEX IF NOT EXISTS icon_mapping_icon_id_idx" + " ON icon_mapping(icon_id)"; + if (!db->Execute(kIconMappingUrlIndexSql) || + !db->Execute(kIconMappingIdIndexSql)) { + return false; + } + + const char kFaviconsIndexSql[] = + "CREATE INDEX IF NOT EXISTS favicons_url ON favicons(url)"; + if (!db->Execute(kFaviconsIndexSql)) + return false; + + const char kFaviconBitmapsIndexSql[] = + "CREATE INDEX IF NOT EXISTS favicon_bitmaps_icon_id ON " + "favicon_bitmaps(icon_id)"; + if (!db->Execute(kFaviconBitmapsIndexSql)) + return false; + + return true; +} + +enum RecoveryEventType { + RECOVERY_EVENT_RECOVERED = 0, + RECOVERY_EVENT_FAILED_SCOPER, + RECOVERY_EVENT_FAILED_META_VERSION_ERROR, // obsolete + RECOVERY_EVENT_FAILED_META_VERSION_NONE, // obsolete + RECOVERY_EVENT_FAILED_META_WRONG_VERSION6, // obsolete + RECOVERY_EVENT_FAILED_META_WRONG_VERSION5, // obsolete + RECOVERY_EVENT_FAILED_META_WRONG_VERSION, + RECOVERY_EVENT_FAILED_RECOVER_META, // obsolete + RECOVERY_EVENT_FAILED_META_INSERT, // obsolete + RECOVERY_EVENT_FAILED_INIT, + RECOVERY_EVENT_FAILED_RECOVER_FAVICONS, // obsolete + RECOVERY_EVENT_FAILED_FAVICONS_INSERT, // obsolete + RECOVERY_EVENT_FAILED_RECOVER_FAVICON_BITMAPS, // obsolete + RECOVERY_EVENT_FAILED_FAVICON_BITMAPS_INSERT, // obsolete + RECOVERY_EVENT_FAILED_RECOVER_ICON_MAPPING, // obsolete + RECOVERY_EVENT_FAILED_ICON_MAPPING_INSERT, // obsolete + RECOVERY_EVENT_RECOVERED_VERSION6, // obsolete + RECOVERY_EVENT_FAILED_META_INIT, + RECOVERY_EVENT_FAILED_META_VERSION, + RECOVERY_EVENT_DEPRECATED, + RECOVERY_EVENT_FAILED_V5_INITSCHEMA, // obsolete + RECOVERY_EVENT_FAILED_V5_AUTORECOVER_FAVICONS, // obsolete + RECOVERY_EVENT_FAILED_V5_AUTORECOVER_ICON_MAPPING, // obsolete + RECOVERY_EVENT_RECOVERED_VERSION5, // obsolete + 
RECOVERY_EVENT_FAILED_AUTORECOVER_FAVICONS, + RECOVERY_EVENT_FAILED_AUTORECOVER_FAVICON_BITMAPS, + RECOVERY_EVENT_FAILED_AUTORECOVER_ICON_MAPPING, + RECOVERY_EVENT_FAILED_COMMIT, + + // Always keep this at the end. + RECOVERY_EVENT_MAX, +}; + +void RecordRecoveryEvent(RecoveryEventType recovery_event) { + UMA_HISTOGRAM_ENUMERATION("History.FaviconsRecovery", + recovery_event, RECOVERY_EVENT_MAX); +} + +// Recover the database to the extent possible, razing it if recovery +// is not possible. +// TODO(shess): This is mostly just a safe proof of concept. In the +// real world, this database is probably not worthwhile recovering, as +// opposed to just razing it and starting over whenever corruption is +// detected. So this database is a good test subject. +void RecoverDatabaseOrRaze(sql::Connection* db, const base::FilePath& db_path) { + // NOTE(shess): This code is currently specific to the version + // number. I am working on simplifying things to loosen the + // dependency, meanwhile contact me if you need to bump the version. + DCHECK_EQ(7, kCurrentVersionNumber); + + // TODO(shess): Reset back after? + db->reset_error_callback(); + + // For histogram purposes. + size_t favicons_rows_recovered = 0; + size_t favicon_bitmaps_rows_recovered = 0; + size_t icon_mapping_rows_recovered = 0; + int64 original_size = 0; + base::GetFileSize(db_path, &original_size); + + scoped_ptr recovery = sql::Recovery::Begin(db, db_path); + if (!recovery) { + // TODO(shess): Unable to create recovery connection. This + // implies something substantial is wrong. At this point |db| has + // been poisoned so there is nothing really to do. + // + // Possible responses are unclear. If the failure relates to a + // problem somehow specific to the temporary file used to back the + // database, then an in-memory database could possibly be used. + // This could potentially allow recovering the main database, and + // might be simple to implement w/in Begin(). 
+ RecordRecoveryEvent(RECOVERY_EVENT_FAILED_SCOPER); + return; + } + + // Setup the meta recovery table and fetch the version number from + // the corrupt database. + int version = 0; + if (!recovery->SetupMeta() || !recovery->GetMetaVersionNumber(&version)) { + // TODO(shess): Prior histograms indicate all failures are in + // creating the recover virtual table for corrupt.meta. The table + // may not exist, or the database may be too far gone. Either + // way, unclear how to resolve. + sql::Recovery::Rollback(recovery.Pass()); + RecordRecoveryEvent(RECOVERY_EVENT_FAILED_META_VERSION); + return; + } + + // This code may be able to fetch version information that the regular + // deprecation path cannot. + // NOTE(shess): v5 and v6 are currently not deprecated in the normal Init() + // path, but are deprecated in the recovery path in the interest of keeping + // the code simple. http://crbug.com/327485 for numbers. + DCHECK_LE(kDeprecatedVersionNumber, 6); + if (version <= 6) { + sql::Recovery::Unrecoverable(recovery.Pass()); + RecordRecoveryEvent(RECOVERY_EVENT_DEPRECATED); + return; + } + + // Earlier versions have been handled or deprecated, later versions should be + // impossible. + if (version != 7) { + sql::Recovery::Unrecoverable(recovery.Pass()); + RecordRecoveryEvent(RECOVERY_EVENT_FAILED_META_WRONG_VERSION); + return; + } + + // Recover to current schema version. + sql::MetaTable recover_meta_table; + if (!recover_meta_table.Init(recovery->db(), kCurrentVersionNumber, + kCompatibleVersionNumber)) { + sql::Recovery::Rollback(recovery.Pass()); + RecordRecoveryEvent(RECOVERY_EVENT_FAILED_META_INIT); + return; + } + + // Create a fresh version of the database. The recovery code uses + // conflict-resolution to handle duplicates, so the indices are + // necessary. + if (!InitTables(recovery->db()) || !InitIndices(recovery->db())) { + // TODO(shess): Unable to create the new schema in the new + // database. 
The new database should be a temporary file, so + // being unable to work with it is pretty unclear. + // + // What are the potential responses, even? The recovery database + // could be opened as in-memory. If the temp database had a + // filesystem problem and the temp filesystem differs from the + // main database, then that could fix it. + sql::Recovery::Rollback(recovery.Pass()); + RecordRecoveryEvent(RECOVERY_EVENT_FAILED_INIT); + return; + } + + if (!recovery->AutoRecoverTable("favicons", 0, &favicons_rows_recovered)) { + sql::Recovery::Rollback(recovery.Pass()); + RecordRecoveryEvent(RECOVERY_EVENT_FAILED_AUTORECOVER_FAVICONS); + return; + } + if (!recovery->AutoRecoverTable("favicon_bitmaps", 0, + &favicon_bitmaps_rows_recovered)) { + sql::Recovery::Rollback(recovery.Pass()); + RecordRecoveryEvent(RECOVERY_EVENT_FAILED_AUTORECOVER_FAVICON_BITMAPS); + return; + } + if (!recovery->AutoRecoverTable("icon_mapping", 0, + &icon_mapping_rows_recovered)) { + sql::Recovery::Rollback(recovery.Pass()); + RecordRecoveryEvent(RECOVERY_EVENT_FAILED_AUTORECOVER_ICON_MAPPING); + return; + } + + // TODO(shess): Is it possible/likely to have broken foreign-key + // issues with the tables? + // - icon_mapping.icon_id maps to no favicons.id + // - favicon_bitmaps.icon_id maps to no favicons.id + // - favicons.id is referenced by no icon_mapping.icon_id + // - favicons.id is referenced by no favicon_bitmaps.icon_id + // This step is possibly not worth the effort necessary to develop + // and sequence the statements, as it is basically a form of garbage + // collection. + + if (!sql::Recovery::Recovered(recovery.Pass())) { + RecordRecoveryEvent(RECOVERY_EVENT_FAILED_COMMIT); + return; + } + + // Track the size of the recovered database relative to the size of + // the input database. The size should almost always be smaller, + // unless the input database was empty to start with. If the + // percentage results are very low, something is awry. 
+ int64 final_size = 0; + if (original_size > 0 && + base::GetFileSize(db_path, &final_size) && + final_size > 0) { + int percentage = static_cast(original_size * 100 / final_size); + UMA_HISTOGRAM_PERCENTAGE("History.FaviconsRecoveredPercentage", + std::max(100, percentage)); + } + + // Using 10,000 because these cases mostly care about "none + // recovered" and "lots recovered". More than 10,000 rows recovered + // probably means there's something wrong with the profile. + UMA_HISTOGRAM_COUNTS_10000("History.FaviconsRecoveredRowsFavicons", + favicons_rows_recovered); + UMA_HISTOGRAM_COUNTS_10000("History.FaviconsRecoveredRowsFaviconBitmaps", + favicon_bitmaps_rows_recovered); + UMA_HISTOGRAM_COUNTS_10000("History.FaviconsRecoveredRowsIconMapping", + icon_mapping_rows_recovered); + + RecordRecoveryEvent(RECOVERY_EVENT_RECOVERED); +} + +void DatabaseErrorCallback(sql::Connection* db, + const base::FilePath& db_path, + size_t startup_kb, + history::HistoryClient* history_client, + int extended_error, + sql::Statement* stmt) { + // TODO(shess): Assert that this is running on a safe thread. + // AFAICT, should be the history thread, but at this level I can't + // see how to reach that. + + if (history_client && history_client->ShouldReportDatabaseError()) { + GenerateDiagnostics(db, startup_kb, extended_error); + } + + // Attempt to recover corrupt databases. + int error = (extended_error & 0xFF); + if (error == SQLITE_CORRUPT || + error == SQLITE_CANTOPEN || + error == SQLITE_NOTADB) { + RecoverDatabaseOrRaze(db, db_path); + } + + // The default handling is to assert on debug and to ignore on release. 
+ if (!sql::Connection::ShouldIgnoreSqliteError(extended_error)) + DLOG(FATAL) << db->GetErrorMessage(); +} + +} // namespace + +namespace history { + +ThumbnailDatabase::IconMappingEnumerator::IconMappingEnumerator() { +} + +ThumbnailDatabase::IconMappingEnumerator::~IconMappingEnumerator() { +} + +bool ThumbnailDatabase::IconMappingEnumerator::GetNextIconMapping( + IconMapping* icon_mapping) { + if (!statement_.Step()) + return false; + FillIconMapping(statement_, GURL(statement_.ColumnString(4)), icon_mapping); + return true; +} + +ThumbnailDatabase::ThumbnailDatabase(HistoryClient* history_client) + : history_client_(history_client) { +} + +ThumbnailDatabase::~ThumbnailDatabase() { + // The DBCloseScoper will delete the DB and the cache. +} + +sql::InitStatus ThumbnailDatabase::Init(const base::FilePath& db_name) { + // TODO(shess): Consider separating database open from schema setup. + // With that change, this code could Raze() from outside the + // transaction, rather than needing RazeAndClose() in InitImpl(). + + // Retry failed setup in case the recovery system fixed things. + const size_t kAttempts = 2; + + sql::InitStatus status = sql::INIT_FAILURE; + for (size_t i = 0; i < kAttempts; ++i) { + status = InitImpl(db_name); + if (status == sql::INIT_OK) + return status; + + meta_table_.Reset(); + db_.Close(); + } + return status; +} + +void ThumbnailDatabase::ComputeDatabaseMetrics() { + sql::Statement favicon_count( + db_.GetCachedStatement(SQL_FROM_HERE, "SELECT COUNT(*) FROM favicons")); + UMA_HISTOGRAM_COUNTS_10000( + "History.NumFaviconsInDB", + favicon_count.Step() ? 
favicon_count.ColumnInt(0) : 0); +} + +void ThumbnailDatabase::BeginTransaction() { + db_.BeginTransaction(); +} + +void ThumbnailDatabase::CommitTransaction() { + db_.CommitTransaction(); +} + +void ThumbnailDatabase::RollbackTransaction() { + db_.RollbackTransaction(); +} + +void ThumbnailDatabase::Vacuum() { + DCHECK(db_.transaction_nesting() == 0) << + "Can not have a transaction when vacuuming."; + ignore_result(db_.Execute("VACUUM")); +} + +void ThumbnailDatabase::TrimMemory(bool aggressively) { + db_.TrimMemory(aggressively); +} + +bool ThumbnailDatabase::GetFaviconBitmapIDSizes( + favicon_base::FaviconID icon_id, + std::vector* bitmap_id_sizes) { + DCHECK(icon_id); + sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, + "SELECT id, width, height FROM favicon_bitmaps WHERE icon_id=?")); + statement.BindInt64(0, icon_id); + + bool result = false; + while (statement.Step()) { + result = true; + if (!bitmap_id_sizes) + return result; + + FaviconBitmapIDSize bitmap_id_size; + bitmap_id_size.bitmap_id = statement.ColumnInt64(0); + bitmap_id_size.pixel_size = gfx::Size(statement.ColumnInt(1), + statement.ColumnInt(2)); + bitmap_id_sizes->push_back(bitmap_id_size); + } + return result; +} + +bool ThumbnailDatabase::GetFaviconBitmaps( + favicon_base::FaviconID icon_id, + std::vector* favicon_bitmaps) { + DCHECK(icon_id); + sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, + "SELECT id, last_updated, image_data, width, height FROM favicon_bitmaps " + "WHERE icon_id=?")); + statement.BindInt64(0, icon_id); + + bool result = false; + while (statement.Step()) { + result = true; + if (!favicon_bitmaps) + return result; + + FaviconBitmap favicon_bitmap; + favicon_bitmap.bitmap_id = statement.ColumnInt64(0); + favicon_bitmap.icon_id = icon_id; + favicon_bitmap.last_updated = + base::Time::FromInternalValue(statement.ColumnInt64(1)); + if (statement.ColumnByteLength(2) > 0) { + scoped_refptr data(new base::RefCountedBytes()); + 
statement.ColumnBlobAsVector(2, &data->data()); + favicon_bitmap.bitmap_data = data; + } + favicon_bitmap.pixel_size = gfx::Size(statement.ColumnInt(3), + statement.ColumnInt(4)); + favicon_bitmaps->push_back(favicon_bitmap); + } + return result; +} + +bool ThumbnailDatabase::GetFaviconBitmap( + FaviconBitmapID bitmap_id, + base::Time* last_updated, + scoped_refptr* png_icon_data, + gfx::Size* pixel_size) { + DCHECK(bitmap_id); + sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, + "SELECT last_updated, image_data, width, height FROM favicon_bitmaps " + "WHERE id=?")); + statement.BindInt64(0, bitmap_id); + + if (!statement.Step()) + return false; + + if (last_updated) + *last_updated = base::Time::FromInternalValue(statement.ColumnInt64(0)); + + if (png_icon_data && statement.ColumnByteLength(1) > 0) { + scoped_refptr data(new base::RefCountedBytes()); + statement.ColumnBlobAsVector(1, &data->data()); + *png_icon_data = data; + } + + if (pixel_size) { + *pixel_size = gfx::Size(statement.ColumnInt(2), + statement.ColumnInt(3)); + } + return true; +} + +FaviconBitmapID ThumbnailDatabase::AddFaviconBitmap( + favicon_base::FaviconID icon_id, + const scoped_refptr& icon_data, + base::Time time, + const gfx::Size& pixel_size) { + DCHECK(icon_id); + sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, + "INSERT INTO favicon_bitmaps (icon_id, image_data, last_updated, width, " + "height) VALUES (?, ?, ?, ?, ?)")); + statement.BindInt64(0, icon_id); + if (icon_data.get() && icon_data->size()) { + statement.BindBlob(1, icon_data->front(), + static_cast(icon_data->size())); + } else { + statement.BindNull(1); + } + statement.BindInt64(2, time.ToInternalValue()); + statement.BindInt(3, pixel_size.width()); + statement.BindInt(4, pixel_size.height()); + + if (!statement.Run()) + return 0; + return db_.GetLastInsertRowId(); +} + +bool ThumbnailDatabase::SetFaviconBitmap( + FaviconBitmapID bitmap_id, + scoped_refptr bitmap_data, + base::Time time) { + 
DCHECK(bitmap_id); + sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, + "UPDATE favicon_bitmaps SET image_data=?, last_updated=? WHERE id=?")); + if (bitmap_data.get() && bitmap_data->size()) { + statement.BindBlob(0, bitmap_data->front(), + static_cast(bitmap_data->size())); + } else { + statement.BindNull(0); + } + statement.BindInt64(1, time.ToInternalValue()); + statement.BindInt64(2, bitmap_id); + + return statement.Run(); +} + +bool ThumbnailDatabase::SetFaviconBitmapLastUpdateTime( + FaviconBitmapID bitmap_id, + base::Time time) { + DCHECK(bitmap_id); + sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, + "UPDATE favicon_bitmaps SET last_updated=? WHERE id=?")); + statement.BindInt64(0, time.ToInternalValue()); + statement.BindInt64(1, bitmap_id); + return statement.Run(); +} + +bool ThumbnailDatabase::DeleteFaviconBitmap(FaviconBitmapID bitmap_id) { + sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, + "DELETE FROM favicon_bitmaps WHERE id=?")); + statement.BindInt64(0, bitmap_id); + return statement.Run(); +} + +bool ThumbnailDatabase::SetFaviconOutOfDate(favicon_base::FaviconID icon_id) { + sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, + "UPDATE favicon_bitmaps SET last_updated=? WHERE icon_id=?")); + statement.BindInt64(0, 0); + statement.BindInt64(1, icon_id); + + return statement.Run(); +} + +favicon_base::FaviconID ThumbnailDatabase::GetFaviconIDForFaviconURL( + const GURL& icon_url, + int required_icon_type, + favicon_base::IconType* icon_type) { + sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, + "SELECT id, icon_type FROM favicons WHERE url=? AND (icon_type & ? 
> 0) " + "ORDER BY icon_type DESC")); + statement.BindString(0, URLDatabase::GURLToDatabaseURL(icon_url)); + statement.BindInt(1, required_icon_type); + + if (!statement.Step()) + return 0; // not cached + + if (icon_type) + *icon_type = static_cast(statement.ColumnInt(1)); + return statement.ColumnInt64(0); +} + +bool ThumbnailDatabase::GetFaviconHeader(favicon_base::FaviconID icon_id, + GURL* icon_url, + favicon_base::IconType* icon_type) { + DCHECK(icon_id); + + sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, + "SELECT url, icon_type FROM favicons WHERE id=?")); + statement.BindInt64(0, icon_id); + + if (!statement.Step()) + return false; // No entry for the id. + + if (icon_url) + *icon_url = GURL(statement.ColumnString(0)); + if (icon_type) + *icon_type = static_cast(statement.ColumnInt(1)); + + return true; +} + +favicon_base::FaviconID ThumbnailDatabase::AddFavicon( + const GURL& icon_url, + favicon_base::IconType icon_type) { + + sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, + "INSERT INTO favicons (url, icon_type) VALUES (?, ?)")); + statement.BindString(0, URLDatabase::GURLToDatabaseURL(icon_url)); + statement.BindInt(1, icon_type); + + if (!statement.Run()) + return 0; + return db_.GetLastInsertRowId(); +} + +favicon_base::FaviconID ThumbnailDatabase::AddFavicon( + const GURL& icon_url, + favicon_base::IconType icon_type, + const scoped_refptr& icon_data, + base::Time time, + const gfx::Size& pixel_size) { + favicon_base::FaviconID icon_id = AddFavicon(icon_url, icon_type); + if (!icon_id || !AddFaviconBitmap(icon_id, icon_data, time, pixel_size)) + return 0; + + return icon_id; +} + +bool ThumbnailDatabase::DeleteFavicon(favicon_base::FaviconID id) { + sql::Statement statement; + statement.Assign(db_.GetCachedStatement(SQL_FROM_HERE, + "DELETE FROM favicons WHERE id = ?")); + statement.BindInt64(0, id); + if (!statement.Run()) + return false; + + statement.Assign(db_.GetCachedStatement(SQL_FROM_HERE, + "DELETE FROM 
favicon_bitmaps WHERE icon_id = ?")); + statement.BindInt64(0, id); + return statement.Run(); +} + +bool ThumbnailDatabase::GetIconMappingsForPageURL( + const GURL& page_url, + int required_icon_types, + std::vector* filtered_mapping_data) { + std::vector mapping_data; + if (!GetIconMappingsForPageURL(page_url, &mapping_data)) + return false; + + bool result = false; + for (std::vector::iterator m = mapping_data.begin(); + m != mapping_data.end(); ++m) { + if (m->icon_type & required_icon_types) { + result = true; + if (!filtered_mapping_data) + return result; + + // Restrict icon type of subsequent matches to |m->icon_type|. + // |m->icon_type| is the largest IconType in |mapping_data| because + // |mapping_data| is sorted in descending order of IconType. + required_icon_types = m->icon_type; + + filtered_mapping_data->push_back(*m); + } + } + return result; +} + +bool ThumbnailDatabase::GetIconMappingsForPageURL( + const GURL& page_url, + std::vector* mapping_data) { + sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, + "SELECT icon_mapping.id, icon_mapping.icon_id, favicons.icon_type, " + "favicons.url " + "FROM icon_mapping " + "INNER JOIN favicons " + "ON icon_mapping.icon_id = favicons.id " + "WHERE icon_mapping.page_url=? 
" + "ORDER BY favicons.icon_type DESC")); + statement.BindString(0, URLDatabase::GURLToDatabaseURL(page_url)); + + bool result = false; + while (statement.Step()) { + result = true; + if (!mapping_data) + return result; + + IconMapping icon_mapping; + FillIconMapping(statement, page_url, &icon_mapping); + mapping_data->push_back(icon_mapping); + } + return result; +} + +IconMappingID ThumbnailDatabase::AddIconMapping( + const GURL& page_url, + favicon_base::FaviconID icon_id) { + const char kSql[] = + "INSERT INTO icon_mapping (page_url, icon_id) VALUES (?, ?)"; + sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, kSql)); + statement.BindString(0, URLDatabase::GURLToDatabaseURL(page_url)); + statement.BindInt64(1, icon_id); + + if (!statement.Run()) + return 0; + + return db_.GetLastInsertRowId(); +} + +bool ThumbnailDatabase::UpdateIconMapping(IconMappingID mapping_id, + favicon_base::FaviconID icon_id) { + sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, + "UPDATE icon_mapping SET icon_id=? 
WHERE id=?")); + statement.BindInt64(0, icon_id); + statement.BindInt64(1, mapping_id); + + return statement.Run(); +} + +bool ThumbnailDatabase::DeleteIconMappings(const GURL& page_url) { + sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, + "DELETE FROM icon_mapping WHERE page_url = ?")); + statement.BindString(0, URLDatabase::GURLToDatabaseURL(page_url)); + + return statement.Run(); +} + +bool ThumbnailDatabase::DeleteIconMapping(IconMappingID mapping_id) { + sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, + "DELETE FROM icon_mapping WHERE id=?")); + statement.BindInt64(0, mapping_id); + + return statement.Run(); +} + +bool ThumbnailDatabase::HasMappingFor(favicon_base::FaviconID id) { + sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, + "SELECT id FROM icon_mapping " + "WHERE icon_id=?")); + statement.BindInt64(0, id); + + return statement.Step(); +} + +bool ThumbnailDatabase::CloneIconMappings(const GURL& old_page_url, + const GURL& new_page_url) { + sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, + "SELECT icon_id FROM icon_mapping " + "WHERE page_url=?")); + if (!statement.is_valid()) + return false; + + // Do nothing if there are existing bindings + statement.BindString(0, URLDatabase::GURLToDatabaseURL(new_page_url)); + if (statement.Step()) + return true; + + statement.Assign(db_.GetCachedStatement(SQL_FROM_HERE, + "INSERT INTO icon_mapping (page_url, icon_id) " + "SELECT ?, icon_id FROM icon_mapping " + "WHERE page_url = ?")); + + statement.BindString(0, URLDatabase::GURLToDatabaseURL(new_page_url)); + statement.BindString(1, URLDatabase::GURLToDatabaseURL(old_page_url)); + return statement.Run(); +} + +bool ThumbnailDatabase::InitIconMappingEnumerator( + favicon_base::IconType type, + IconMappingEnumerator* enumerator) { + DCHECK(!enumerator->statement_.is_valid()); + enumerator->statement_.Assign(db_.GetCachedStatement( + SQL_FROM_HERE, + "SELECT icon_mapping.id, icon_mapping.icon_id, 
favicons.icon_type, " + "favicons.url, icon_mapping.page_url " + "FROM icon_mapping JOIN favicons ON (" + "icon_mapping.icon_id = favicons.id) " + "WHERE favicons.icon_type = ?")); + enumerator->statement_.BindInt(0, type); + return enumerator->statement_.is_valid(); +} + +bool ThumbnailDatabase::RetainDataForPageUrls( + const std::vector& urls_to_keep) { + sql::Transaction transaction(&db_); + if (!transaction.Begin()) + return false; + + // temp.icon_id_mapping generates new icon ids as consecutive + // integers starting from 1, and maps them to the old icon ids. + { + const char kIconMappingCreate[] = + "CREATE TEMP TABLE icon_id_mapping " + "(" + "new_icon_id INTEGER PRIMARY KEY," + "old_icon_id INTEGER NOT NULL UNIQUE" + ")"; + if (!db_.Execute(kIconMappingCreate)) + return false; + + // Insert the icon ids for retained urls, skipping duplicates. + const char kIconMappingSql[] = + "INSERT OR IGNORE INTO temp.icon_id_mapping (old_icon_id) " + "SELECT icon_id FROM icon_mapping WHERE page_url = ?"; + sql::Statement statement(db_.GetUniqueStatement(kIconMappingSql)); + for (std::vector::const_iterator + i = urls_to_keep.begin(); i != urls_to_keep.end(); ++i) { + statement.BindString(0, URLDatabase::GURLToDatabaseURL(*i)); + if (!statement.Run()) + return false; + statement.Reset(true); + } + } + + const char kRenameIconMappingTable[] = + "ALTER TABLE icon_mapping RENAME TO old_icon_mapping"; + const char kCopyIconMapping[] = + "INSERT INTO icon_mapping (page_url, icon_id) " + "SELECT old.page_url, mapping.new_icon_id " + "FROM old_icon_mapping AS old " + "JOIN temp.icon_id_mapping AS mapping " + "ON (old.icon_id = mapping.old_icon_id)"; + const char kDropOldIconMappingTable[] = "DROP TABLE old_icon_mapping"; + + const char kRenameFaviconsTable[] = + "ALTER TABLE favicons RENAME TO old_favicons"; + const char kCopyFavicons[] = + "INSERT INTO favicons (id, url, icon_type) " + "SELECT mapping.new_icon_id, old.url, old.icon_type " + "FROM old_favicons AS old " + "JOIN 
temp.icon_id_mapping AS mapping " + "ON (old.id = mapping.old_icon_id)"; + const char kDropOldFaviconsTable[] = "DROP TABLE old_favicons"; + + const char kRenameFaviconBitmapsTable[] = + "ALTER TABLE favicon_bitmaps RENAME TO old_favicon_bitmaps"; + const char kCopyFaviconBitmaps[] = + "INSERT INTO favicon_bitmaps " + " (icon_id, last_updated, image_data, width, height) " + "SELECT mapping.new_icon_id, old.last_updated, " + " old.image_data, old.width, old.height " + "FROM old_favicon_bitmaps AS old " + "JOIN temp.icon_id_mapping AS mapping " + "ON (old.icon_id = mapping.old_icon_id)"; + const char kDropOldFaviconBitmapsTable[] = + "DROP TABLE old_favicon_bitmaps"; + + // Rename existing tables to new location. + if (!db_.Execute(kRenameIconMappingTable) || + !db_.Execute(kRenameFaviconsTable) || + !db_.Execute(kRenameFaviconBitmapsTable)) { + return false; + } + + // Initialize the replacement tables. At this point the old indices + // still exist (pointing to the old_* tables), so do not initialize + // the indices. + if (!InitTables(&db_)) + return false; + + // Copy all of the data over. + if (!db_.Execute(kCopyIconMapping) || + !db_.Execute(kCopyFavicons) || + !db_.Execute(kCopyFaviconBitmaps)) { + return false; + } + + // Drop the old_* tables, which also drops the indices. + if (!db_.Execute(kDropOldIconMappingTable) || + !db_.Execute(kDropOldFaviconsTable) || + !db_.Execute(kDropOldFaviconBitmapsTable)) { + return false; + } + + // Recreate the indices. + // TODO(shess): UNIQUE indices could fail due to duplication. This + // could happen in case of corruption. 
+ if (!InitIndices(&db_)) + return false; + + const char kIconMappingDrop[] = "DROP TABLE temp.icon_id_mapping"; + if (!db_.Execute(kIconMappingDrop)) + return false; + + return transaction.Commit(); +} + +sql::InitStatus ThumbnailDatabase::OpenDatabase(sql::Connection* db, + const base::FilePath& db_name) { + size_t startup_kb = 0; + int64 size_64; + if (base::GetFileSize(db_name, &size_64)) + startup_kb = static_cast(size_64 / 1024); + + db->set_histogram_tag("Thumbnail"); + db->set_error_callback(base::Bind(&DatabaseErrorCallback, + db, db_name, startup_kb, history_client_)); + + // Thumbnails db now only stores favicons, so we don't need that big a page + // size or cache. + db->set_page_size(2048); + db->set_cache_size(32); + + // Run the database in exclusive mode. Nobody else should be accessing the + // database while we're running, and this will give somewhat improved perf. + db->set_exclusive_locking(); + + if (!db->Open(db_name)) + return sql::INIT_FAILURE; + + return sql::INIT_OK; +} + +sql::InitStatus ThumbnailDatabase::InitImpl(const base::FilePath& db_name) { + sql::InitStatus status = OpenDatabase(&db_, db_name); + if (status != sql::INIT_OK) + return status; + + // Clear databases which are too old to process. + DCHECK_LT(kDeprecatedVersionNumber, kCurrentVersionNumber); + sql::MetaTable::RazeIfDeprecated(&db_, kDeprecatedVersionNumber); + + // TODO(shess): Sqlite.Version.Thumbnail shows versions 22, 23, and + // 25. Future versions are not destroyed because that could lead to + // data loss if the profile is opened by a later channel, but + // perhaps a heuristic like >kCurrentVersionNumber+3 could be used. + + // Scope initialization in a transaction so we can't be partially initialized. + sql::Transaction transaction(&db_); + if (!transaction.Begin()) + return sql::INIT_FAILURE; + + // TODO(shess): Failing Begin() implies that something serious is + // wrong with the database. Raze() may be in order. 
+ +#if defined(OS_MACOSX) && !defined(OS_IOS) + // Exclude the thumbnails file from backups. + base::mac::SetFileBackupExclusion(db_name); +#endif + + // thumbnails table has been obsolete for a long time, remove any + // detritus. + ignore_result(db_.Execute("DROP TABLE IF EXISTS thumbnails")); + + // At some point, operations involving temporary tables weren't done + // atomically and users have been stranded. Drop those tables and + // move on. + // TODO(shess): Prove it? Audit all cases and see if it's possible + // that this implies non-atomic update, and should thus be handled + // via the corruption handler. + ignore_result(db_.Execute("DROP TABLE IF EXISTS temp_favicons")); + ignore_result(db_.Execute("DROP TABLE IF EXISTS temp_favicon_bitmaps")); + ignore_result(db_.Execute("DROP TABLE IF EXISTS temp_icon_mapping")); + + // Create the tables. + if (!meta_table_.Init(&db_, kCurrentVersionNumber, + kCompatibleVersionNumber) || + !InitTables(&db_) || + !InitIndices(&db_)) { + return sql::INIT_FAILURE; + } + + // Version check. We should not encounter a database too old for us to handle + // in the wild, so we try to continue in that case. 
+ if (meta_table_.GetCompatibleVersionNumber() > kCurrentVersionNumber) { + LOG(WARNING) << "Thumbnail database is too new."; + return sql::INIT_TOO_NEW; + } + + int cur_version = meta_table_.GetVersionNumber(); + + if (!db_.DoesColumnExist("favicons", "icon_type")) { + LOG(ERROR) << "Raze because of missing favicon.icon_type"; + RecordInvalidStructure(STRUCTURE_EVENT_VERSION4); + + db_.RazeAndClose(); + return sql::INIT_FAILURE; + } + + if (cur_version < 7 && !db_.DoesColumnExist("favicons", "sizes")) { + LOG(ERROR) << "Raze because of missing favicon.sizes"; + RecordInvalidStructure(STRUCTURE_EVENT_VERSION5); + + db_.RazeAndClose(); + return sql::INIT_FAILURE; + } + + if (cur_version == 5) { + ++cur_version; + if (!UpgradeToVersion6()) + return CantUpgradeToVersion(cur_version); + } + + if (cur_version == 6) { + ++cur_version; + if (!UpgradeToVersion7()) + return CantUpgradeToVersion(cur_version); + } + + LOG_IF(WARNING, cur_version < kCurrentVersionNumber) << + "Thumbnail database version " << cur_version << " is too old to handle."; + + // Initialization is complete. + if (!transaction.Commit()) + return sql::INIT_FAILURE; + + // Raze the database if the structure of the favicons database is not what + // it should be. This error cannot be detected via the SQL error code because + // the error code for running SQL statements against a database with missing + // columns is SQLITE_ERROR which is not unique enough to act upon. + // TODO(pkotwicz): Revisit this in M27 and see if the razing can be removed. 
+ // (crbug.com/166453) + if (IsFaviconDBStructureIncorrect()) { + LOG(ERROR) << "Raze because of invalid favicon db structure."; + RecordInvalidStructure(STRUCTURE_EVENT_FAVICON); + + db_.RazeAndClose(); + return sql::INIT_FAILURE; + } + + return sql::INIT_OK; +} + +sql::InitStatus ThumbnailDatabase::CantUpgradeToVersion(int cur_version) { + LOG(WARNING) << "Unable to update to thumbnail database to version " << + cur_version << "."; + db_.Close(); + return sql::INIT_FAILURE; +} + +bool ThumbnailDatabase::UpgradeToVersion6() { + // Move bitmap data from favicons to favicon_bitmaps. + bool success = + db_.Execute("INSERT INTO favicon_bitmaps (icon_id, last_updated, " + "image_data, width, height)" + "SELECT id, last_updated, image_data, 0, 0 FROM favicons") && + db_.Execute("CREATE TABLE temp_favicons (" + "id INTEGER PRIMARY KEY," + "url LONGVARCHAR NOT NULL," + "icon_type INTEGER DEFAULT 1," + // default icon_type FAVICON to be consistent with + // past migration. + "sizes LONGVARCHAR)") && + db_.Execute("INSERT INTO temp_favicons (id, url, icon_type) " + "SELECT id, url, icon_type FROM favicons") && + db_.Execute("DROP TABLE favicons") && + db_.Execute("ALTER TABLE temp_favicons RENAME TO favicons"); + // NOTE(shess): v7 will re-create the index. + if (!success) + return false; + + meta_table_.SetVersionNumber(6); + meta_table_.SetCompatibleVersionNumber(std::min(6, kCompatibleVersionNumber)); + return true; +} + +bool ThumbnailDatabase::UpgradeToVersion7() { + // Sizes column was never used, remove it. + bool success = + db_.Execute("CREATE TABLE temp_favicons (" + "id INTEGER PRIMARY KEY," + "url LONGVARCHAR NOT NULL," + // default icon_type FAVICON to be consistent with + // past migration. 
+ "icon_type INTEGER DEFAULT 1)") && + db_.Execute("INSERT INTO temp_favicons (id, url, icon_type) " + "SELECT id, url, icon_type FROM favicons") && + db_.Execute("DROP TABLE favicons") && + db_.Execute("ALTER TABLE temp_favicons RENAME TO favicons") && + db_.Execute("CREATE INDEX IF NOT EXISTS favicons_url ON favicons(url)"); + + if (!success) + return false; + + meta_table_.SetVersionNumber(7); + meta_table_.SetCompatibleVersionNumber(std::min(7, kCompatibleVersionNumber)); + return true; +} + +bool ThumbnailDatabase::IsFaviconDBStructureIncorrect() { + return !db_.IsSQLValid("SELECT id, url, icon_type FROM favicons"); +} + +} // namespace history diff --git a/components/history/core/browser/thumbnail_database.h b/components/history/core/browser/thumbnail_database.h new file mode 100644 index 0000000..16a5bd1 --- /dev/null +++ b/components/history/core/browser/thumbnail_database.h @@ -0,0 +1,278 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_HISTORY_CORE_BROWSER_THUMBNAIL_DATABASE_H_ +#define COMPONENTS_HISTORY_CORE_BROWSER_THUMBNAIL_DATABASE_H_ + +#include + +#include "base/gtest_prod_util.h" +#include "base/memory/ref_counted.h" +#include "components/history/core/browser/history_types.h" +#include "sql/connection.h" +#include "sql/init_status.h" +#include "sql/meta_table.h" +#include "sql/statement.h" + +namespace base { +class FilePath; +class RefCountedMemory; +class Time; +} + +namespace history { + +class HistoryClient; + +// This database interface is owned by the history backend and runs on the +// history thread. It is a totally separate component from history partially +// because we may want to move it to its own thread in the future. The +// operations we will do on this database will be slow, but we can tolerate +// higher latency (it's OK for thumbnails to come in slower than the rest +// of the data). 
+// Moving this to a separate thread would not block potentially
+// higher priority history operations.
+class ThumbnailDatabase {
+ public:
+  explicit ThumbnailDatabase(HistoryClient* history_client);
+  ~ThumbnailDatabase();
+
+  // Must be called after creation but before any other methods are called.
+  // When not INIT_OK, no other functions should be called.
+  sql::InitStatus Init(const base::FilePath& db_name);
+
+  // Computes and records various metrics for the database. Should only be
+  // called once and only upon successful Init.
+  void ComputeDatabaseMetrics();
+
+  // Transactions on the database.
+  void BeginTransaction();
+  void CommitTransaction();
+  int transaction_nesting() const {
+    return db_.transaction_nesting();
+  }
+  void RollbackTransaction();
+
+  // Vacuums the database. This will cause sqlite to defragment and collect
+  // unused space in the file. It can be VERY SLOW.
+  void Vacuum();
+
+  // Try to trim the cache memory used by the database. If |aggressively| is
+  // true try to trim all unused cache, otherwise trim by half.
+  void TrimMemory(bool aggressively);
+
+  // Favicon Bitmaps -----------------------------------------------------------
+
+  // Returns true if there are favicon bitmaps for |icon_id|. If
+  // |bitmap_id_sizes| is non NULL, sets it to a list of the favicon bitmap ids
+  // and their associated pixel sizes for the favicon with |icon_id|.
+  // The list contains results for the bitmaps which are cached in the
+  // favicon_bitmaps table. The pixel sizes are a subset of the sizes in the
+  // 'sizes' field of the favicons table for |icon_id|.
+  bool GetFaviconBitmapIDSizes(
+      favicon_base::FaviconID icon_id,
+      std::vector<FaviconBitmapIDSize>* bitmap_id_sizes);
+
+  // Returns true if there are any matched bitmaps for the given |icon_id|. All
+  // matched results are returned if |favicon_bitmaps| is not NULL.
+  bool GetFaviconBitmaps(favicon_base::FaviconID icon_id,
+                         std::vector<FaviconBitmap>* favicon_bitmaps);
+
+  // Gets the last updated time, bitmap data, and pixel size of the favicon
+  // bitmap at |bitmap_id|. Returns true if successful.
+  bool GetFaviconBitmap(FaviconBitmapID bitmap_id,
+                        base::Time* last_updated,
+                        scoped_refptr<base::RefCountedMemory>* png_icon_data,
+                        gfx::Size* pixel_size);
+
+  // Adds a bitmap component at |pixel_size| for the favicon with |icon_id|.
+  // Only favicons representing a .ico file should have multiple favicon bitmaps
+  // per favicon.
+  // |icon_data| is the png encoded data.
+  // The |time| indicates the access time, and is used to detect when the
+  // favicon should be refreshed.
+  // |pixel_size| is the pixel dimensions of |icon_data|.
+  // Returns the id of the added bitmap or 0 if unsuccessful.
+  FaviconBitmapID AddFaviconBitmap(
+      favicon_base::FaviconID icon_id,
+      const scoped_refptr<base::RefCountedMemory>& icon_data,
+      base::Time time,
+      const gfx::Size& pixel_size);
+
+  // Sets the bitmap data and the last updated time for the favicon bitmap at
+  // |bitmap_id|.
+  // Returns true if successful.
+  bool SetFaviconBitmap(FaviconBitmapID bitmap_id,
+                        scoped_refptr<base::RefCountedMemory> bitmap_data,
+                        base::Time time);
+
+  // Sets the last updated time for the favicon bitmap at |bitmap_id|.
+  // Returns true if successful.
+  bool SetFaviconBitmapLastUpdateTime(FaviconBitmapID bitmap_id,
+                                      base::Time time);
+
+  // Deletes the favicon bitmap with |bitmap_id|.
+  // Returns true if successful.
+  bool DeleteFaviconBitmap(FaviconBitmapID bitmap_id);
+
+  // Favicons ------------------------------------------------------------------
+
+  // Sets the favicon as out of date. This will set |last_updated| for all
+  // of the bitmaps for |icon_id| to be out of date.
+  bool SetFaviconOutOfDate(favicon_base::FaviconID icon_id);
+
+  // Returns the id of the entry in the favicon database with the specified url
+  // and icon type. If |required_icon_type| contains multiple icon types and
+  // there are more than one matched icon in database, only one icon will be
+  // returned in the priority of TOUCH_PRECOMPOSED_ICON, TOUCH_ICON, and
+  // FAVICON, and the icon type is returned in icon_type parameter if it is not
+  // NULL.
+  // Returns 0 if no entry exists for the specified url.
+  favicon_base::FaviconID GetFaviconIDForFaviconURL(
+      const GURL& icon_url,
+      int required_icon_type,
+      favicon_base::IconType* icon_type);
+
+  // Gets the icon_url and icon_type for the specified |icon_id|.
+  bool GetFaviconHeader(favicon_base::FaviconID icon_id,
+                        GURL* icon_url,
+                        favicon_base::IconType* icon_type);
+
+  // Adds favicon with |icon_url| and |icon_type| to the favicon db,
+  // returning its id.
+  favicon_base::FaviconID AddFavicon(const GURL& icon_url,
+                                     favicon_base::IconType icon_type);
+
+  // Adds a favicon with a single bitmap. This call is equivalent to calling
+  // AddFavicon and AddFaviconBitmap.
+  favicon_base::FaviconID AddFavicon(
+      const GURL& icon_url,
+      favicon_base::IconType icon_type,
+      const scoped_refptr<base::RefCountedMemory>& icon_data,
+      base::Time time,
+      const gfx::Size& pixel_size);
+
+  // Delete the favicon with the provided id. Returns false on failure.
+  bool DeleteFavicon(favicon_base::FaviconID id);
+
+  // Icon Mapping --------------------------------------------------------------
+  //
+  // Returns true if there is a matched icon mapping for the given page and
+  // icon type.
+  // The matched icon mapping is returned in the icon_mapping parameter if it is
+  // not NULL.
+
+  // Returns true if there are icon mappings for the given page and icon types.
+  // If |required_icon_types| contains multiple icon types and there is more
+  // than one matched icon type in the database, icons of only a single type
+  // will be returned in the priority of TOUCH_PRECOMPOSED_ICON, TOUCH_ICON,
+  // and FAVICON.
+  // The matched icon mappings are returned in the |mapping_data| parameter if
+  // it is not NULL.
+  bool GetIconMappingsForPageURL(const GURL& page_url,
+                                 int required_icon_types,
+                                 std::vector<IconMapping>* mapping_data);
+
+  // Returns true if there is any matched icon mapping for the given page.
+  // All matched icon mappings are returned in descending order of IconType if
+  // mapping_data is not NULL.
+  bool GetIconMappingsForPageURL(const GURL& page_url,
+                                 std::vector<IconMapping>* mapping_data);
+
+  // Adds a mapping between the given page_url and icon_id.
+  // Returns the new mapping id if the adding succeeds, otherwise 0 is returned.
+  IconMappingID AddIconMapping(const GURL& page_url,
+                               favicon_base::FaviconID icon_id);
+
+  // Updates the page and icon mapping for the given mapping_id with the given
+  // icon_id.
+  // Returns true if the update succeeded.
+  bool UpdateIconMapping(IconMappingID mapping_id,
+                         favicon_base::FaviconID icon_id);
+
+  // Deletes the icon mapping entries for the given page url.
+  // Returns true if the deletion succeeded.
+  bool DeleteIconMappings(const GURL& page_url);
+
+  // Deletes the icon mapping with |mapping_id|.
+  // Returns true if the deletion succeeded.
+  bool DeleteIconMapping(IconMappingID mapping_id);
+
+  // Checks whether a favicon is used by any URLs in the database.
+  bool HasMappingFor(favicon_base::FaviconID id);
+
+  // Clones the existing mappings from |old_page_url| if |new_page_url| has no
+  // mappings. Otherwise, will leave mappings alone.
+  bool CloneIconMappings(const GURL& old_page_url, const GURL& new_page_url);
+
+  // The class to enumerate icon mappings. Use InitIconMappingEnumerator to
+  // initialize.
+  class IconMappingEnumerator {
+   public:
+    IconMappingEnumerator();
+    ~IconMappingEnumerator();
+
+    // Get the next icon mapping, return false if no more are available.
+    bool GetNextIconMapping(IconMapping* icon_mapping);
+
+   private:
+    friend class ThumbnailDatabase;
+
+    // Used to query database and return the data for filling IconMapping in
+    // each call of GetNextIconMapping().
+    sql::Statement statement_;
+
+    DISALLOW_COPY_AND_ASSIGN(IconMappingEnumerator);
+  };
+
+  // Return all icon mappings of the given |type|.
+  bool InitIconMappingEnumerator(favicon_base::IconType type,
+                                 IconMappingEnumerator* enumerator);
+
+  // Remove all data except that associated with the passed page urls.
+  // Returns false in case of failure. A nested transaction is used,
+  // so failure causes any outer transaction to be rolled back.
+  bool RetainDataForPageUrls(const std::vector<GURL>& urls_to_keep);
+
+ private:
+  FRIEND_TEST_ALL_PREFIXES(ThumbnailDatabaseTest, RetainDataForPageUrls);
+  FRIEND_TEST_ALL_PREFIXES(ThumbnailDatabaseTest, Version3);
+  FRIEND_TEST_ALL_PREFIXES(ThumbnailDatabaseTest, Version4);
+  FRIEND_TEST_ALL_PREFIXES(ThumbnailDatabaseTest, Version5);
+  FRIEND_TEST_ALL_PREFIXES(ThumbnailDatabaseTest, Version6);
+  FRIEND_TEST_ALL_PREFIXES(ThumbnailDatabaseTest, Version7);
+  FRIEND_TEST_ALL_PREFIXES(ThumbnailDatabaseTest, WildSchema);
+
+  // Open database on a given filename. If the file does not exist,
+  // it is created.
+  // |db| is the database to open.
+  // |db_name| is a path to the database file.
+  sql::InitStatus OpenDatabase(sql::Connection* db,
+                               const base::FilePath& db_name);
+
+  // Helper function to implement internals of Init(). This allows
+  // Init() to retry in case of failure, since some failures run
+  // recovery code.
+  sql::InitStatus InitImpl(const base::FilePath& db_name);
+
+  // Helper function to handle cleanup on upgrade failures.
+  sql::InitStatus CantUpgradeToVersion(int cur_version);
+
+  // Adds support for size in favicons table.
+  bool UpgradeToVersion6();
+
+  // Removes sizes column.
+  bool UpgradeToVersion7();
+
+  // Returns true if the |favicons| database is missing a column.
+  bool IsFaviconDBStructureIncorrect();
+
+  sql::Connection db_;
+  sql::MetaTable meta_table_;
+
+  HistoryClient* history_client_;
+};
+
+}  // namespace history
+
+#endif  // COMPONENTS_HISTORY_CORE_BROWSER_THUMBNAIL_DATABASE_H_
diff --git a/components/history/core/browser/top_sites_cache.cc b/components/history/core/browser/top_sites_cache.cc
new file mode 100644
index 0000000..647dd01
--- /dev/null
+++ b/components/history/core/browser/top_sites_cache.cc
@@ -0,0 +1,182 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/history/core/browser/top_sites_cache.h"
+
+#include "base/logging.h"
+#include "base/memory/ref_counted_memory.h"
+
+namespace history {
+
+TopSitesCache::CanonicalURLQuery::CanonicalURLQuery(const GURL& url) {
+  most_visited_url_.redirects.push_back(url);
+  entry_.first = &most_visited_url_;
+  entry_.second = 0u;
+}
+
+TopSitesCache::CanonicalURLQuery::~CanonicalURLQuery() {
+}
+
+TopSitesCache::TopSitesCache() : num_forced_urls_(0) {
+  clear_query_ref_.ClearQuery();
+  clear_query_ref_.ClearRef();
+  clear_path_query_ref_.ClearQuery();
+  clear_path_query_ref_.ClearRef();
+  clear_path_query_ref_.ClearPath();
+}
+
+TopSitesCache::~TopSitesCache() {
+}
+
+void TopSitesCache::SetTopSites(const MostVisitedURLList& top_sites) {
+  top_sites_ = top_sites;
+  CountForcedURLs();
+  GenerateCanonicalURLs();
+}
+
+void TopSitesCache::SetThumbnails(const URLToImagesMap& images) {
+  images_ = images;
+}
+
+Images* TopSitesCache::GetImage(const GURL& url) {
+  return &images_[GetCanonicalURL(url)];
+}
+
+bool TopSitesCache::GetPageThumbnail(
+    const GURL& url,
+    scoped_refptr<base::RefCountedMemory>* bytes) const {
+  std::map<GURL, Images>::const_iterator found =
+      images_.find(GetCanonicalURL(url));
+  if (found != images_.end()) {
+    base::RefCountedMemory* data = found->second.thumbnail.get();
+    if (data) {
+      *bytes = data;
+      return true;
+    }
+  }
+  return false;
+}
+
+bool TopSitesCache::GetPageThumbnailScore(const GURL& url,
+                                          ThumbnailScore* score) const {
+  std::map<GURL, Images>::const_iterator found =
+      images_.find(GetCanonicalURL(url));
+  if (found != images_.end()) {
+    *score = found->second.thumbnail_score;
+    return true;
+  }
+  return false;
+}
+
+const GURL& TopSitesCache::GetCanonicalURL(const GURL& url) const {
+  CanonicalURLs::const_iterator it = GetCanonicalURLsIterator(url);
+  return it == canonical_urls_.end() ? url : it->first.first->url;
+}
+
+GURL TopSitesCache::GetGeneralizedCanonicalURL(const GURL& url) const {
+  CanonicalURLs::const_iterator it_hi =
+      canonical_urls_.lower_bound(CanonicalURLQuery(url).entry());
+  if (it_hi != canonical_urls_.end()) {
+    // Test match ignoring "?query#ref". This also handles exact match.
+    if (url.ReplaceComponents(clear_query_ref_) ==
+        GetURLFromIterator(it_hi).ReplaceComponents(clear_query_ref_)) {
+      return it_hi->first.first->url;
+    }
+  }
+  // Everything on or after |it_hi| is irrelevant.
+
+  GURL base_url(url.ReplaceComponents(clear_path_query_ref_));
+  CanonicalURLs::const_iterator it_lo =
+      canonical_urls_.lower_bound(CanonicalURLQuery(base_url).entry());
+  if (it_lo == canonical_urls_.end())
+    return GURL::EmptyGURL();
+  GURL compare_url_lo(GetURLFromIterator(it_lo));
+  if (!HaveSameSchemeHostAndPort(base_url, compare_url_lo) ||
+      !IsPathPrefix(base_url.path(), compare_url_lo.path())) {
+    return GURL::EmptyGURL();
+  }
+  // Everything before |it_lo| is irrelevant.
+
+  // Search in [|it_lo|, |it_hi|) in reversed order. The first URL found that's
+  // a prefix of |url| (ignoring "?query#ref") would be returned.
+  for (CanonicalURLs::const_iterator it = it_hi; it != it_lo;) {
+    --it;
+    GURL compare_url(GetURLFromIterator(it));
+    DCHECK(HaveSameSchemeHostAndPort(compare_url, url));
+    if (IsPathPrefix(compare_url.path(), url.path()))
+      return it->first.first->url;
+  }
+
+  return GURL::EmptyGURL();
+}
+
+bool TopSitesCache::IsKnownURL(const GURL& url) const {
+  return GetCanonicalURLsIterator(url) != canonical_urls_.end();
+}
+
+size_t TopSitesCache::GetURLIndex(const GURL& url) const {
+  DCHECK(IsKnownURL(url));
+  return GetCanonicalURLsIterator(url)->second;
+}
+
+size_t TopSitesCache::GetNumNonForcedURLs() const {
+  return top_sites_.size() - num_forced_urls_;
+}
+
+size_t TopSitesCache::GetNumForcedURLs() const {
+  return num_forced_urls_;
+}
+
+void TopSitesCache::CountForcedURLs() {
+  num_forced_urls_ = 0;
+  while (num_forced_urls_ < top_sites_.size()) {
+    // Forced sites are all at the beginning.
+    if (top_sites_[num_forced_urls_].last_forced_time.is_null())
+      break;
+    num_forced_urls_++;
+  }
+#if DCHECK_IS_ON
+  // In debug, ensure the cache user has no forced URLs past that point.
+  for (size_t i = num_forced_urls_; i < top_sites_.size(); ++i) {
+    DCHECK(top_sites_[i].last_forced_time.is_null())
+        << "All the forced URLs must appear before non-forced URLs.";
+  }
+#endif
+}
+
+void TopSitesCache::GenerateCanonicalURLs() {
+  canonical_urls_.clear();
+  for (size_t i = 0; i < top_sites_.size(); i++)
+    StoreRedirectChain(top_sites_[i].redirects, i);
+}
+
+void TopSitesCache::StoreRedirectChain(const RedirectList& redirects,
+                                       size_t destination) {
+  // |redirects| is empty if the user pinned a site and there are not enough top
+  // sites before the pinned site.
+
+  // Map all the redirected URLs to the destination.
+  for (size_t i = 0; i < redirects.size(); i++) {
+    // If this redirect is already known, don't replace it with a new one.
+    if (!IsKnownURL(redirects[i])) {
+      CanonicalURLEntry entry;
+      entry.first = &(top_sites_[destination]);
+      entry.second = i;
+      canonical_urls_[entry] = destination;
+    }
+  }
+}
+
+TopSitesCache::CanonicalURLs::const_iterator
+    TopSitesCache::GetCanonicalURLsIterator(const GURL& url) const {
+  return canonical_urls_.find(CanonicalURLQuery(url).entry());
+}
+
+const GURL& TopSitesCache::GetURLFromIterator(
+    CanonicalURLs::const_iterator it) const {
+  DCHECK(it != canonical_urls_.end());
+  return it->first.first->redirects[it->first.second];
+}
+
+}  // namespace history
diff --git a/components/history/core/browser/top_sites_cache.h b/components/history/core/browser/top_sites_cache.h
new file mode 100644
index 0000000..b88b51c
--- /dev/null
+++ b/components/history/core/browser/top_sites_cache.h
@@ -0,0 +1,170 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_HISTORY_CORE_BROWSER_TOP_SITES_CACHE_H_
+#define COMPONENTS_HISTORY_CORE_BROWSER_TOP_SITES_CACHE_H_
+
+#include <map>
+#include <utility>
+
+#include "base/memory/ref_counted.h"
+#include "components/history/core/browser/history_types.h"
+#include "components/history/core/browser/url_utils.h"
+#include "url/gurl.h"
+
+class GURL;
+
+namespace history {
+
+// TopSitesCache caches thumbnails for visited pages. Retrieving thumbnails from
+// a given input URL is a two-stage process:
+//
+//   input URL --(map 1)--> canonical URL --(map 2)--> image.
+//
+// (map 1) searches for an URL in |canonical_urls_| that "matches" (see below)
+// input URL. If found, canonical URL assigned to the result. Otherwise the
+// input URL is considered to already be a canonical URL.
+//
+// (map 2) simply looks up canonical URL in |images_|.
+//
+// The rule to "match" URL in |canonical_urls_| always favors exact match.
+// - In GetCanonicalURL(), exact match is the only case examined.
+// - In GetGeneralizedCanonicalURL(), we also perform "generalized" URL matches,
+//   i.e., stored URLs in |canonical_urls_| that are prefixes of input URL,
+//   ignoring "?query#ref".
+// For the latter two "URL prefix matches", we prefer the match that is closest
+// to input URL, w.r.t. path hierarchy.
+
+// TopSitesCache caches the top sites and thumbnails for TopSites.
+class TopSitesCache {
+ public:
+  TopSitesCache();
+  ~TopSitesCache();
+
+  // Set the top sites. In |top_sites| all forced URLs must appear before
+  // non-forced URLs. This is only checked in debug.
+  void SetTopSites(const MostVisitedURLList& top_sites);
+  const MostVisitedURLList& top_sites() const { return top_sites_; }
+
+  // The thumbnails.
+  void SetThumbnails(const URLToImagesMap& images);
+  const URLToImagesMap& images() const { return images_; }
+
+  // Returns the thumbnail as an Image for the specified url. This adds an entry
+  // for |url| if one has not yet been added.
+  Images* GetImage(const GURL& url);
+
+  // Fetches the thumbnail for the specified url. Returns true if there is a
+  // thumbnail for the specified url. It is possible for a URL to be in TopSites
+  // but not have a thumbnail.
+  bool GetPageThumbnail(const GURL& url,
+                        scoped_refptr<base::RefCountedMemory>* bytes) const;
+
+  // Fetches the thumbnail score for the specified url. Returns true if
+  // there is a thumbnail score for the specified url.
+  bool GetPageThumbnailScore(const GURL& url, ThumbnailScore* score) const;
+
+  // Returns the canonical URL for |url|, or |url| itself if it is not known.
+  const GURL& GetCanonicalURL(const GURL& url) const;
+
+  // Searches for a URL in |canonical_urls_| that is a URL prefix of |url|.
+  // Prefers an exact match if it exists, or the least generalized match while
+  // ignoring "?query#ref". Returns the resulting canonical URL if match is
+  // found, otherwise returns an empty GURL.
+  GURL GetGeneralizedCanonicalURL(const GURL& url) const;
+
+  // Returns true if |url| is known.
+  bool IsKnownURL(const GURL& url) const;
+
+  // Returns the index into |top_sites_| for |url|.
+  size_t GetURLIndex(const GURL& url) const;
+
+  // Returns the number of non-forced URLs in the cache.
+  size_t GetNumNonForcedURLs() const;
+
+  // Returns the number of forced URLs in the cache.
+  size_t GetNumForcedURLs() const;
+
+ private:
+  // The entries in CanonicalURLs, see CanonicalURLs for details. The second
+  // argument gives the index of the URL into MostVisitedURLs redirects.
+  typedef std::pair<MostVisitedURL*, size_t> CanonicalURLEntry;
+
+  // Comparator used for CanonicalURLs.
+  class CanonicalURLComparator {
+   public:
+    bool operator()(const CanonicalURLEntry& e1,
+                    const CanonicalURLEntry& e2) const {
+      return CanonicalURLStringCompare(e1.first->redirects[e1.second].spec(),
+                                       e2.first->redirects[e2.second].spec());
+    }
+  };
+
+  // Creates the object needed to form std::map queries into |canonical_urls_|,
+  // wrapping all required temporary data to allow inlining.
+  class CanonicalURLQuery {
+   public:
+    explicit CanonicalURLQuery(const GURL& url);
+    ~CanonicalURLQuery();
+    const CanonicalURLEntry& entry() { return entry_; }
+
+   private:
+    MostVisitedURL most_visited_url_;
+    CanonicalURLEntry entry_;
+  };
+
+  // This is used to map from redirect url to the MostVisitedURL the redirect is
+  // from. Ideally this would be map<GURL, size_t> (second param indexing into
+  // top_sites_), but this results in duplicating all redirect urls. As some
+  // sites have a lot of redirects, we instead use the MostVisitedURL* and the
+  // index of the redirect as the key, and the index into top_sites_ as the
+  // value. This way we aren't duplicating GURLs. CanonicalURLComparator
+  // enforces the ordering as if we were using GURLs.
+  typedef std::map<CanonicalURLEntry, size_t,
+                   CanonicalURLComparator> CanonicalURLs;
+
+  // Count the number of forced URLs.
+  void CountForcedURLs();
+
+  // Generates the set of canonical urls from |top_sites_|.
+  void GenerateCanonicalURLs();
+
+  // Stores a set of redirects. This is used by GenerateCanonicalURLs.
+  void StoreRedirectChain(const RedirectList& redirects, size_t destination);
+
+  // Returns the iterator into |canonical_urls_| for the |url|.
+  CanonicalURLs::const_iterator GetCanonicalURLsIterator(const GURL& url) const;
+
+  // Returns the GURL corresponding to an iterator in |canonical_urls_|.
+  const GURL& GetURLFromIterator(CanonicalURLs::const_iterator it) const;
+
+  // The number of top sites with forced URLs.
+  size_t num_forced_urls_;
+
+  // The top sites. This list must always contain the forced URLs first followed
+  // by the non-forced URLs. This is not strictly enforced but is checked in
+  // debug.
+  MostVisitedURLList top_sites_;
+
+  // The images. These map from canonical url to image.
+  URLToImagesMap images_;
+
+  // Generated from the redirects to and from the most visited pages. See
+  // description above typedef for details.
+  CanonicalURLs canonical_urls_;
+
+  // Helper to clear "?query#ref" from any GURL. This is set in the constructor
+  // and never modified after.
+  GURL::Replacements clear_query_ref_;
+
+  // Helper to clear "/path?query#ref" from any GURL. This is set in the
+  // constructor and never modified after.
+  GURL::Replacements clear_path_query_ref_;
+
+  DISALLOW_COPY_AND_ASSIGN(TopSitesCache);
+};
+
+}  // namespace history
+
+#endif  // COMPONENTS_HISTORY_CORE_BROWSER_TOP_SITES_CACHE_H_
diff --git a/components/history/core/browser/top_sites_cache_unittest.cc b/components/history/core/browser/top_sites_cache_unittest.cc
new file mode 100644
index 0000000..30e7aeb
--- /dev/null
+++ b/components/history/core/browser/top_sites_cache_unittest.cc
@@ -0,0 +1,258 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/history/core/browser/top_sites_cache.h"
+
+#include <set>
+
+#include "base/basictypes.h"
+#include "base/logging.h"
+#include "base/strings/string16.h"
+#include "base/strings/string_number_conversions.h"
+#include "base/strings/utf_string_conversions.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace history {
+
+namespace {
+
+class TopSitesCacheTest : public testing::Test {
+ public:
+  TopSitesCacheTest() {
+  }
+
+ protected:
+  // Initializes |top_sites_| on |spec|, which is a list of URL strings with
+  // optional indents: indented URLs redirect to the last non-indented URL.
+  // Titles are assigned as "Title 1", "Title 2", etc., in the order of
+  // appearance. See |kTopSitesSpecBasic| for an example. This function does not
+  // update |cache_| so you can manipulate |top_sites_| before you update it.
+  void BuildTopSites(const char** spec, size_t size);
+
+  // Initializes |top_sites_| and |cache_| based on |spec|.
+  void InitTopSiteCache(const char** spec, size_t size);
+
+  MostVisitedURLList top_sites_;
+  TopSitesCache cache_;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(TopSitesCacheTest);
+};
+
+void TopSitesCacheTest::BuildTopSites(const char** spec, size_t size) {
+  std::set<std::string> urls_seen;
+  for (size_t i = 0; i < size; ++i) {
+    const char* spec_item = spec[i];
+    while (*spec_item && *spec_item == ' ')  // Eat indent.
+      ++spec_item;
+    if (urls_seen.find(spec_item) != urls_seen.end())
+      NOTREACHED() << "Duplicate URL found: " << spec_item;
+    urls_seen.insert(spec_item);
+    if (spec_item == spec[i]) {  // No indent: add new MostVisitedURL.
+      base::string16 title(base::ASCIIToUTF16("Title ") +
+                           base::Uint64ToString16(top_sites_.size() + 1));
+      top_sites_.push_back(MostVisitedURL(GURL(spec_item), title));
+    }
+    ASSERT_TRUE(!top_sites_.empty());
+    // Set up redirect to canonical URL. Canonical URL redirects to itself, too.
+    top_sites_.back().redirects.push_back(GURL(spec_item));
+  }
+}
+
+void TopSitesCacheTest::InitTopSiteCache(const char** spec, size_t size) {
+  BuildTopSites(spec, size);
+  cache_.SetTopSites(top_sites_);
+}
+
+const char* kTopSitesSpecBasic[] = {
+  "http://www.google.com",
+  " http://www.gogle.com",  // Redirects.
+  " http://www.gooogle.com",  // Redirects.
+  "http://www.youtube.com/a/b",
+  " http://www.youtube.com/a/b?test=1",  // Redirects.
+  "https://www.google.com/",
+  " https://www.gogle.com",  // Redirects.
+  "http://www.example.com:3141/",
+};
+
+TEST_F(TopSitesCacheTest, GetCanonicalURL) {
+  InitTopSiteCache(kTopSitesSpecBasic, arraysize(kTopSitesSpecBasic));
+  struct {
+    const char* expected;
+    const char* query;
+  } test_cases[] = {
+    // Already is canonical: redirects.
+    {"http://www.google.com/", "http://www.google.com"},
+    // Exact match with stored URL: redirects.
+    {"http://www.google.com/", "http://www.gooogle.com"},
+    // Recognizes despite trailing "/": redirects
+    {"http://www.google.com/", "http://www.gooogle.com/"},
+    // Exact match with URL with query: redirects.
+    {"http://www.youtube.com/a/b", "http://www.youtube.com/a/b?test=1"},
+    // No match with URL with query: as-is.
+    {"http://www.youtube.com/a/b?test", "http://www.youtube.com/a/b?test"},
+    // Never-seen-before URL: as-is.
+    {"http://maps.google.com/", "http://maps.google.com/"},
+    // Changing port number, does not match: as-is.
+    {"http://www.example.com:1234/", "http://www.example.com:1234"},
+    // Smart enough to know that port 80 is HTTP: redirects.
+    {"http://www.google.com/", "http://www.gooogle.com:80"},
+    // Prefix should not work: as-is.
+    {"http://www.youtube.com/a", "http://www.youtube.com/a"},
+  };
+  for (size_t i = 0; i < arraysize(test_cases); ++i) {
+    std::string expected(test_cases[i].expected);
+    std::string query(test_cases[i].query);
+    EXPECT_EQ(expected, cache_.GetCanonicalURL(GURL(query)).spec())
+      << " for test_case[" << i << "]";
+  }
+}
+
+TEST_F(TopSitesCacheTest, IsKnownUrl) {
+  InitTopSiteCache(kTopSitesSpecBasic, arraysize(kTopSitesSpecBasic));
+  // Matches.
+  EXPECT_TRUE(cache_.IsKnownURL(GURL("http://www.google.com")));
+  EXPECT_TRUE(cache_.IsKnownURL(GURL("http://www.gooogle.com")));
+  EXPECT_TRUE(cache_.IsKnownURL(GURL("http://www.google.com/")));
+
+  // Non-matches.
+  EXPECT_FALSE(cache_.IsKnownURL(GURL("http://www.google.com?")));
+  EXPECT_FALSE(cache_.IsKnownURL(GURL("http://www.google.net")));
+  EXPECT_FALSE(cache_.IsKnownURL(GURL("http://www.google.com/stuff")));
+  EXPECT_FALSE(cache_.IsKnownURL(GURL("https://www.gooogle.com")));
+  EXPECT_FALSE(cache_.IsKnownURL(GURL("http://www.youtube.com/a")));
+}
+
+const char* kTopSitesSpecPrefix[] = {
+  "http://www.google.com/",
+  " http://www.google.com/test?q=3",  // Redirects.
+  " http://www.google.com/test/y?d",  // Redirects.
+  " http://www.chromium.org/a/b",  // Redirects.
+  "http://www.google.com/2",
+  " http://www.google.com/test/q",  // Redirects.
+  " http://www.google.com/test/y?b",  // Redirects.
+  "http://www.google.com/3",
+  " http://www.google.com/testing",  // Redirects.
+  "http://www.google.com/test-hyphen",
+  "http://www.google.com/sh",
+  " http://www.google.com/sh/1/2/3",  // Redirects.
+  "http://www.google.com/sh/1",
+};
+
+TEST_F(TopSitesCacheTest, GetCanonicalURLExactMatch) {
+  InitTopSiteCache(kTopSitesSpecPrefix, arraysize(kTopSitesSpecPrefix));
+  for (size_t i = 0; i < arraysize(kTopSitesSpecPrefix); ++i) {
+    // Go through each entry in kTopSitesSpecPrefix, trimming space.
+    const char* s = kTopSitesSpecPrefix[i];
+    while (*s && *s == ' ')
+      ++s;
+    // Get the answer from direct lookup.
+    GURL stored_url(s);
+    GURL expected(cache_.GetCanonicalURL(stored_url));
+    // Test generalization.
+    GURL result(cache_.GetGeneralizedCanonicalURL(stored_url));
+    EXPECT_EQ(expected, result) << " for kTopSitesSpecPrefix[" << i << "]";
+  }
+}
+
+TEST_F(TopSitesCacheTest, GetGeneralizedCanonicalURL) {
+  InitTopSiteCache(kTopSitesSpecPrefix, arraysize(kTopSitesSpecPrefix));
+  struct {
+    const char* expected;
+    const char* query;
+  } test_cases[] = {
+    // Exact match after trimming "?query": redirects.
+    {"http://www.google.com/", "http://www.google.com/test"},
+    // Same, but different code path: redirects.
+    {"http://www.google.com/", "http://www.google.com/test/y?e"},
+    {"http://www.google.com/", "http://www.google.com/test/y?c"},
+    // Same, but code path leads to different result: redirects.
+    {"http://www.google.com/2", "http://www.google.com/test/y?a"},
+    // Generalized match: redirects.
+    {"http://www.google.com/3", "http://www.google.com/3/1/4/1/5/9"},
+    // Generalized match with trailing "/": redirects.
+    {"http://www.google.com/3", "http://www.google.com/3/1/4/1/5/9/"},
+    // Unique generalization match: redirects.
+    {"http://www.google.com/", "http://www.chromium.org/a/b/c"},
+    // Multiple exact matches after trimming: redirects to first.
+    {"http://www.google.com/2", "http://www.google.com/test/y"},
+    // Multiple generalized matches: redirects to least general.
+    {"http://www.google.com/sh", "http://www.google.com/sh/1/2/3/4/"},
+    // Multiple generalized matches: redirects to least general.
+    {"http://www.google.com/sh", "http://www.google.com/sh/1/2/3/4/"},
+    // Competing generalized match: take the most specialized.
+    {"http://www.google.com/2", "http://www.google.com/test/q"},
+    // No generalized match, early element: fails.
+    {"", "http://www.a.com/"},
+    // No generalized match, intermediate element: fails.
+    {"", "http://www.e-is-between-chromium-and-google.com/"},
+    // No generalized match, late element: fails.
+    {"", "http://www.zzzzzzz.com/"},
+    // String prefix match but not URL-prefix match: fails.
+    {"", "http://www.chromium.org/a/beeswax"},
+    // String prefix match and URL-prefix match: redirects.
+    {"http://www.google.com/", "http://www.google.com/shhhhhh"},
+    // Different protocol: fails.
+    {"", "https://www.google.com/test"},
+    // Smart enough to know that port 80 is HTTP: redirects.
+    {"http://www.google.com/", "http://www.google.com:80/test"},
+    // Specialized match only: fails.
+    {"", "http://www.chromium.org/a"},
+  };
+  for (size_t i = 0; i < arraysize(test_cases); ++i) {
+    std::string expected(test_cases[i].expected);
+    std::string query(test_cases[i].query);
+    GURL result(cache_.GetGeneralizedCanonicalURL(GURL(query)));
+    EXPECT_EQ(expected, result.spec()) << " for test_case[" << i << "]";
+  }
+}
+
+// This tests a special case where there are 2 generalized matches, and both
+// should be checked to find the correct match.
+TEST_F(TopSitesCacheTest, GetPrefixCanonicalURLDiffByQuery) {
+  const char* top_sites_spec[] = {
+    "http://www.dest.com/1",
+    " http://www.source.com/a?m=5",  // Redirects.
+    "http://www.dest.com/2",
+    " http://www.source.com/a/t?q=3",  // Redirects.
+  };
+  InitTopSiteCache(top_sites_spec, arraysize(top_sites_spec));
+
+  struct {
+    const char* expected;
+    const char* query;
+  } test_cases[] = {
+    // Slightly before "http://www.source.com/a?m=5".
+    {"http://www.dest.com/1", "http://www.source.com/a?l=5"},
+    // Slightly after "http://www.source.com/a?m=5".
+    {"http://www.dest.com/1", "http://www.source.com/a?n=5"},
+    // Slightly before "http://www.source.com/a/t?q=3".
+    {"http://www.dest.com/2", "http://www.source.com/a/t?q=2"},
+    // Slightly after "http://www.source.com/a/t?q=3".
+    {"http://www.dest.com/2", "http://www.source.com/a/t?q=4"},
+  };
+
+  for (size_t i = 0; i < arraysize(test_cases); ++i) {
+    std::string expected(test_cases[i].expected);
+    std::string query(test_cases[i].query);
+    GURL result(cache_.GetGeneralizedCanonicalURL(GURL(query)));
+    EXPECT_EQ(expected, result.spec()) << " for test_case[" << i << "]";
+  }
+}
+
+// This test ensures forced URLs behave in the expected way.
+TEST_F(TopSitesCacheTest, CacheForcedURLs) {
+  // Forced URLs must always appear at the beginning of the list.
+  BuildTopSites(kTopSitesSpecBasic, arraysize(kTopSitesSpecBasic));
+  top_sites_[0].last_forced_time = base::Time::FromJsTime(1000);
+  top_sites_[1].last_forced_time = base::Time::FromJsTime(2000);
+  cache_.SetTopSites(top_sites_);
+
+  EXPECT_EQ(2u, cache_.GetNumForcedURLs());
+  EXPECT_EQ(2u, cache_.GetNumNonForcedURLs());
+}
+
+}  // namespace
+
+}  // namespace history
diff --git a/components/history/core/browser/url_utils.cc b/components/history/core/browser/url_utils.cc
new file mode 100644
index 0000000..2f4a1f6
--- /dev/null
+++ b/components/history/core/browser/url_utils.cc
@@ -0,0 +1,88 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/history/core/browser/url_utils.h"
+
+#include <algorithm>
+
+#include "url/gurl.h"
+
+namespace history {
+
+namespace {
+
+// Comparator to enforce '\0' < '?' < '#' < '/' < other characters.
+int GetURLCharPriority(char ch) {
+  switch (ch) {
+    case '\0': return 0;
+    case '?': return 1;
+    case '#': return 2;
+    case '/': return 3;
+  }
+  return 4;
+}
+
+}  // namespace
+
+// Instead of splitting URLs and extract path components, we can implement
+// CanonicalURLStringCompare() using string operations only.
The key idea is, +// treating '/' to be less than any valid path characters would make it behave +// as a separator, so e.g., "test" < "test-case" would be enforced by +// "test/..." < "test-case/...". We also force "?" < "/", so "test?query" < +// "test/stuff". Since the routine is merely a lexicographical string comparison +// with remapping of character ordering, it is a valid strict-weak ordering. +bool CanonicalURLStringCompare(const std::string& s1, const std::string& s2) { + const std::string::value_type* ch1 = s1.c_str(); + const std::string::value_type* ch2 = s2.c_str(); + while (*ch1 && *ch2 && *ch1 == *ch2) { + ++ch1; + ++ch2; + } + int pri_diff = GetURLCharPriority(*ch1) - GetURLCharPriority(*ch2); + // We want false to be returned if |pri_diff| > 0. + return (pri_diff != 0) ? pri_diff < 0 : *ch1 < *ch2; +} + +bool HaveSameSchemeHostAndPort(const GURL&url1, const GURL& url2) { + return url1.scheme() == url2.scheme() && url1.host() == url2.host() && + url1.port() == url2.port(); +} + +bool IsPathPrefix(const std::string& p1, const std::string& p2) { + if (p1.length() > p2.length()) + return false; + std::pair + first_diff = std::mismatch(p1.begin(), p1.end(), p2.begin()); + // Necessary condition: |p1| is a string prefix of |p2|. + if (first_diff.first != p1.end()) + return false; // E.g.: (|p1| = "/test", |p2| = "/exam") => false. + + // |p1| is string prefix. + if (first_diff.second == p2.end()) // Is exact match? + return true; // E.g.: ("/test", "/test") => true. + // |p1| is strict string prefix, check full match of last path component. + if (!p1.empty() && *p1.rbegin() == '/') // Ends in '/'? + return true; // E.g.: ("/test/", "/test/stuff") => true. + + // Finally, |p1| does not end in "/": check first extra character in |p2|. + // E.g.: ("/test", "/test/stuff") => true; ("/test", "/testing") => false. 
+ return *(first_diff.second) == '/'; +} + +GURL ToggleHTTPAndHTTPS(const GURL& url) { + std::string new_scheme; + if (url.SchemeIs("http")) + new_scheme = "https"; + else if (url.SchemeIs("https")) + new_scheme = "http"; + else + return GURL::EmptyGURL(); + url::Component comp; + comp.len = new_scheme.length(); + GURL::Replacements replacement; + replacement.SetScheme(new_scheme.c_str(), comp); + return url.ReplaceComponents(replacement); +} + +} // namespace history diff --git a/components/history/core/browser/url_utils.h b/components/history/core/browser/url_utils.h new file mode 100644 index 0000000..e60eb4f --- /dev/null +++ b/components/history/core/browser/url_utils.h @@ -0,0 +1,46 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_HISTORY_CORE_BROWSER_URL_UTILS_H_ +#define COMPONENTS_HISTORY_CORE_BROWSER_URL_UTILS_H_ + +#include + +class GURL; + +namespace history { + +// CanonicalURLStringCompare performs lexicographical comparison of two strings +// that represent valid URLs, so that if the pre-path (scheme, host, and port) +// parts are equal, then the path parts are compared by treating path components +// (delimited by "/") as separate tokens that form units of comparison. +// For example, let us compare |s1| and |s2|, with +// |s1| = "http://www.google.com:80/base/test/ab/cd?query/stuff" +// |s2| = "http://www.google.com:80/base/test-case/yz#ref/stuff" +// The pre-path parts "http://www.google.com:80/" match. We treat the paths as +// |s1| => ["base", "test", "ab", "cd"] +// |s2| => ["base", "test-case", "yz"] +// Components 1 "base" are identical. Components 2 yield "test" < "test-case", +// so we consider |s1| < |s2|, and return true. Note that naive string +// comparison would yield the opposite (|s1| > |s2|), since '/' > '-' in ASCII. +// Note that path can be terminated by "?query" or "#ref". 
The post-path parts +// are compared in an arbitrary (but consistent) way. +bool CanonicalURLStringCompare(const std::string& s1, const std::string& s2); + +// Returns whether |url1| and |url2| have the same scheme, host, and port. +bool HaveSameSchemeHostAndPort(const GURL&url1, const GURL& url2); + +// Treats |path1| and |path2| as lists of path components (e.g., ["a", "bb"] +// for "/a/bb"). Returns whether |path1|'s list is a prefix of |path2|'s list. +// This is used to define "URL prefix". Note that "test" does not count as a +// prefix of "testing", even though "test" is a (string) prefix of "testing". +bool IsPathPrefix(const std::string& p1, const std::string& p2); + +// Converts |url| from HTTP to HTTPS, and vice versa, then returns the result. +// If |url| is neither HTTP nor HTTPS, returns an empty URL. +GURL ToggleHTTPAndHTTPS(const GURL& url); + +} // namespace history + +#endif // COMPONENTS_HISTORY_CORE_BROWSER_URL_UTILS_H_ diff --git a/components/history/core/browser/url_utils_unittest.cc b/components/history/core/browser/url_utils_unittest.cc new file mode 100644 index 0000000..184bd66 --- /dev/null +++ b/components/history/core/browser/url_utils_unittest.cc @@ -0,0 +1,131 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/history/core/browser/url_utils.h" + +#include "testing/gtest/include/gtest/gtest.h" +#include "url/gurl.h" + +namespace history { + +namespace { + +TEST(HistoryUrlUtilsTest, CanonicalURLStringCompare) { + // Comprehensive test by comparing each pair in sorted list. O(n^2). 
+ const char* sorted_list[] = { + "http://www.gogle.com/redirects_to_google", + "http://www.google.com", + "http://www.google.com/", + "http://www.google.com/?q", + "http://www.google.com/A", + "http://www.google.com/index.html", + "http://www.google.com/test", + "http://www.google.com/test?query", + "http://www.google.com/test?r=3", + "http://www.google.com/test#hash", + "http://www.google.com/test/?query", + "http://www.google.com/test/#hash", + "http://www.google.com/test/zzzzz", + "http://www.google.com/test$dollar", + "http://www.google.com/test%E9%9B%80", + "http://www.google.com/test-case", + "http://www.google.com:80/", + "https://www.google.com", + }; + for (size_t i = 0; i < arraysize(sorted_list); ++i) { + EXPECT_FALSE(CanonicalURLStringCompare(sorted_list[i], sorted_list[i])) + << " for \"" << sorted_list[i] << "\" < \"" << sorted_list[i] << "\""; + // Every disjoint pair-wise comparison. + for (size_t j = i + 1; j < arraysize(sorted_list); ++j) { + EXPECT_TRUE(CanonicalURLStringCompare(sorted_list[i], sorted_list[j])) + << " for \"" << sorted_list[i] << "\" < \"" << sorted_list[j] << "\""; + EXPECT_FALSE(CanonicalURLStringCompare(sorted_list[j], sorted_list[i])) + << " for \"" << sorted_list[j] << "\" < \"" << sorted_list[i] << "\""; + } + } +} + +TEST(HistoryUrlUtilsTest, HaveSameSchemeHostAndPort) { + struct { + const char* s1; + const char* s2; + } true_cases[] = { + {"http://www.google.com", "http://www.google.com"}, + {"http://www.google.com/a/b", "http://www.google.com/a/b"}, + {"http://www.google.com?test=3", "http://www.google.com/"}, + {"http://www.google.com/#hash", "http://www.google.com/?q"}, + {"http://www.google.com/", "http://www.google.com/test/with/dir/"}, + {"http://www.google.com:360", "http://www.google.com:360/?q=1234"}, + {"http://www.google.com:80", "http://www.google.com/gurl/is/smart"}, + {"http://www.google.com/test", "http://www.google.com/test/with/dir/"}, + {"http://www.google.com/test?", 
"http://www.google.com/test/with/dir/"}, + }; + for (size_t i = 0; i < arraysize(true_cases); ++i) { + EXPECT_TRUE(HaveSameSchemeHostAndPort(GURL(true_cases[i].s1), + GURL(true_cases[i].s2))) + << " for true_cases[" << i << "]"; + } + struct { + const char* s1; + const char* s2; + } false_cases[] = { + {"http://www.google.co", "http://www.google.com"}, + {"http://google.com", "http://www.google.com"}, + {"http://www.google.com", "https://www.google.com"}, + {"http://www.google.com/path", "http://www.google.com:137/path"}, + {"http://www.google.com/same/dir", "http://www.youtube.com/same/dir"}, + }; + for (size_t i = 0; i < arraysize(false_cases); ++i) { + EXPECT_FALSE(HaveSameSchemeHostAndPort(GURL(false_cases[i].s1), + GURL(false_cases[i].s2))) + << " for false_cases[" << i << "]"; + } +} + +TEST(HistoryUrlUtilsTest, IsPathPrefix) { + struct { + const char* p1; + const char* p2; + } true_cases[] = { + {"", ""}, + {"", "/"}, + {"/", "/"}, + {"/a/b", "/a/b"}, + {"/", "/test/with/dir/"}, + {"/test", "/test/with/dir/"}, + {"/test/", "/test/with/dir"}, + }; + for (size_t i = 0; i < arraysize(true_cases); ++i) { + EXPECT_TRUE(IsPathPrefix(true_cases[i].p1, true_cases[i].p2)) + << " for true_cases[" << i << "]"; + } + struct { + const char* p1; + const char* p2; + } false_cases[] = { + {"/test", ""}, + {"/", ""}, // Arguable. + {"/a/b/", "/a/b"}, // Arguable. 
+ {"/te", "/test"}, + {"/test", "/test-bed"}, + {"/test-", "/test"}, + }; + for (size_t i = 0; i < arraysize(false_cases); ++i) { + EXPECT_FALSE(IsPathPrefix(false_cases[i].p1, false_cases[i].p2)) + << " for false_cases[" << i << "]"; + } +} + +TEST(HistoryUrlUtilsTest, ToggleHTTPAndHTTPS) { + EXPECT_EQ(GURL("http://www.google.com/test?q#r"), + ToggleHTTPAndHTTPS(GURL("https://www.google.com/test?q#r"))); + EXPECT_EQ(GURL("https://www.google.com:137/"), + ToggleHTTPAndHTTPS(GURL("http://www.google.com:137/"))); + EXPECT_EQ(GURL::EmptyGURL(), + ToggleHTTPAndHTTPS(GURL("ftp://www.google.com/"))); +} + +} // namespace + +} // namespace history diff --git a/components/history/core/browser/visit_database.cc b/components/history/core/browser/visit_database.cc new file mode 100644 index 0000000..2a9234b --- /dev/null +++ b/components/history/core/browser/visit_database.cc @@ -0,0 +1,625 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/history/core/browser/visit_database.h" + +#include +#include +#include +#include + +#include "base/logging.h" +#include "base/strings/string_number_conversions.h" +#include "components/history/core/browser/url_database.h" +#include "components/history/core/browser/visit_filter.h" +#include "sql/statement.h" +#include "ui/base/page_transition_types.h" +#include "url/url_constants.h" + +namespace history { + +VisitDatabase::VisitDatabase() { +} + +VisitDatabase::~VisitDatabase() { +} + +bool VisitDatabase::InitVisitTable() { + if (!GetDB().DoesTableExist("visits")) { + if (!GetDB().Execute("CREATE TABLE visits(" + "id INTEGER PRIMARY KEY," + "url INTEGER NOT NULL," // key of the URL this corresponds to + "visit_time INTEGER NOT NULL," + "from_visit INTEGER," + "transition INTEGER DEFAULT 0 NOT NULL," + "segment_id INTEGER," + // Some old DBs may have an "is_indexed" field here, but this is no + // longer used and should NOT be read or written from any longer. + "visit_duration INTEGER DEFAULT 0 NOT NULL)")) + return false; + } + + // Visit source table contains the source information for all the visits. To + // save space, we do not record those user browsed visits which would be the + // majority in this table. Only other sources are recorded. + // Due to the tight relationship between visit_source and visits table, they + // should be created and dropped at the same time. + if (!GetDB().DoesTableExist("visit_source")) { + if (!GetDB().Execute("CREATE TABLE visit_source(" + "id INTEGER PRIMARY KEY,source INTEGER NOT NULL)")) + return false; + } + + // Index over url so we can quickly find visits for a page. + if (!GetDB().Execute( + "CREATE INDEX IF NOT EXISTS visits_url_index ON visits (url)")) + return false; + + // Create an index over from visits so that we can efficiently find + // referrers and redirects. 
+ if (!GetDB().Execute( + "CREATE INDEX IF NOT EXISTS visits_from_index ON " + "visits (from_visit)")) + return false; + + // Create an index over time so that we can efficiently find the visits in a + // given time range (most history views are time-based). + if (!GetDB().Execute( + "CREATE INDEX IF NOT EXISTS visits_time_index ON " + "visits (visit_time)")) + return false; + + return true; +} + +bool VisitDatabase::DropVisitTable() { + // This will also drop the indices over the table. + return + GetDB().Execute("DROP TABLE IF EXISTS visit_source") && + GetDB().Execute("DROP TABLE visits"); +} + +// Must be in sync with HISTORY_VISIT_ROW_FIELDS. +// static +void VisitDatabase::FillVisitRow(sql::Statement& statement, VisitRow* visit) { + visit->visit_id = statement.ColumnInt64(0); + visit->url_id = statement.ColumnInt64(1); + visit->visit_time = base::Time::FromInternalValue(statement.ColumnInt64(2)); + visit->referring_visit = statement.ColumnInt64(3); + visit->transition = ui::PageTransitionFromInt(statement.ColumnInt(4)); + visit->segment_id = statement.ColumnInt64(5); + visit->visit_duration = + base::TimeDelta::FromInternalValue(statement.ColumnInt64(6)); +} + +// static +bool VisitDatabase::FillVisitVector(sql::Statement& statement, + VisitVector* visits) { + if (!statement.is_valid()) + return false; + + while (statement.Step()) { + history::VisitRow visit; + FillVisitRow(statement, &visit); + visits->push_back(visit); + } + + return statement.Succeeded(); +} + +// static +bool VisitDatabase::FillVisitVectorWithOptions(sql::Statement& statement, + const QueryOptions& options, + VisitVector* visits) { + std::set found_urls; + + // Keeps track of the day that |found_urls| is holding the URLs for, in order + // to handle removing per-day duplicates. 
+ base::Time found_urls_midnight; + + while (statement.Step()) { + VisitRow visit; + FillVisitRow(statement, &visit); + + if (options.duplicate_policy != QueryOptions::KEEP_ALL_DUPLICATES) { + if (options.duplicate_policy == QueryOptions::REMOVE_DUPLICATES_PER_DAY && + found_urls_midnight != visit.visit_time.LocalMidnight()) { + found_urls.clear(); + found_urls_midnight = visit.visit_time.LocalMidnight(); + } + // Make sure the URL this visit corresponds to is unique. + if (found_urls.find(visit.url_id) != found_urls.end()) + continue; + found_urls.insert(visit.url_id); + } + + if (static_cast(visits->size()) >= options.EffectiveMaxCount()) + return true; + visits->push_back(visit); + } + return false; +} + +VisitID VisitDatabase::AddVisit(VisitRow* visit, VisitSource source) { + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "INSERT INTO visits " + "(url, visit_time, from_visit, transition, segment_id, " + "visit_duration) VALUES (?,?,?,?,?,?)")); + statement.BindInt64(0, visit->url_id); + statement.BindInt64(1, visit->visit_time.ToInternalValue()); + statement.BindInt64(2, visit->referring_visit); + statement.BindInt64(3, visit->transition); + statement.BindInt64(4, visit->segment_id); + statement.BindInt64(5, visit->visit_duration.ToInternalValue()); + + if (!statement.Run()) { + DVLOG(0) << "Failed to execute visit insert statement: " + << "url_id = " << visit->url_id; + return 0; + } + + visit->visit_id = GetDB().GetLastInsertRowId(); + + if (source != SOURCE_BROWSED) { + // Record the source of this visit when it is not browsed. 
+ sql::Statement statement1(GetDB().GetCachedStatement(SQL_FROM_HERE, + "INSERT INTO visit_source (id, source) VALUES (?,?)")); + statement1.BindInt64(0, visit->visit_id); + statement1.BindInt64(1, source); + + if (!statement1.Run()) { + DVLOG(0) << "Failed to execute visit_source insert statement: " + << "id = " << visit->visit_id; + return 0; + } + } + + return visit->visit_id; +} + +void VisitDatabase::DeleteVisit(const VisitRow& visit) { + // Patch around this visit. Any visits that this went to will now have their + // "source" be the deleted visit's source. + sql::Statement update_chain(GetDB().GetCachedStatement(SQL_FROM_HERE, + "UPDATE visits SET from_visit=? WHERE from_visit=?")); + update_chain.BindInt64(0, visit.referring_visit); + update_chain.BindInt64(1, visit.visit_id); + if (!update_chain.Run()) + return; + + // Now delete the actual visit. + sql::Statement del(GetDB().GetCachedStatement(SQL_FROM_HERE, + "DELETE FROM visits WHERE id=?")); + del.BindInt64(0, visit.visit_id); + if (!del.Run()) + return; + + // Try to delete the entry in visit_source table as well. + // If the visit was browsed, there is no corresponding entry in visit_source + // table, and nothing will be deleted. + del.Assign(GetDB().GetCachedStatement(SQL_FROM_HERE, + "DELETE FROM visit_source WHERE id=?")); + del.BindInt64(0, visit.visit_id); + del.Run(); +} + +bool VisitDatabase::GetRowForVisit(VisitID visit_id, VisitRow* out_visit) { + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits WHERE id=?")); + statement.BindInt64(0, visit_id); + + if (!statement.Step()) + return false; + + FillVisitRow(statement, out_visit); + + // We got a different visit than we asked for, something is wrong. 
+ DCHECK_EQ(visit_id, out_visit->visit_id); + if (visit_id != out_visit->visit_id) + return false; + + return true; +} + +bool VisitDatabase::UpdateVisitRow(const VisitRow& visit) { + // Don't store inconsistent data to the database. + DCHECK_NE(visit.visit_id, visit.referring_visit); + if (visit.visit_id == visit.referring_visit) + return false; + + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "UPDATE visits SET " + "url=?,visit_time=?,from_visit=?,transition=?,segment_id=?," + "visit_duration=? WHERE id=?")); + statement.BindInt64(0, visit.url_id); + statement.BindInt64(1, visit.visit_time.ToInternalValue()); + statement.BindInt64(2, visit.referring_visit); + statement.BindInt64(3, visit.transition); + statement.BindInt64(4, visit.segment_id); + statement.BindInt64(5, visit.visit_duration.ToInternalValue()); + statement.BindInt64(6, visit.visit_id); + + return statement.Run(); +} + +bool VisitDatabase::GetVisitsForURL(URLID url_id, VisitVector* visits) { + visits->clear(); + + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT" HISTORY_VISIT_ROW_FIELDS + "FROM visits " + "WHERE url=? " + "ORDER BY visit_time ASC")); + statement.BindInt64(0, url_id); + return FillVisitVector(statement, visits); +} + +bool VisitDatabase::GetVisibleVisitsForURL(URLID url_id, + const QueryOptions& options, + VisitVector* visits) { + visits->clear(); + + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT" HISTORY_VISIT_ROW_FIELDS + "FROM visits " + "WHERE url=? AND visit_time >= ? AND visit_time < ? " + "AND (transition & ?) != 0 " // CHAIN_END + "AND (transition & ?) NOT IN (?, ?, ?) 
" // NO SUBFRAME or + // KEYWORD_GENERATED + "ORDER BY visit_time DESC")); + statement.BindInt64(0, url_id); + statement.BindInt64(1, options.EffectiveBeginTime()); + statement.BindInt64(2, options.EffectiveEndTime()); + statement.BindInt(3, ui::PAGE_TRANSITION_CHAIN_END); + statement.BindInt(4, ui::PAGE_TRANSITION_CORE_MASK); + statement.BindInt(5, ui::PAGE_TRANSITION_AUTO_SUBFRAME); + statement.BindInt(6, ui::PAGE_TRANSITION_MANUAL_SUBFRAME); + statement.BindInt(7, ui::PAGE_TRANSITION_KEYWORD_GENERATED); + + return FillVisitVectorWithOptions(statement, options, visits); +} + +bool VisitDatabase::GetVisitsForTimes(const std::vector& times, + VisitVector* visits) { + visits->clear(); + + for (std::vector::const_iterator it = times.begin(); + it != times.end(); ++it) { + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits " + "WHERE visit_time == ?")); + + statement.BindInt64(0, it->ToInternalValue()); + + if (!FillVisitVector(statement, visits)) + return false; + } + return true; +} + +bool VisitDatabase::GetAllVisitsInRange(base::Time begin_time, + base::Time end_time, + int max_results, + VisitVector* visits) { + visits->clear(); + + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits " + "WHERE visit_time >= ? AND visit_time < ?" + "ORDER BY visit_time LIMIT ?")); + + // See GetVisibleVisitsInRange for more info on how these times are bound. + int64 end = end_time.ToInternalValue(); + statement.BindInt64(0, begin_time.ToInternalValue()); + statement.BindInt64(1, end ? end : std::numeric_limits::max()); + statement.BindInt64(2, + max_results ? 
max_results : std::numeric_limits::max()); + + return FillVisitVector(statement, visits); +} + +bool VisitDatabase::GetVisitsInRangeForTransition( + base::Time begin_time, + base::Time end_time, + int max_results, + ui::PageTransition transition, + VisitVector* visits) { + DCHECK(visits); + visits->clear(); + + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits " + "WHERE visit_time >= ? AND visit_time < ? " + "AND (transition & ?) == ?" + "ORDER BY visit_time LIMIT ?")); + + // See GetVisibleVisitsInRange for more info on how these times are bound. + int64 end = end_time.ToInternalValue(); + statement.BindInt64(0, begin_time.ToInternalValue()); + statement.BindInt64(1, end ? end : std::numeric_limits::max()); + statement.BindInt(2, ui::PAGE_TRANSITION_CORE_MASK); + statement.BindInt(3, transition); + statement.BindInt64(4, + max_results ? max_results : std::numeric_limits::max()); + + return FillVisitVector(statement, visits); +} + +bool VisitDatabase::GetVisibleVisitsInRange(const QueryOptions& options, + VisitVector* visits) { + visits->clear(); + // The visit_time values can be duplicated in a redirect chain, so we sort + // by id too, to ensure a consistent ordering just in case. + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits " + "WHERE visit_time >= ? AND visit_time < ? " + "AND (transition & ?) != 0 " // CHAIN_END + "AND (transition & ?) NOT IN (?, ?, ?) 
" // NO SUBFRAME or + // KEYWORD_GENERATED + "ORDER BY visit_time DESC, id DESC")); + + statement.BindInt64(0, options.EffectiveBeginTime()); + statement.BindInt64(1, options.EffectiveEndTime()); + statement.BindInt(2, ui::PAGE_TRANSITION_CHAIN_END); + statement.BindInt(3, ui::PAGE_TRANSITION_CORE_MASK); + statement.BindInt(4, ui::PAGE_TRANSITION_AUTO_SUBFRAME); + statement.BindInt(5, ui::PAGE_TRANSITION_MANUAL_SUBFRAME); + statement.BindInt(6, ui::PAGE_TRANSITION_KEYWORD_GENERATED); + + return FillVisitVectorWithOptions(statement, options, visits); +} + +void VisitDatabase::GetDirectVisitsDuringTimes(const VisitFilter& time_filter, + int max_results, + VisitVector* visits) { + visits->clear(); + if (max_results) + visits->reserve(max_results); + for (VisitFilter::TimeVector::const_iterator it = time_filter.times().begin(); + it != time_filter.times().end(); ++it) { + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits " + "WHERE visit_time >= ? AND visit_time < ? " + "AND (transition & ?) != 0 " // CHAIN_START + "AND (transition & ?) IN (?, ?) " // TYPED or AUTO_BOOKMARK only + "ORDER BY visit_time DESC, id DESC")); + + statement.BindInt64(0, it->first.ToInternalValue()); + statement.BindInt64(1, it->second.ToInternalValue()); + statement.BindInt(2, ui::PAGE_TRANSITION_CHAIN_START); + statement.BindInt(3, ui::PAGE_TRANSITION_CORE_MASK); + statement.BindInt(4, ui::PAGE_TRANSITION_TYPED); + statement.BindInt(5, ui::PAGE_TRANSITION_AUTO_BOOKMARK); + + while (statement.Step()) { + VisitRow visit; + FillVisitRow(statement, &visit); + visits->push_back(visit); + + if (max_results > 0 && static_cast(visits->size()) >= max_results) + return; + } + } +} + +VisitID VisitDatabase::GetMostRecentVisitForURL(URLID url_id, + VisitRow* visit_row) { + // The visit_time values can be duplicated in a redirect chain, so we sort + // by id too, to ensure a consistent ordering just in case. 
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits " + "WHERE url=? " + "ORDER BY visit_time DESC, id DESC " + "LIMIT 1")); + statement.BindInt64(0, url_id); + if (!statement.Step()) + return 0; // No visits for this URL. + + if (visit_row) { + FillVisitRow(statement, visit_row); + return visit_row->visit_id; + } + return statement.ColumnInt64(0); +} + +bool VisitDatabase::GetMostRecentVisitsForURL(URLID url_id, + int max_results, + VisitVector* visits) { + visits->clear(); + + // The visit_time values can be duplicated in a redirect chain, so we sort + // by id too, to ensure a consistent ordering just in case. + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT" HISTORY_VISIT_ROW_FIELDS + "FROM visits " + "WHERE url=? " + "ORDER BY visit_time DESC, id DESC " + "LIMIT ?")); + statement.BindInt64(0, url_id); + statement.BindInt(1, max_results); + + return FillVisitVector(statement, visits); +} + +bool VisitDatabase::GetRedirectFromVisit(VisitID from_visit, + VisitID* to_visit, + GURL* to_url) { + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT v.id,u.url " + "FROM visits v JOIN urls u ON v.url = u.id " + "WHERE v.from_visit = ? " + "AND (v.transition & ?) != 0")); // IS_REDIRECT_MASK + statement.BindInt64(0, from_visit); + statement.BindInt(1, ui::PAGE_TRANSITION_IS_REDIRECT_MASK); + + if (!statement.Step()) + return false; // No redirect from this visit. 
(Or SQL error) + if (to_visit) + *to_visit = statement.ColumnInt64(0); + if (to_url) + *to_url = GURL(statement.ColumnString(1)); + return true; +} + +bool VisitDatabase::GetRedirectToVisit(VisitID to_visit, + VisitID* from_visit, + GURL* from_url) { + VisitRow row; + if (!GetRowForVisit(to_visit, &row)) + return false; + + if (from_visit) + *from_visit = row.referring_visit; + + if (from_url) { + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT u.url " + "FROM visits v JOIN urls u ON v.url = u.id " + "WHERE v.id = ?")); + statement.BindInt64(0, row.referring_visit); + + if (!statement.Step()) + return false; + + *from_url = GURL(statement.ColumnString(0)); + } + return true; +} + +bool VisitDatabase::GetVisibleVisitCountToHost(const GURL& url, + int* count, + base::Time* first_visit) { + if (!url.SchemeIs(url::kHttpScheme) && + !url.SchemeIs(url::kHttpsScheme)) + return false; + + // We need to search for URLs with a matching host/port. One way to query for + // this is to use the LIKE operator, eg 'url LIKE http://google.com/%'. This + // is inefficient though in that it doesn't use the index and each entry must + // be visited. The same query can be executed by using >= and < operators. + // The query becomes: + // 'url >= http://google.com/ and url < http://google.com0'. + // 0 is used as it is one character greater than '/'. + const std::string host_query_min = url.GetOrigin().spec(); + if (host_query_min.empty()) + return false; + + // We also want to restrict ourselves to main frame navigations that are not + // in the middle of redirect chains, hence the transition checks. + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT MIN(v.visit_time), COUNT(*) " + "FROM visits v INNER JOIN urls u ON v.url = u.id " + "WHERE u.url >= ? AND u.url < ? " + "AND (transition & ?) != 0 " + "AND (transition & ?) 
NOT IN (?, ?, ?)")); + statement.BindString(0, host_query_min); + statement.BindString(1, + host_query_min.substr(0, host_query_min.size() - 1) + '0'); + statement.BindInt(2, ui::PAGE_TRANSITION_CHAIN_END); + statement.BindInt(3, ui::PAGE_TRANSITION_CORE_MASK); + statement.BindInt(4, ui::PAGE_TRANSITION_AUTO_SUBFRAME); + statement.BindInt(5, ui::PAGE_TRANSITION_MANUAL_SUBFRAME); + statement.BindInt(6, ui::PAGE_TRANSITION_KEYWORD_GENERATED); + + if (!statement.Step()) { + // We've never been to this page before. + *count = 0; + return true; + } + + if (!statement.Succeeded()) + return false; + + *first_visit = base::Time::FromInternalValue(statement.ColumnInt64(0)); + *count = statement.ColumnInt(1); + return true; +} + +bool VisitDatabase::GetStartDate(base::Time* first_visit) { + sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, + "SELECT MIN(visit_time) FROM visits WHERE visit_time != 0")); + if (!statement.Step() || statement.ColumnInt64(0) == 0) { + *first_visit = base::Time::Now(); + return false; + } + *first_visit = base::Time::FromInternalValue(statement.ColumnInt64(0)); + return true; +} + +void VisitDatabase::GetVisitsSource(const VisitVector& visits, + VisitSourceMap* sources) { + DCHECK(sources); + sources->clear(); + + // We query the source in batch. Here defines the batch size. + const size_t batch_size = 500; + size_t visits_size = visits.size(); + + size_t start_index = 0, end_index = 0; + while (end_index < visits_size) { + start_index = end_index; + end_index = end_index + batch_size < visits_size ? end_index + batch_size + : visits_size; + + // Compose the sql statement with a list of ids. + std::string sql = "SELECT id,source FROM visit_source "; + sql.append("WHERE id IN ("); + // Append all the ids in the statement. 
+ for (size_t j = start_index; j < end_index; j++) { + if (j != start_index) + sql.push_back(','); + sql.append(base::Int64ToString(visits[j].visit_id)); + } + sql.append(") ORDER BY id"); + sql::Statement statement(GetDB().GetUniqueStatement(sql.c_str())); + + // Get the source entries out of the query result. + while (statement.Step()) { + std::pair source_entry(statement.ColumnInt64(0), + static_cast(statement.ColumnInt(1))); + sources->insert(source_entry); + } + } +} + +bool VisitDatabase::MigrateVisitsWithoutDuration() { + if (!GetDB().DoesTableExist("visits")) { + NOTREACHED() << " Visits table should exist before migration"; + return false; + } + + if (!GetDB().DoesColumnExist("visits", "visit_duration")) { + // Old versions don't have the visit_duration column, we modify the table + // to add that field. + if (!GetDB().Execute("ALTER TABLE visits " + "ADD COLUMN visit_duration INTEGER DEFAULT 0 NOT NULL")) + return false; + } + return true; +} + +void VisitDatabase::GetBriefVisitInfoOfMostRecentVisits( + int max_visits, + std::vector* result_vector) { + result_vector->clear(); + + sql::Statement statement(GetDB().GetUniqueStatement( + "SELECT url,visit_time,transition FROM visits " + "ORDER BY id DESC LIMIT ?")); + + statement.BindInt64(0, max_visits); + + if (!statement.is_valid()) + return; + + while (statement.Step()) { + BriefVisitInfo info; + info.url_id = statement.ColumnInt64(0); + info.time = base::Time::FromInternalValue(statement.ColumnInt64(1)); + info.transition = ui::PageTransitionFromInt(statement.ColumnInt(2)); + result_vector->push_back(info); + } +} + +} // namespace history diff --git a/components/history/core/browser/visit_database.h b/components/history/core/browser/visit_database.h new file mode 100644 index 0000000..58207ad --- /dev/null +++ b/components/history/core/browser/visit_database.h @@ -0,0 +1,233 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_HISTORY_CORE_BROWSER_VISIT_DATABASE_H_ +#define COMPONENTS_HISTORY_CORE_BROWSER_VISIT_DATABASE_H_ + +#include + +#include "components/history/core/browser/history_types.h" + +namespace sql { +class Connection; +class Statement; +} + +namespace history { + +class VisitFilter; + +// A visit database is one which stores visits for URLs, that is, times and +// linking information. A visit database must also be a URLDatabase, as this +// modifies tables used by URLs directly and could be thought of as inheriting +// from URLDatabase. However, this inheritance is not explicit as things would +// get too complicated and have multiple inheritance. +class VisitDatabase { + public: + // Must call InitVisitTable() before using to make sure the database is + // initialized. + VisitDatabase(); + virtual ~VisitDatabase(); + + // Deletes the visit table. Used for rapidly clearing all visits. In this + // case, InitVisitTable would be called immediately afterward to re-create it. + // Returns true on success. + bool DropVisitTable(); + + // Adds a line to the visit database with the given information, returning + // the added row ID on success, 0 on failure. The given visit is updated with + // the new row ID on success. In addition, adds its source into visit_source + // table. + VisitID AddVisit(VisitRow* visit, VisitSource source); + + // Deletes the given visit from the database. If a visit with the given ID + // doesn't exist, it will not do anything. + void DeleteVisit(const VisitRow& visit); + + // Queries the row for the given visit id, filling the given VisitRow. + // Returns true on success. + bool GetRowForVisit(VisitID visit_id, VisitRow* out_visit); + + // Updates an existing row. The new information is set on the row, using the + // VisitID as the key. The visit must exist. Returns true on success. 
+ bool UpdateVisitRow(const VisitRow& visit); + + // Fills in the given vector with all of the visits for the given page ID, + // sorted in ascending order of date. Returns true on success (although there + // may still be no matches). + bool GetVisitsForURL(URLID url_id, VisitVector* visits); + + // Fills in the given vector with the visits for the given page ID which + // should be user-visible, which excludes things like redirects and subframes, + // and match the set of options passed, sorted in ascending order of date. + // + // Returns true if there are more results available, i.e. if the number of + // results was restricted by |options.max_count|. + bool GetVisibleVisitsForURL(URLID url_id, + const QueryOptions& options, + VisitVector* visits); + + // Fills the vector with all visits with times in the given list. + // + // The results will be in no particular order. Also, no duplicate + // detection is performed, so if |times| has duplicate times, + // |visits| may have duplicate visits. + bool GetVisitsForTimes(const std::vector& times, + VisitVector* visits); + + // Fills all visits in the time range [begin, end) to the given vector. Either + // time can be is_null(), in which case the times in that direction are + // unbounded. + // + // If |max_results| is non-zero, up to that many results will be returned. If + // there are more results than that, the oldest ones will be returned. (This + // is used for history expiration.) + // + // The results will be in increasing order of date. + bool GetAllVisitsInRange(base::Time begin_time, base::Time end_time, + int max_results, VisitVector* visits); + + // Fills all visits with specified transition in the time range [begin, end) + // to the given vector. Either time can be is_null(), in which case the times + // in that direction are unbounded. + // + // If |max_results| is non-zero, up to that many results will be returned. If + // there are more results than that, the oldest ones will be returned. 
(This + // is used for history expiration.) + // + // The results will be in increasing order of date. + bool GetVisitsInRangeForTransition(base::Time begin_time, + base::Time end_time, + int max_results, + ui::PageTransition transition, + VisitVector* visits); + + // Fills all visits in the given time range into the given vector that should + // be user-visible, which excludes things like redirects and subframes. The + // begin time is inclusive, the end time is exclusive. Either time can be + // is_null(), in which case the times in that direction are unbounded. + // + // Up to |max_count| visits will be returned. If there are more visits than + // that, the most recent |max_count| will be returned. If 0, all visits in the + // range will be computed. + // + // Only one visit for each URL will be returned, and it will be the most + // recent one in the time range. + // + // Returns true if there are more results available, i.e. if the number of + // results was restricted by |options.max_count|. + bool GetVisibleVisitsInRange(const QueryOptions& options, + VisitVector* visits); + + // Fills all visits in the given time ranges into the given vector that are + // visits made directly by the user (typed or bookmarked visits only). The + // begin time is inclusive, the end time is exclusive. + // + // Up to |max_count| visits will be returned. If there are more visits than + // that, the most recent |max_count| will be returned. If 0, all visits in the + // range will be computed. + void GetDirectVisitsDuringTimes(const VisitFilter& time_filter, + int max_count, + VisitVector* visits); + + // Returns the visit ID for the most recent visit of the given URL ID, or 0 + // if there is no visit for the URL. + // + // If non-NULL, the given visit row will be filled with the information of + // the found visit. When no visit is found, the row will be unchanged. 
+ VisitID GetMostRecentVisitForURL(URLID url_id, + VisitRow* visit_row); + + // Returns the |max_results| most recent visit sessions for |url_id|. + // + // Returns false if there's a failure preparing the statement. True + // otherwise. (No results are indicated with an empty |visits| + // vector.) + bool GetMostRecentVisitsForURL(URLID url_id, + int max_results, + VisitVector* visits); + + // Finds a redirect coming from the given |from_visit|. If a redirect is + // found, it fills the visit ID and URL into the out variables and returns + // true. If there is no redirect from the given visit, returns false. + // + // If there is more than one redirect, this will compute a random one. But + // duplicates should be very rare, and we don't actually care which one we + // get in most cases. These will occur when the user goes back and gets + // redirected again. + // + // to_visit and to_url can be NULL in which case they are ignored. + bool GetRedirectFromVisit(VisitID from_visit, + VisitID* to_visit, + GURL* to_url); + + // Similar to the above function except finds a redirect going to a given + // |to_visit|. + bool GetRedirectToVisit(VisitID to_visit, + VisitID* from_visit, + GURL* from_url); + + // Gets the number of user-visible visits to all URLs on the same + // scheme/host/port as |url|, as well as the time of the earliest visit. + // "User-visible" is defined as in GetVisibleVisitsInRange() above, i.e. + // excluding redirects and subframes. + // This function is only valid for HTTP and HTTPS URLs; all other schemes + // cause the function to return false. + bool GetVisibleVisitCountToHost(const GURL& url, + int* count, + base::Time* first_visit); + + // Get the time of the first item in our database. + bool GetStartDate(base::Time* first_visit); + + // Get the source information about the given visits. 
+ void GetVisitsSource(const VisitVector& visits, + VisitSourceMap* sources); + + // Obtains BriefVisitInfo for the specified number of most recent visits + // from the visit database. + void GetBriefVisitInfoOfMostRecentVisits( + int max_visits, + std::vector* result_vector); + + protected: + // Returns the database for the functions in this interface. + virtual sql::Connection& GetDB() = 0; + + // Called by the derived classes on initialization to make sure the tables + // and indices are properly set up. Must be called before anything else. + bool InitVisitTable(); + + // Convenience to fill a VisitRow. Assumes the visit values are bound starting + // at index 0. + static void FillVisitRow(sql::Statement& statement, VisitRow* visit); + + // Convenience to fill a VisitVector. Assumes that statement.step() + // hasn't happened yet. + static bool FillVisitVector(sql::Statement& statement, VisitVector* visits); + + // Convenience to fill a VisitVector while respecting the set of options. + // |statement| should order the query descending by visit_time to ensure + // correct duplicate management behavior. Assumes that statement.step() + // hasn't happened yet. + static bool FillVisitVectorWithOptions(sql::Statement& statement, + const QueryOptions& options, + VisitVector* visits); + + // Called by the derived classes to migrate the older visits table which + // don't have visit_duration column yet. + bool MigrateVisitsWithoutDuration(); + + private: + + DISALLOW_COPY_AND_ASSIGN(VisitDatabase); +}; + +// Rows, in order, of the visit table.
+#define HISTORY_VISIT_ROW_FIELDS \ + " id,url,visit_time,from_visit,transition,segment_id,visit_duration " + +} // namespace history + +#endif // COMPONENTS_HISTORY_CORE_BROWSER_VISIT_DATABASE_H_ diff --git a/components/history/core/browser/visit_database_unittest.cc b/components/history/core/browser/visit_database_unittest.cc new file mode 100644 index 0000000..be2594a --- /dev/null +++ b/components/history/core/browser/visit_database_unittest.cc @@ -0,0 +1,419 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include +#include + +#include "base/files/file_path.h" +#include "base/files/scoped_temp_dir.h" +#include "base/strings/string_util.h" +#include "base/time/time.h" +#include "components/history/core/browser/url_database.h" +#include "components/history/core/browser/visit_database.h" +#include "sql/connection.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "testing/platform_test.h" + +using base::Time; +using base::TimeDelta; + +namespace history { + +namespace { + +bool IsVisitInfoEqual(const VisitRow& a, + const VisitRow& b) { + return a.visit_id == b.visit_id && + a.url_id == b.url_id && + a.visit_time == b.visit_time && + a.referring_visit == b.referring_visit && + a.transition == b.transition; +} + +} // namespace + +class VisitDatabaseTest : public PlatformTest, + public URLDatabase, + public VisitDatabase { + public: + VisitDatabaseTest() { + } + + private: + // Test setup. + void SetUp() override { + PlatformTest::SetUp(); + ASSERT_TRUE(temp_dir_.CreateUniqueTempDir()); + base::FilePath db_file = temp_dir_.path().AppendASCII("VisitTest.db"); + + EXPECT_TRUE(db_.Open(db_file)); + + // Initialize the tables for this test. + CreateURLTable(false); + CreateMainURLIndex(); + InitVisitTable(); + } + void TearDown() override { + db_.Close(); + PlatformTest::TearDown(); + } + + // Provided for URL/VisitDatabase. 
+ sql::Connection& GetDB() override { return db_; } + + base::ScopedTempDir temp_dir_; + sql::Connection db_; +}; + +TEST_F(VisitDatabaseTest, Add) { + // Add one visit. + VisitRow visit_info1(1, Time::Now(), 0, ui::PAGE_TRANSITION_LINK, 0); + EXPECT_TRUE(AddVisit(&visit_info1, SOURCE_BROWSED)); + + // Add second visit for the same page. + VisitRow visit_info2(visit_info1.url_id, + visit_info1.visit_time + TimeDelta::FromSeconds(1), 1, + ui::PAGE_TRANSITION_TYPED, 0); + EXPECT_TRUE(AddVisit(&visit_info2, SOURCE_BROWSED)); + + // Add third visit for a different page. + VisitRow visit_info3(2, + visit_info1.visit_time + TimeDelta::FromSeconds(2), 0, + ui::PAGE_TRANSITION_LINK, 0); + EXPECT_TRUE(AddVisit(&visit_info3, SOURCE_BROWSED)); + + // Query the first two. + std::vector matches; + EXPECT_TRUE(GetVisitsForURL(visit_info1.url_id, &matches)); + EXPECT_EQ(static_cast(2), matches.size()); + + // Make sure we got both (order in result set is visit time). + EXPECT_TRUE(IsVisitInfoEqual(matches[0], visit_info1) && + IsVisitInfoEqual(matches[1], visit_info2)); +} + +TEST_F(VisitDatabaseTest, Delete) { + // Add three visits that form a chain of navigation, and then delete the + // middle one. We should be left with the outer two visits, and the chain + // should link them. + static const int kTime1 = 1000; + VisitRow visit_info1(1, Time::FromInternalValue(kTime1), 0, + ui::PAGE_TRANSITION_LINK, 0); + EXPECT_TRUE(AddVisit(&visit_info1, SOURCE_BROWSED)); + + static const int kTime2 = kTime1 + 1; + VisitRow visit_info2(1, Time::FromInternalValue(kTime2), + visit_info1.visit_id, ui::PAGE_TRANSITION_LINK, 0); + EXPECT_TRUE(AddVisit(&visit_info2, SOURCE_BROWSED)); + + static const int kTime3 = kTime2 + 1; + VisitRow visit_info3(1, Time::FromInternalValue(kTime3), + visit_info2.visit_id, ui::PAGE_TRANSITION_LINK, 0); + EXPECT_TRUE(AddVisit(&visit_info3, SOURCE_BROWSED)); + + // First make sure all the visits are there. 
+ std::vector matches; + EXPECT_TRUE(GetVisitsForURL(visit_info1.url_id, &matches)); + EXPECT_EQ(static_cast(3), matches.size()); + EXPECT_TRUE(IsVisitInfoEqual(matches[0], visit_info1) && + IsVisitInfoEqual(matches[1], visit_info2) && + IsVisitInfoEqual(matches[2], visit_info3)); + + // Delete the middle one. + DeleteVisit(visit_info2); + + // The outer two should be left, and the last one should have the first as + // the referrer. + visit_info3.referring_visit = visit_info1.visit_id; + matches.clear(); + EXPECT_TRUE(GetVisitsForURL(visit_info1.url_id, &matches)); + EXPECT_EQ(static_cast(2), matches.size()); + EXPECT_TRUE(IsVisitInfoEqual(matches[0], visit_info1) && + IsVisitInfoEqual(matches[1], visit_info3)); +} + +TEST_F(VisitDatabaseTest, Update) { + // Make something in the database. + VisitRow original(1, Time::Now(), 23, ui::PageTransitionFromInt(0), 19); + AddVisit(&original, SOURCE_BROWSED); + + // Mutate that row. + VisitRow modification(original); + modification.url_id = 2; + modification.transition = ui::PAGE_TRANSITION_TYPED; + modification.visit_time = Time::Now() + TimeDelta::FromDays(1); + modification.referring_visit = 9292; + UpdateVisitRow(modification); + + // Check that the mutated version was written. + VisitRow final; + GetRowForVisit(original.visit_id, &final); + EXPECT_TRUE(IsVisitInfoEqual(modification, final)); +} + +// TODO(brettw) write test for GetMostRecentVisitForURL! + +namespace { + +std::vector GetTestVisitRows() { + // Tests can be sensitive to the local timezone, so use a local time as the + // basis for all visit times. + base::Time base_time = Time::UnixEpoch().LocalMidnight(); + + // Add one visit. + VisitRow visit_info1(1, base_time + TimeDelta::FromMinutes(1), 0, + ui::PageTransitionFromInt( + ui::PAGE_TRANSITION_LINK | + ui::PAGE_TRANSITION_CHAIN_START | + ui::PAGE_TRANSITION_CHAIN_END), + 0); + visit_info1.visit_id = 1; + + // Add second visit for the same page. 
+ VisitRow visit_info2(visit_info1.url_id, + visit_info1.visit_time + TimeDelta::FromSeconds(1), 1, + ui::PageTransitionFromInt( + ui::PAGE_TRANSITION_TYPED | + ui::PAGE_TRANSITION_CHAIN_START | + ui::PAGE_TRANSITION_CHAIN_END), + 0); + visit_info2.visit_id = 2; + + // Add third visit for a different page. + VisitRow visit_info3(2, + visit_info1.visit_time + TimeDelta::FromSeconds(2), 0, + ui::PageTransitionFromInt( + ui::PAGE_TRANSITION_LINK | + ui::PAGE_TRANSITION_CHAIN_START), + 0); + visit_info3.visit_id = 3; + + // Add a redirect visit from the last page. + VisitRow visit_info4(3, + visit_info1.visit_time + TimeDelta::FromSeconds(3), visit_info3.visit_id, + ui::PageTransitionFromInt( + ui::PAGE_TRANSITION_SERVER_REDIRECT | + ui::PAGE_TRANSITION_CHAIN_END), + 0); + visit_info4.visit_id = 4; + + // Add a subframe visit. + VisitRow visit_info5(4, + visit_info1.visit_time + TimeDelta::FromSeconds(4), visit_info4.visit_id, + ui::PageTransitionFromInt( + ui::PAGE_TRANSITION_AUTO_SUBFRAME | + ui::PAGE_TRANSITION_CHAIN_START | + ui::PAGE_TRANSITION_CHAIN_END), + 0); + visit_info5.visit_id = 5; + + // Add third visit for the same URL as visit 1 and 2, but exactly a day + // later than visit 2. 
+ VisitRow visit_info6(visit_info1.url_id, + visit_info2.visit_time + TimeDelta::FromDays(1), 1, + ui::PageTransitionFromInt( + ui::PAGE_TRANSITION_TYPED | + ui::PAGE_TRANSITION_CHAIN_START | + ui::PAGE_TRANSITION_CHAIN_END), + 0); + visit_info6.visit_id = 6; + + std::vector test_visit_rows; + test_visit_rows.push_back(visit_info1); + test_visit_rows.push_back(visit_info2); + test_visit_rows.push_back(visit_info3); + test_visit_rows.push_back(visit_info4); + test_visit_rows.push_back(visit_info5); + test_visit_rows.push_back(visit_info6); + return test_visit_rows; +} + +} // namespace + +TEST_F(VisitDatabaseTest, GetVisitsForTimes) { + std::vector test_visit_rows = GetTestVisitRows(); + + for (size_t i = 0; i < test_visit_rows.size(); ++i) { + EXPECT_TRUE(AddVisit(&test_visit_rows[i], SOURCE_BROWSED)); + } + + // Query the visits for all our times. We should get all visits. + { + std::vector times; + for (size_t i = 0; i < test_visit_rows.size(); ++i) { + times.push_back(test_visit_rows[i].visit_time); + } + VisitVector results; + GetVisitsForTimes(times, &results); + EXPECT_EQ(test_visit_rows.size(), results.size()); + } + + // Query the visits for a single time. + for (size_t i = 0; i < test_visit_rows.size(); ++i) { + std::vector times; + times.push_back(test_visit_rows[i].visit_time); + VisitVector results; + GetVisitsForTimes(times, &results); + ASSERT_EQ(static_cast(1), results.size()); + EXPECT_TRUE(IsVisitInfoEqual(results[0], test_visit_rows[i])); + } +} + +TEST_F(VisitDatabaseTest, GetAllVisitsInRange) { + std::vector test_visit_rows = GetTestVisitRows(); + + for (size_t i = 0; i < test_visit_rows.size(); ++i) { + EXPECT_TRUE(AddVisit(&test_visit_rows[i], SOURCE_BROWSED)); + } + + // Query the visits for all time. We should get all visits. 
+ VisitVector results; + GetAllVisitsInRange(Time(), Time(), 0, &results); + ASSERT_EQ(test_visit_rows.size(), results.size()); + for (size_t i = 0; i < test_visit_rows.size(); ++i) { + EXPECT_TRUE(IsVisitInfoEqual(results[i], test_visit_rows[i])); + } + + // Query a time range and make sure beginning is inclusive and ending is + // exclusive. + GetAllVisitsInRange(test_visit_rows[1].visit_time, + test_visit_rows[3].visit_time, 0, + &results); + ASSERT_EQ(static_cast(2), results.size()); + EXPECT_TRUE(IsVisitInfoEqual(results[0], test_visit_rows[1])); + EXPECT_TRUE(IsVisitInfoEqual(results[1], test_visit_rows[2])); + + // Query for a max count and make sure we get only that number. + GetAllVisitsInRange(Time(), Time(), 1, &results); + ASSERT_EQ(static_cast(1), results.size()); + EXPECT_TRUE(IsVisitInfoEqual(results[0], test_visit_rows[0])); +} + +TEST_F(VisitDatabaseTest, GetVisibleVisitsInRange) { + std::vector test_visit_rows = GetTestVisitRows(); + + for (size_t i = 0; i < test_visit_rows.size(); ++i) { + EXPECT_TRUE(AddVisit(&test_visit_rows[i], SOURCE_BROWSED)); + } + + // Query the visits for all time. We should not get the first or the second + // visit (duplicates of the sixth) or the redirect or subframe visits. + VisitVector results; + QueryOptions options; + GetVisibleVisitsInRange(options, &results); + ASSERT_EQ(static_cast(2), results.size()); + EXPECT_TRUE(IsVisitInfoEqual(results[0], test_visit_rows[5])); + EXPECT_TRUE(IsVisitInfoEqual(results[1], test_visit_rows[3])); + + // Now try with only per-day de-duping -- the second visit should appear, + // since it's a duplicate of visit6 but on a different day. 
+ options.duplicate_policy = QueryOptions::REMOVE_DUPLICATES_PER_DAY; + GetVisibleVisitsInRange(options, &results); + ASSERT_EQ(static_cast(3), results.size()); + EXPECT_TRUE(IsVisitInfoEqual(results[0], test_visit_rows[5])); + EXPECT_TRUE(IsVisitInfoEqual(results[1], test_visit_rows[3])); + EXPECT_TRUE(IsVisitInfoEqual(results[2], test_visit_rows[1])); + + // Now try without de-duping, expect to see all visible visits. + options.duplicate_policy = QueryOptions::KEEP_ALL_DUPLICATES; + GetVisibleVisitsInRange(options, &results); + ASSERT_EQ(static_cast(4), results.size()); + EXPECT_TRUE(IsVisitInfoEqual(results[0], test_visit_rows[5])); + EXPECT_TRUE(IsVisitInfoEqual(results[1], test_visit_rows[3])); + EXPECT_TRUE(IsVisitInfoEqual(results[2], test_visit_rows[1])); + EXPECT_TRUE(IsVisitInfoEqual(results[3], test_visit_rows[0])); + + // Set the end time to exclude the second visit. The first visit should be + // returned. Even though the second is a more recent visit, it's not in the + // query range. + options.end_time = test_visit_rows[1].visit_time; + GetVisibleVisitsInRange(options, &results); + ASSERT_EQ(static_cast(1), results.size()); + EXPECT_TRUE(IsVisitInfoEqual(results[0], test_visit_rows[0])); + + options = QueryOptions(); // Reset to options to default. + + // Query for a max count and make sure we get only that number. + options.max_count = 1; + GetVisibleVisitsInRange(options, &results); + ASSERT_EQ(static_cast(1), results.size()); + EXPECT_TRUE(IsVisitInfoEqual(results[0], test_visit_rows[5])); + + // Query a time range and make sure beginning is inclusive and ending is + // exclusive. + options.begin_time = test_visit_rows[1].visit_time; + options.end_time = test_visit_rows[3].visit_time; + options.max_count = 0; + GetVisibleVisitsInRange(options, &results); + ASSERT_EQ(static_cast(1), results.size()); + EXPECT_TRUE(IsVisitInfoEqual(results[0], test_visit_rows[1])); +} + +TEST_F(VisitDatabaseTest, VisitSource) { + // Add visits. 
+ VisitRow visit_info1(111, Time::Now(), 0, ui::PAGE_TRANSITION_LINK, 0); + ASSERT_TRUE(AddVisit(&visit_info1, SOURCE_BROWSED)); + + VisitRow visit_info2(112, Time::Now(), 1, ui::PAGE_TRANSITION_TYPED, 0); + ASSERT_TRUE(AddVisit(&visit_info2, SOURCE_SYNCED)); + + VisitRow visit_info3(113, Time::Now(), 0, ui::PAGE_TRANSITION_TYPED, 0); + ASSERT_TRUE(AddVisit(&visit_info3, SOURCE_EXTENSION)); + + // Query each visit. + std::vector matches; + ASSERT_TRUE(GetVisitsForURL(111, &matches)); + ASSERT_EQ(1U, matches.size()); + VisitSourceMap sources; + GetVisitsSource(matches, &sources); + EXPECT_EQ(0U, sources.size()); + + ASSERT_TRUE(GetVisitsForURL(112, &matches)); + ASSERT_EQ(1U, matches.size()); + GetVisitsSource(matches, &sources); + ASSERT_EQ(1U, sources.size()); + EXPECT_EQ(SOURCE_SYNCED, sources[matches[0].visit_id]); + + ASSERT_TRUE(GetVisitsForURL(113, &matches)); + ASSERT_EQ(1U, matches.size()); + GetVisitsSource(matches, &sources); + ASSERT_EQ(1U, sources.size()); + EXPECT_EQ(SOURCE_EXTENSION, sources[matches[0].visit_id]); +} + +TEST_F(VisitDatabaseTest, GetVisibleVisitsForURL) { + std::vector test_visit_rows = GetTestVisitRows(); + + for (size_t i = 0; i < test_visit_rows.size(); ++i) { + EXPECT_TRUE(AddVisit(&test_visit_rows[i], SOURCE_BROWSED)); + } + + // Query the visits for the first url id. We should not get the first or the + // second visit (duplicates of the sixth) or any other urls, redirects or + // subframe visits. + VisitVector results; + QueryOptions options; + int url_id = test_visit_rows[0].url_id; + GetVisibleVisitsForURL(url_id, options, &results); + ASSERT_EQ(static_cast(1), results.size()); + EXPECT_TRUE(IsVisitInfoEqual(results[0], test_visit_rows[5])); + + // Now try with only per-day de-duping -- the second visit should appear, + // since it's a duplicate of visit6 but on a different day. 
+ options.duplicate_policy = QueryOptions::REMOVE_DUPLICATES_PER_DAY; + GetVisibleVisitsForURL(url_id, options, &results); + ASSERT_EQ(static_cast(2), results.size()); + EXPECT_TRUE(IsVisitInfoEqual(results[0], test_visit_rows[5])); + EXPECT_TRUE(IsVisitInfoEqual(results[1], test_visit_rows[1])); + + // Now try without de-duping, expect to see all visible visits to url id 1. + options.duplicate_policy = QueryOptions::KEEP_ALL_DUPLICATES; + GetVisibleVisitsForURL(url_id, options, &results); + ASSERT_EQ(static_cast(3), results.size()); + EXPECT_TRUE(IsVisitInfoEqual(results[0], test_visit_rows[5])); + EXPECT_TRUE(IsVisitInfoEqual(results[1], test_visit_rows[1])); + EXPECT_TRUE(IsVisitInfoEqual(results[2], test_visit_rows[0])); +} + +} // namespace history diff --git a/components/history/core/browser/visit_filter.cc b/components/history/core/browser/visit_filter.cc new file mode 100644 index 0000000..1d1f152 --- /dev/null +++ b/components/history/core/browser/visit_filter.cc @@ -0,0 +1,358 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/history/core/browser/visit_filter.h" + +#include + +#include + +#include "base/logging.h" +#include "base/time/time.h" +#include "components/history/core/browser/history_types.h" + +namespace history { + +const double kLn2 = 0.6931471805599453; + +VisitFilter::VisitFilter() + : day_(DAY_UNDEFINED), + max_results_(0), + sorting_order_(ORDER_BY_RECENCY) { +} + +VisitFilter::~VisitFilter() { +} + +void VisitFilter::SetFilterTime(const base::Time& filter_time) { + filter_time_ = filter_time; + UpdateTimeVector(); +} + +void VisitFilter::SetFilterWidth(const base::TimeDelta& filter_width) { + filter_width_ = filter_width; + UpdateTimeVector(); +} + +void VisitFilter::SetDayOfTheWeekFilter(int day) { + day_ = day; + UpdateTimeVector(); +} + +void VisitFilter::SetDayTypeFilter(bool workday) { + day_ = workday ? WORKDAY : HOLIDAY; + UpdateTimeVector(); +} + +void VisitFilter::ClearFilters() { + filter_time_ = base::Time(); + filter_width_ = base::TimeDelta::FromHours(0); + day_ = DAY_UNDEFINED; + UpdateTimeVector(); +} + +bool VisitFilter::UpdateTimeVector() { + + TimeVector days_of_the_week; + if (day_ >= 0 && day_ <= 6) { + GetTimesOnTheDayOfTheWeek(day_, filter_time_, max_results_, + &days_of_the_week); + } else if (day_ == WORKDAY || day_ == HOLIDAY) { + GetTimesOnTheSameDayType( + (day_ == WORKDAY), filter_time_, max_results_, &days_of_the_week); + } + + TimeVector times_of_the_day; + if (filter_width_ != base::TimeDelta::FromSeconds(0)) { + if (sorting_order_ == ORDER_BY_TIME_GAUSSIAN) { + // Limit queries to 5 standard deviations. 
+ GetTimesInRange(filter_time_ - 5 * filter_width_, + filter_time_ + 5 * filter_width_, + max_results_, &times_of_the_day); + } else { + GetTimesInRange(filter_time_ - filter_width_, + filter_time_ + filter_width_, + max_results_, &times_of_the_day); + } + } + + if (times_of_the_day.empty()) { + if (days_of_the_week.empty()) + times_.clear(); + else + times_.swap(days_of_the_week); + } else { + if (days_of_the_week.empty()) + times_.swap(times_of_the_day); + else + IntersectTimeVectors(times_of_the_day, days_of_the_week, &times_); + } + + return !times_.empty(); +} + +// static +void VisitFilter::GetTimesInRange(base::Time begin_time_of_the_day, + base::Time end_time_of_the_day, + size_t max_results, + TimeVector* times) { + DCHECK(times); + times->clear(); + times->reserve(max_results); + const size_t kMaxReturnedResults = 62; // 2 months (<= 62 days). + + if (!max_results) + max_results = kMaxReturnedResults; + + // If range is more than 24 hours, return a contiguous interval covering + // |max_results| days. + base::TimeDelta one_day = base::TimeDelta::FromDays(1); + if (end_time_of_the_day - begin_time_of_the_day >= one_day) { + times->push_back( + std::make_pair(begin_time_of_the_day - one_day * (max_results - 1), + end_time_of_the_day)); + return; + } + + for (size_t i = 0; i < max_results; ++i) { + times->push_back( + std::make_pair(begin_time_of_the_day - base::TimeDelta::FromDays(i), + end_time_of_the_day - base::TimeDelta::FromDays(i))); + } +} + +double VisitFilter::GetVisitScore(const VisitRow& visit) const { + // Decay score by half each week. + base::TimeDelta time_passed = filter_time_ - visit.visit_time; + // Clamp to 0 in case time jumps backwards (e.g. due to DST).
+ double decay_exponent = std::max(0.0, kLn2 * static_cast( + time_passed.InMicroseconds()) / base::Time::kMicrosecondsPerWeek); + double staleness = 1.0 / exp(decay_exponent); + + double score = 0; + switch (sorting_order()) { + case ORDER_BY_RECENCY: + score = 1.0; // Let the staleness factor take care of it. + break; + case ORDER_BY_VISIT_COUNT: + score = 1.0; // Every visit counts the same. + staleness = 1.0; // No decay on this one. + break; + case ORDER_BY_TIME_GAUSSIAN: { + double offset = + GetTimeOfDayDifference(filter_time_, + visit.visit_time).InMicroseconds(); + double sd = filter_width_.InMicroseconds(); + + // Calculate score using the normal distribution density function. + score = exp(-(offset * offset) / (2 * sd * sd)); + break; + } + case ORDER_BY_TIME_LINEAR: { + base::TimeDelta offset = GetTimeOfDayDifference(filter_time_, + visit.visit_time); + if (offset > filter_width_) { + score = 0; + } else { + score = 1 - offset.InMicroseconds() / static_cast( + filter_width_.InMicroseconds()); + } + break; + } + case ORDER_BY_DURATION_SPENT: + default: + NOTREACHED() << "Not implemented!"; + } + return staleness * score; +} + +base::TimeDelta +VisitFilter::GetTimeOfDayDifference(base::Time t1, base::Time t2) { + base::TimeDelta time_of_day1 = t1 - t1.LocalMidnight(); + base::TimeDelta time_of_day2 = t2 - t2.LocalMidnight(); + + base::TimeDelta difference; + if (time_of_day1 < time_of_day2) + difference = time_of_day2 - time_of_day1; + else + difference = time_of_day1 - time_of_day2; + + // If the difference is more than 12 hours, we'll get closer by 'wrapping' + // around the day barrier. 
+ if (difference > base::TimeDelta::FromHours(12)) + difference = base::TimeDelta::FromHours(24) - difference; + + return difference; +} + +// static +void VisitFilter::GetTimesOnTheDayOfTheWeek(int day, + base::Time week, + size_t max_results, + TimeVector* times) { + DCHECK(times); + + base::Time::Exploded exploded_time; + if (week.is_null()) + week = base::Time::Now(); + week.LocalExplode(&exploded_time); + base::TimeDelta shift = base::TimeDelta::FromDays( + exploded_time.day_of_week - day); + + base::Time day_base = week.LocalMidnight(); + day_base -= shift; + + times->clear(); + times->reserve(max_results); + + base::TimeDelta one_day = base::TimeDelta::FromDays(1); + + const size_t kMaxReturnedResults = 9; // 2 months (<= 9 weeks). + + if (!max_results) + max_results = kMaxReturnedResults; + + for (size_t i = 0; i < max_results; ++i) { + times->push_back( + std::make_pair(day_base - base::TimeDelta::FromDays(i * 7), + day_base + one_day - base::TimeDelta::FromDays(i * 7))); + } +} + +// static +void VisitFilter::GetTimesOnTheSameDayType(bool workday, + base::Time week, + size_t max_results, + TimeVector* times) { + DCHECK(times); + if (week.is_null()) + week = base::Time::Now(); + // TODO(georgey): internationalize workdays/weekends/holidays. + if (!workday) { + TimeVector sunday; + TimeVector saturday; + base::Time::Exploded exploded_time; + week.LocalExplode(&exploded_time); + + GetTimesOnTheDayOfTheWeek(exploded_time.day_of_week ? 7 : 0, week, + max_results, &sunday); + GetTimesOnTheDayOfTheWeek(exploded_time.day_of_week ? 
6 : -1, week, + max_results, &saturday); + UniteTimeVectors(sunday, saturday, times); + if (max_results && times->size() > max_results) + times->resize(max_results); + } else { + TimeVector vectors[3]; + GetTimesOnTheDayOfTheWeek(1, week, max_results, &vectors[0]); + for (size_t i = 2; i <= 5; ++i) { + GetTimesOnTheDayOfTheWeek(i, week, max_results, &vectors[(i - 1) % 3]); + UniteTimeVectors(vectors[(i - 2) % 3], vectors[(i - 1) % 3], + &vectors[i % 3]); + if (max_results && vectors[i % 3].size() > max_results) + vectors[i % 3].resize(max_results); + vectors[i % 3].swap(vectors[(i - 1) % 3]); + } + // 1 == (5 - 1) % 3 + times->swap(vectors[1]); + } +} + +// static +bool VisitFilter::UniteTimeVectors(const TimeVector& vector1, + const TimeVector& vector2, + TimeVector* result) { + // The vectors are sorted going back in time, but each pair has |first| as the + // beginning of time period and |second| as the end, for example: + // { 19:20, 20:00 } { 17:00, 18:10 } { 11:33, 11:35 }... + // The pairs in one vector are guaranteed not to intersect. + DCHECK(result); + result->clear(); + result->reserve(vector1.size() + vector2.size()); + + size_t vi[2]; + const TimeVector* vectors[2] = { &vector1, &vector2 }; + for (vi[0] = 0, vi[1] = 0; + vi[0] < vectors[0]->size() && vi[1] < vectors[1]->size();) { + std::pair united_timeslot; + // Check which element occurs later (for the following diagrams time is + // increasing to the right, 'f' means first, 's' means second). + // after the following 2 statements: + // vectors[iterator_index][vi[iterator_index]] f---s + // vectors[1 - iterator_index][vi[1 - iterator_index]] f---s + // united_timeslot f---s + // or + // vectors[iterator_index][vi[iterator_index]] f---s + // vectors[1 - iterator_index][vi[1 - iterator_index]] f-s + // united_timeslot f---s + size_t iterator_index = + ((*vectors[0])[vi[0]].second >= (*vectors[1])[vi[1]].second) ?
0 : 1; + united_timeslot = (*vectors[iterator_index])[vi[iterator_index]]; + ++vi[iterator_index]; + bool added_timeslot; + // Merge all timeslots intersecting with |united_timeslot|. + do { + added_timeslot = false; + for (size_t i = 0; i <= 1; ++i) { + if (vi[i] < vectors[i]->size() && + (*vectors[i])[vi[i]].second >= united_timeslot.first) { + // vectors[i][vi[i]] f---s + // united_timeslot f---s + // or + // united_timeslot f------s + added_timeslot = true; + if ((*vectors[i])[vi[i]].first < united_timeslot.first) { + // vectors[i][vi[i]] f---s + // united_timeslot f---s + // results in: + // united_timeslot f-----s + united_timeslot.first = (*vectors[i])[vi[i]].first; + } + ++vi[i]; + } + } + } while (added_timeslot); + result->push_back(united_timeslot); + } + for (size_t i = 0; i <= 1; ++i) { + for (; vi[i] < vectors[i]->size(); ++vi[i]) + result->push_back((*vectors[i])[vi[i]]); + } + return !result->empty(); +} + +// static +bool VisitFilter::IntersectTimeVectors(const TimeVector& vector1, + const TimeVector& vector2, + TimeVector* result) { + DCHECK(result); + result->clear(); + result->reserve(std::max(vector1.size(), vector2.size())); + + TimeVector::const_iterator vi[2]; + for (vi[0] = vector1.begin(), vi[1] = vector2.begin(); + vi[0] != vector1.end() && vi[1] != vector2.end();) { + size_t it_index = (vi[0]->second >= vi[1]->second) ? 
0 : 1; + if (vi[it_index]->first >= vi[1 - it_index]->second) { + // vector 1 ++++ + // vector 2 ++ + ++vi[it_index]; + } else if (vi[it_index]->first >= vi[1 - it_index]->first) { + // vector 1 ++++ + // vector 2 +++++ + result->push_back(std::make_pair(vi[it_index]->first, + vi[1 - it_index]->second)); + ++vi[it_index]; + } else { + // vector 1 ++++ + // vector 2 ++ + result->push_back(std::make_pair(vi[1 - it_index]->first, + vi[1 - it_index]->second)); + ++vi[1 - it_index]; + } + } + + return !result->empty(); +} + +} // namespace history diff --git a/components/history/core/browser/visit_filter.h b/components/history/core/browser/visit_filter.h new file mode 100644 index 0000000..01b9331 --- /dev/null +++ b/components/history/core/browser/visit_filter.h @@ -0,0 +1,165 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_HISTORY_CORE_BROWSER_VISIT_FILTER_H_ +#define COMPONENTS_HISTORY_CORE_BROWSER_VISIT_FILTER_H_ + +#include + +#include "base/gtest_prod_util.h" +#include "base/time/time.h" + +namespace history { + +class VisitRow; + +// Helper class for creation of filters for VisitDatabase that is used to filter +// out visits by time of the day, day of the week, workdays, holidays, duration +// of the visit, location and the combinations of that. +// It also stores sorting order of the returned resilts. +class VisitFilter { + public: + VisitFilter(); + virtual ~VisitFilter(); + + // Vector of time intervals [begin time, end time]. All of the following + // functions produce vectors that are sorted in order from most recent to + // least recent and have intervals that do not intersect. + // |first| always points to the beginning of the time period, |second| - to + // the end. + typedef std::vector > TimeVector; + + // Returns time vector associated with the object. 
+ const TimeVector& times() const { + return times_; + } + + // Sets |max_results| of the results to be returned. 0 means "return results + // for the two months prior to passed time". + void set_max_results(size_t max_results) { + max_results_ = max_results; + if (times_.size() > max_results_) + times_.resize(max_results_); + } + + // Sets the time that should be used as a basis for the filter. Normally this + // is the time that a query is made. + void SetFilterTime(const base::Time& filter_time); + + // Sets the amount of time around the filter time to take into account. This + // only applies to the filter time's time-of-day, restrictions on how long + // back in time to look should be controlled by changing |max_results|. + // + // How the filter width is used depends on the sorting order. For + // |ORDER_BY_TIME_LINEAR| it is the distance to the cutoff point, while for + // |ORDER_BY_TIME_GAUSSIAN| it is the standard deviation. + void SetFilterWidth(const base::TimeDelta& filter_width); + + // The following two filters are exclusive - setting one, clears the other + // one. + + // Sets the filter to use only visits that happened on the specified day of + // the week. + // |day| - day of the week: 0 - sunday, 1 - monday, etc. + void SetDayOfTheWeekFilter(int day); + + // Sets the filter to use only visits that happened on a holiday/workday. + // |workday| - if true means Monday-Friday, if false means Saturday-Sunday. + // TODO(georgey) - internationalize it. + void SetDayTypeFilter(bool workday); + + // Sorting order that results after applying this filter are sorted by. + enum SortingOrder { + ORDER_BY_RECENCY, // Most recent visits are most relevant ones. (default) + ORDER_BY_VISIT_COUNT, // Most visited are listed first. + ORDER_BY_DURATION_SPENT, // The sites that user spents more time in are + // sorted first. + ORDER_BY_TIME_GAUSSIAN, // Visits that happened closer to the filter time's + // time-of-day are scored higher. 
The dropoff in + // score follows a normal distribution curve with + // the filter width as the standard deviation. + ORDER_BY_TIME_LINEAR, // Visits that happened closer to the filter time's + // time-of-day are score higher. The dropoff in score + // is a linear function, with filter width being the + // point where a visit does not count at all anymore. + }; + + double GetVisitScore(const VisitRow& visit) const; + + void set_sorting_order(SortingOrder order) { + sorting_order_ = order; + UpdateTimeVector(); + } + + SortingOrder sorting_order() const { + return sorting_order_; + } + + // Clears all of the filters. + void ClearFilters(); + + private: + FRIEND_TEST_ALL_PREFIXES(VisitFilterTest, CheckFilters); + FRIEND_TEST_ALL_PREFIXES(VisitFilterTest, GetTimesInRange); + FRIEND_TEST_ALL_PREFIXES(VisitFilterTest, GetTimesOnTheDayOfTheWeek); + FRIEND_TEST_ALL_PREFIXES(VisitFilterTest, GetTimesOnTheSameDayType); + FRIEND_TEST_ALL_PREFIXES(VisitFilterTest, UniteTimeVectors); + FRIEND_TEST_ALL_PREFIXES(VisitFilterTest, IntersectTimeVectors); + + // Internal helper for the update. + bool UpdateTimeVector(); + + // Internal helper for getting the times in range. See SetTimeInRangeFilter(). + static void GetTimesInRange(base::Time begin_time_of_the_day, + base::Time end_time_of_the_day, + size_t max_results, + TimeVector* times); + + // Internal helper for getting the days in range. See SetDayOfTheWeekFilter(). + // |day| could be outside of the range: -4 (3 - 7) means Wednesday last week, + // 17 (3 + 2 * 7) means Wednesday in two weeks. + static void GetTimesOnTheDayOfTheWeek(int day, + base::Time week, + size_t max_results, + TimeVector* times); + + // Internal helper for getting the days in range. See SetDayTypeFilter(). + static void GetTimesOnTheSameDayType(bool workday, + base::Time week, + size_t max_results, + TimeVector* times); + + // Unites two vectors, so the new vector has non-intersecting union of the + // original ranges. 
Returns true if the result is non-empty, false otherwise. + static bool UniteTimeVectors(const TimeVector& vector1, + const TimeVector& vector2, + TimeVector* result); + + // Intersects two vectors, so the new vector has ranges that are covered by + // both of the original ranges. Returns true if the result is non-empty, false + // otherwise. + static bool IntersectTimeVectors(const TimeVector& vector1, + const TimeVector& vector2, + TimeVector* result); + + // Returns the time-of-day difference between the two times. The result will + // always represent a value between 0 and 12 hours inclusive. + static base::TimeDelta GetTimeOfDayDifference(base::Time t1, base::Time t2); + + base::Time filter_time_; + base::TimeDelta filter_width_; + enum { + DAY_UNDEFINED = -1, + WORKDAY = 7, + HOLIDAY = 8, + }; + int day_; + TimeVector times_; + size_t max_results_; + SortingOrder sorting_order_; +}; + +} // history + +#endif // COMPONENTS_HISTORY_CORE_BROWSER_VISIT_FILTER_H_ diff --git a/components/history/core/browser/visit_filter_unittest.cc b/components/history/core/browser/visit_filter_unittest.cc new file mode 100644 index 0000000..0840ad5 --- /dev/null +++ b/components/history/core/browser/visit_filter_unittest.cc @@ -0,0 +1,314 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/history/core/browser/visit_filter.h" + +#include + +#include "base/logging.h" +#include "base/time/time.h" +#include "components/history/core/browser/history_types.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace { + +// So the tests won't go into the other day +/- several hours, return midday of +// today. 
+base::Time GetClosestMidday() {
+  return base::Time::Now().LocalMidnight() + base::TimeDelta::FromHours(12);
+}
+
+}  // namespace
+
+namespace history {
+
+class VisitFilterTest : public testing::Test {
+ public:
+  VisitFilterTest();
+
+ protected:
+  void SetUp() override;
+  void TearDown() override;
+};
+
+VisitFilterTest::VisitFilterTest() {
+}
+
+void VisitFilterTest::SetUp() {
+}
+
+void VisitFilterTest::TearDown() {
+}
+
+TEST_F(VisitFilterTest, CheckFilters) {
+  base::Time t(GetClosestMidday());
+  base::TimeDelta two_hours(base::TimeDelta::FromHours(2));
+  VisitFilter f;
+  f.set_max_results(21U);
+  f.SetFilterTime(t);
+  f.SetFilterWidth(two_hours);
+  EXPECT_EQ(21U, f.times().size());
+  for (size_t i = 0; i < f.times().size(); ++i) {
+    base::Time t_interval(t);
+    t_interval -= base::TimeDelta::FromDays(i);
+    EXPECT_EQ(t_interval - two_hours, f.times()[i].first) <<
+        "Fails at index:" << i;
+    EXPECT_EQ(t_interval + two_hours, f.times()[i].second) <<
+        "Fails at index:" << i;
+  }
+  base::Time::Exploded et;
+  t.LocalExplode(&et);
+  f.SetDayOfTheWeekFilter(et.day_of_week);
+  // 3 weeks in 21 days.
+  ASSERT_EQ(3U, f.times().size());
+  for (size_t i = 1; i < f.times().size(); ++i) {
+    base::Time t_interval(t);
+    t_interval -= base::TimeDelta::FromDays(i);
+    EXPECT_EQ(f.times()[i].first + base::TimeDelta::FromDays(7),
+              f.times()[i - 1].first) <<
+        "Fails at index:" << i;
+    EXPECT_EQ(f.times()[i].second + base::TimeDelta::FromDays(7),
+              f.times()[i - 1].second) <<
+        "Fails at index:" << i;
+    EXPECT_EQ(two_hours * 2,
+              f.times()[i].second - f.times()[i].first) <<
+        "Fails at index:" << i;
+  }
+}
+
+TEST_F(VisitFilterTest, GetTimesInRange) {
+  base::Time::Exploded et = { 2011, 7, 0, 19, 22, 15, 11, 0 };
+  base::Time t(base::Time::FromLocalExploded(et));
+  base::TimeDelta two_hours(base::TimeDelta::FromHours(2));
+  VisitFilter::TimeVector times;
+  VisitFilter::GetTimesInRange(t - two_hours, t + two_hours, 10U, &times);
+  EXPECT_GT(11U, times.size());
+  for (size_t i = 0; i < times.size(); ++i) {
+    base::Time t_interval(t);
+    t_interval -= base::TimeDelta::FromDays(i);
+    EXPECT_EQ(t_interval - two_hours, times[i].first) << "Fails at index:" << i;
+    EXPECT_EQ(t_interval + two_hours, times[i].second) <<
+        "Fails at index:" << i;
+  }
+}
+
+TEST_F(VisitFilterTest, GetTimesOnTheDayOfTheWeek) {
+  base::Time t(GetClosestMidday());
+  VisitFilter::TimeVector times;
+  base::Time::Exploded et;
+  t.LocalExplode(&et);
+  VisitFilter::GetTimesOnTheDayOfTheWeek(et.day_of_week, t, 10U, &times);
+  EXPECT_GT(11U, times.size());
+  et.hour = 0;
+  et.minute = 0;
+  et.second = 0;
+  et.millisecond = 0;
+  for (size_t i = 0; i < times.size(); ++i) {
+    base::Time t_interval(base::Time::FromLocalExploded(et));
+    t_interval -= base::TimeDelta::FromDays(7 * i);
+    EXPECT_EQ(t_interval, times[i].first) << "Fails at index:" << i;
+    EXPECT_EQ(t_interval + base::TimeDelta::FromDays(1), times[i].second) <<
+        "Fails at index:" << i;
+  }
+}
+
+TEST_F(VisitFilterTest, GetTimesOnTheSameDayType) {
+  base::Time::Exploded et = { 2011, 7, 0, 19, 22, 15, 11, 0 };
+  base::Time t(base::Time::FromLocalExploded(et));
+  VisitFilter::TimeVector times;
+  t.LocalExplode(&et);
+  VisitFilter::GetTimesOnTheSameDayType(et.day_of_week, t, 10U, &times);
+  EXPECT_GT(11U, times.size());
+  et.hour = 0;
+  et.minute = 0;
+  et.second = 0;
+  et.millisecond = 0;
+  base::Time t_start(base::Time::FromLocalExploded(et));
+  base::TimeDelta t_length;
+  if (et.day_of_week == 0 || et.day_of_week == 6) {
+    // Sunday and Saturday.
+    t_length = base::TimeDelta::FromDays(2);
+    if (et.day_of_week == 0)
+      t_start -= base::TimeDelta::FromDays(1);
+  } else {
+    t_length = base::TimeDelta::FromDays(5);
+    if (et.day_of_week != 1)
+      t_start -= base::TimeDelta::FromDays(et.day_of_week - 1);
+  }
+  for (size_t i = 0; i < times.size(); ++i) {
+    base::Time t_interval(t_start);
+    t_interval -= base::TimeDelta::FromDays(7 * i);
+    EXPECT_EQ(t_interval, times[i].first) << "Fails at index:" << i;
+    EXPECT_EQ(t_interval + t_length, times[i].second) << "Fails at index:" << i;
+  }
+}
+
+TEST_F(VisitFilterTest, UniteTimeVectors) {
+  base::Time t(base::Time::Now());
+  base::TimeDelta one_hour(base::TimeDelta::FromHours(1));
+  base::TimeDelta one_day(base::TimeDelta::FromDays(1));
+  VisitFilter::TimeVector times1;
+  times1.push_back(std::make_pair(t - one_hour, t + one_hour));
+  times1.push_back(std::make_pair(t - one_hour - one_day,
+                                  t + one_hour - one_day));
+  times1.push_back(std::make_pair(t - one_hour - one_day * 2,
+                                  t + one_hour - one_day * 2));
+  times1.push_back(std::make_pair(t - one_hour - one_day * 3,
+                                  t + one_hour - one_day * 3));
+
+  VisitFilter::TimeVector times2;
+  // Should lie completely within times1[0].
+  times2.push_back(std::make_pair(t - one_hour / 2, t + one_hour / 2));
+  // Should lie just before times1[1].
+  times2.push_back(std::make_pair(t + one_hour * 2 - one_day,
+                                  t + one_hour * 3 - one_day));
+  // Should intersect with times1.
+  times2.push_back(std::make_pair(t - one_day * 2,
+                                  t + one_hour * 2 - one_day * 2));
+  times2.push_back(std::make_pair(t - one_hour * 2 - one_day * 3,
+                                  t - one_day * 3));
+
+  VisitFilter::TimeVector result;
+  EXPECT_TRUE(VisitFilter::UniteTimeVectors(times1, times2, &result));
+  ASSERT_EQ(5U, result.size());
+  EXPECT_EQ(t - one_hour, result[0].first);
+  EXPECT_EQ(t + one_hour, result[0].second);
+  EXPECT_EQ(t + one_hour * 2 - one_day, result[1].first);
+  EXPECT_EQ(t + one_hour * 3 - one_day, result[1].second);
+  EXPECT_EQ(t - one_hour - one_day, result[2].first);
+  EXPECT_EQ(t + one_hour - one_day, result[2].second);
+  EXPECT_EQ(t - one_hour - one_day * 2, result[3].first);
+  EXPECT_EQ(t + one_hour * 2 - one_day * 2, result[3].second);
+  EXPECT_EQ(t - one_hour * 2 - one_day * 3, result[4].first);
+  EXPECT_EQ(t + one_hour - one_day * 3, result[4].second);
+
+  EXPECT_FALSE(VisitFilter::UniteTimeVectors(VisitFilter::TimeVector(),
+                                             VisitFilter::TimeVector(),
+                                             &result));
+  EXPECT_TRUE(result.empty());
+}
+
+TEST_F(VisitFilterTest, IntersectTimeVectors) {
+  base::Time t(base::Time::Now());
+  base::TimeDelta one_hour(base::TimeDelta::FromHours(1));
+  base::TimeDelta one_day(base::TimeDelta::FromDays(1));
+  VisitFilter::TimeVector times1;
+  times1.push_back(std::make_pair(t - one_hour, t + one_hour));
+
+  VisitFilter::TimeVector times2;
+  // Should lie just before times1[0].
+  times2.push_back(std::make_pair(t + one_hour * 2,
+                                  t + one_hour * 3));
+
+  VisitFilter::TimeVector result;
+  EXPECT_FALSE(VisitFilter::IntersectTimeVectors(times1, times2, &result));
+  EXPECT_TRUE(result.empty());
+
+  times1.push_back(std::make_pair(t - one_hour - one_day,
+                                  t + one_hour - one_day));
+  times1.push_back(std::make_pair(t - one_hour - one_day * 2,
+                                  t + one_hour - one_day * 2));
+  times1.push_back(std::make_pair(t - one_hour - one_day * 3,
+                                  t + one_hour - one_day * 3));
+
+  // Should lie completely within times1[1].
+  times2.push_back(std::make_pair(t - one_hour / 2 - one_day,
+                                  t + one_hour / 2 - one_day));
+  // Should intersect with times1.
+  times2.push_back(std::make_pair(t - one_day * 2,
+                                  t + one_hour * 2 - one_day * 2));
+  times2.push_back(std::make_pair(t - one_hour * 2 - one_day * 3,
+                                  t - one_day * 3));
+
+  EXPECT_TRUE(VisitFilter::IntersectTimeVectors(times1, times2, &result));
+  ASSERT_EQ(3U, result.size());
+  EXPECT_EQ(t - one_hour / 2 - one_day, result[0].first);
+  EXPECT_EQ(t + one_hour / 2 - one_day, result[0].second);
+  EXPECT_EQ(t - one_day * 2, result[1].first);
+  EXPECT_EQ(t + one_hour - one_day * 2, result[1].second);
+  EXPECT_EQ(t - one_hour - one_day * 3, result[2].first);
+  EXPECT_EQ(t - one_day * 3, result[2].second);
+
+  // Check that touching ranges do not intersect.
+  times1.clear();
+  times1.push_back(std::make_pair(t - one_hour, t));
+  times2.clear();
+  times2.push_back(std::make_pair(t, t + one_hour));
+  EXPECT_FALSE(VisitFilter::IntersectTimeVectors(times1, times2, &result));
+  EXPECT_TRUE(result.empty());
+}
+
+TEST_F(VisitFilterTest, GetVisitScore) {
+  base::Time filter_time;
+  ASSERT_TRUE(base::Time::FromString("Tue, 24 Apr 2012, 12:00:00",
+                                     &filter_time));
+  VisitFilter filter;
+  VisitRow visit;
+
+  filter.set_sorting_order(VisitFilter::ORDER_BY_RECENCY);
+  filter.SetFilterTime(filter_time);
+  filter.SetFilterWidth(base::TimeDelta::FromHours(1));
+
+  double one_week_one_hour_staleness = pow(2, -(24.0 * 7.0 + 1.0) /
+                                              (24.0 * 7.0));
+
+  // No decay on current visit.
+  visit.visit_time = filter_time;
+  EXPECT_DOUBLE_EQ(1.0, filter.GetVisitScore(visit));
+  // Half score after a week.
+  visit.visit_time = filter_time - base::TimeDelta::FromDays(7);
+  EXPECT_DOUBLE_EQ(0.5, filter.GetVisitScore(visit));
+  // Future visits should be treated as current.
+  visit.visit_time = filter_time + base::TimeDelta::FromDays(1);
+  EXPECT_DOUBLE_EQ(1.0, filter.GetVisitScore(visit));
+
+  filter.set_sorting_order(VisitFilter::ORDER_BY_VISIT_COUNT);
+  // Every visit should score 1 with this filter.
+  visit.visit_time = filter_time;
+  EXPECT_DOUBLE_EQ(1.0, filter.GetVisitScore(visit));
+  visit.visit_time = filter_time - base::TimeDelta::FromDays(7);
+  EXPECT_DOUBLE_EQ(1.0, filter.GetVisitScore(visit));
+  visit.visit_time = filter_time + base::TimeDelta::FromDays(7);
+  EXPECT_DOUBLE_EQ(1.0, filter.GetVisitScore(visit));
+
+  filter.set_sorting_order(VisitFilter::ORDER_BY_TIME_LINEAR);
+  visit.visit_time = filter_time;
+  EXPECT_DOUBLE_EQ(1.0, filter.GetVisitScore(visit));
+  // Half the filter width forward in time should get half the score for the
+  // time difference, but no staleness decay.
+  visit.visit_time = filter_time + base::TimeDelta::FromMinutes(30);
+  EXPECT_DOUBLE_EQ(0.5, filter.GetVisitScore(visit));
+  // One week back in time gets full time difference score, but a staleness
+  // factor of 0.5
+  visit.visit_time = filter_time - base::TimeDelta::FromDays(7);
+  EXPECT_DOUBLE_EQ(0.5, filter.GetVisitScore(visit));
+  // One week plus half a filter width should have its score halved before
+  // the staleness factor.
+  filter.SetFilterWidth(base::TimeDelta::FromHours(2));
+  visit.visit_time = filter_time - base::TimeDelta::FromDays(7) -
+                     base::TimeDelta::FromHours(1);
+  EXPECT_DOUBLE_EQ(0.5 * one_week_one_hour_staleness,
+                   filter.GetVisitScore(visit));
+  filter.SetFilterWidth(base::TimeDelta::FromHours(1));
+
+  filter.set_sorting_order(VisitFilter::ORDER_BY_TIME_GAUSSIAN);
+  visit.visit_time = filter_time;
+  EXPECT_DOUBLE_EQ(1.0, filter.GetVisitScore(visit));
+  // Going forward in time to test the normal distribution function.
+  visit.visit_time = filter_time + base::TimeDelta::FromHours(1);
+  EXPECT_DOUBLE_EQ(exp(-0.5), filter.GetVisitScore(visit));
+  visit.visit_time = filter_time + base::TimeDelta::FromMinutes(30);
+  EXPECT_DOUBLE_EQ(exp(-0.125), filter.GetVisitScore(visit));
+  // One week back in time gets full time difference score, but a staleness
+  // factor of 0.5
+  visit.visit_time = filter_time - base::TimeDelta::FromDays(7);
+  EXPECT_DOUBLE_EQ(0.5, filter.GetVisitScore(visit));
+  // One standard deviation of decay, plus the staleness factor.
+  visit.visit_time = filter_time - base::TimeDelta::FromDays(7) -
+                     base::TimeDelta::FromHours(1);
+  EXPECT_DOUBLE_EQ(exp(-0.5) * one_week_one_hour_staleness,
+                   filter.GetVisitScore(visit));
+}
+
+}  // namespace history
diff --git a/components/history/core/browser/visit_tracker.cc b/components/history/core/browser/visit_tracker.cc
new file mode 100644
index 0000000..71d772f
--- /dev/null
+++ b/components/history/core/browser/visit_tracker.cc
@@ -0,0 +1,106 @@
+// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/history/core/browser/visit_tracker.h"
+
+#include "base/stl_util.h"
+
+namespace history {
+
+// When the list gets longer than 'MaxItems', CleanupTransitionList will resize
+// the list down to 'ResizeTo' size. This is so we only do few block moves of
+// the data rather than constantly shuffle stuff around in the vector.
+static const size_t kMaxItemsInTransitionList = 96;
+static const size_t kResizeBigTransitionListTo = 64;
+static_assert(kResizeBigTransitionListTo < kMaxItemsInTransitionList,
+              "maxium number of items must be larger than we are resizing to");
+
+VisitTracker::VisitTracker() {
+}
+
+VisitTracker::~VisitTracker() {
+  STLDeleteContainerPairSecondPointers(contexts_.begin(), contexts_.end());
+}
+
+// This function is potentially slow because it does a brute-force search of
+// the transitions list. This transitions list is kept to a relatively small
+// number by CleanupTransitionList so it shouldn't be a big deal. However, if
+// this ends up being noticeable for performance, we may want to optimize
+// lookup.
+VisitID VisitTracker::GetLastVisit(ContextID context_id,
+                                   int32 page_id,
+                                   const GURL& referrer) {
+  if (referrer.is_empty() || !context_id)
+    return 0;
+
+  ContextList::iterator it = contexts_.find(context_id);
+  if (it == contexts_.end())
+    return 0;  // We don't have any entries for this context.
+  TransitionList& transitions = *it->second;
+
+  // Recall that a page ID is associated with a single session history entry.
+  // In the case of automatically loaded iframes, many visits/URLs can have the
+  // same page ID.
+  //
+  // We search backwards, starting at the current page ID, for the referring
+  // URL. This won't always be correct. For example, if a render process has
+  // the same page open in two different tabs, or even in two different frames,
+  // we can get confused about which was which. We can have the renderer
+  // report more precise referrer information in the future, but this is a
+  // hard problem and doesn't affect much in terms of real-world issues.
+  //
+  // We assume that the page IDs are increasing over time, so larger IDs than
+  // the current input ID happened in the future (this will occur if the user
+  // goes back). We can ignore future transitions because if you navigate, go
+  // back, and navigate some more, we'd like to have one node with two out
+  // edges in our visit graph.
+  for (int i = static_cast<int>(transitions.size()) - 1; i >= 0; i--) {
+    if (transitions[i].page_id <= page_id && transitions[i].url == referrer) {
+      // Found it.
+      return transitions[i].visit_id;
+    }
+  }
+
+  // We can't find the referrer.
+  return 0;
+}
+
+void VisitTracker::AddVisit(ContextID context_id,
+                            int32 page_id,
+                            const GURL& url,
+                            VisitID visit_id) {
+  TransitionList* transitions = contexts_[context_id];
+  if (!transitions) {
+    transitions = new TransitionList;
+    contexts_[context_id] = transitions;
+  }
+
+  Transition t;
+  t.url = url;
+  t.page_id = page_id;
+  t.visit_id = visit_id;
+  transitions->push_back(t);
+
+  CleanupTransitionList(transitions);
+}
+
+void VisitTracker::ClearCachedDataForContextID(ContextID context_id) {
+  ContextList::iterator i = contexts_.find(context_id);
+  if (i == contexts_.end())
+    return;  // We don't have any entries for this context.
+
+  delete i->second;
+  contexts_.erase(i);
+}
+
+// Drops the oldest entries so only kResizeBigTransitionListTo remain.
+void VisitTracker::CleanupTransitionList(TransitionList* transitions) {
+  if (transitions->size() <= kMaxItemsInTransitionList)
+    return;  // Nothing to do.
+
+  transitions->erase(transitions->begin(),
+                     transitions->end() - kResizeBigTransitionListTo);
+}
+
+}  // namespace history
diff --git a/components/history/core/browser/visit_tracker.h b/components/history/core/browser/visit_tracker.h
new file mode 100644
index 0000000..29aefcf
--- /dev/null
+++ b/components/history/core/browser/visit_tracker.h
@@ -0,0 +1,66 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_HISTORY_CORE_BROWSER_VISIT_TRACKER_H_
+#define COMPONENTS_HISTORY_CORE_BROWSER_VISIT_TRACKER_H_
+
+#include <map>
+#include <vector>
+
+#include "base/basictypes.h"
+#include "components/history/core/browser/history_types.h"
+
+namespace history {
+
+// Tracks history transitions between pages. The history backend uses this to
+// link up page transitions to form a chain of page visits, and to set the
+// transition type properly.
+//
+// This class is not thread safe.
+class VisitTracker {
+ public:
+  VisitTracker();
+  ~VisitTracker();
+
+  // Notifications -------------------------------------------------------------
+
+  void AddVisit(ContextID context_id,
+                int32 page_id,
+                const GURL& url,
+                VisitID visit_id);
+
+  // When a RenderProcessHost is destroyed, we want to clear out our saved
+  // transitions/visit IDs for it.
+  void ClearCachedDataForContextID(ContextID context_id);
+
+  // Querying ------------------------------------------------------------------
+
+  // Returns the visit ID of the most recent visit to |url| (the referrer) in
+  // |context_id| whose page ID is <= |page_id|. We will return 0 if there is
+  // no appropriate referring visit.
+  VisitID GetLastVisit(ContextID context_id, int32 page_id, const GURL& url);
+
+ private:
+  struct Transition {
+    GURL url;  // URL that the event happened to.
+    int32 page_id;  // ID generated by the render process host.
+    VisitID visit_id;  // Visit ID generated by history.
+  };
+  typedef std::vector<Transition> TransitionList;
+  typedef std::map<ContextID, TransitionList*> ContextList;
+
+  // Expires oldish items in the given transition list. This keeps the list
+  // size small by removing items that are unlikely to be needed, which is
+  // important for GetLastVisit which does brute-force searches of this list.
+  void CleanupTransitionList(TransitionList* transitions);
+
+  // Maps context IDs to lists of recent transitions (owned by this object).
+  ContextList contexts_;
+
+  DISALLOW_COPY_AND_ASSIGN(VisitTracker);
+};
+
+}  // namespace history
+
+#endif  // COMPONENTS_HISTORY_CORE_BROWSER_VISIT_TRACKER_H_
diff --git a/components/history/core/browser/visit_tracker_unittest.cc b/components/history/core/browser/visit_tracker_unittest.cc
new file mode 100644
index 0000000..bfd6bdb
--- /dev/null
+++ b/components/history/core/browser/visit_tracker_unittest.cc
@@ -0,0 +1,130 @@
+// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/history/core/browser/visit_tracker.h"
+
+#include "base/basictypes.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+using history::ContextID;
+using history::VisitTracker;
+
+namespace {
+
+struct VisitToTest {
+  // Identifies the context.
+  int context_id_int;
+  int32 page_id;
+
+  // Used when adding this to the tracker
+  const char* url;
+  const history::VisitID visit_id;
+
+  // Used when finding the referrer
+  const char* referrer;
+
+  // the correct referring visit ID to compare to the computed one
+  history::VisitID referring_visit_id;
+};
+
+void RunTest(VisitTracker* tracker, VisitToTest* test, int test_count) {
+  for (int i = 0; i < test_count; i++) {
+    // Our host pointer is actually just an int, convert it (it will not get
+    // dereferenced).
+    ContextID context_id = reinterpret_cast<ContextID>(test[i].context_id_int);
+
+    // Check the referrer for this visit.
+    history::VisitID ref_visit = tracker->GetLastVisit(
+        context_id, test[i].page_id, GURL(test[i].referrer));
+    EXPECT_EQ(test[i].referring_visit_id, ref_visit);
+
+    // Now add this visit.
+    tracker->AddVisit(
+        context_id, test[i].page_id, GURL(test[i].url), test[i].visit_id);
+  }
+}
+
+}  // namespace
+
+// A simple test that makes sure we transition between main pages in the
+// presence of back/forward.
+TEST(VisitTracker, SimpleTransitions) {
+  VisitToTest test_simple[] = {
+    // Started here:
+    {1, 1, "http://www.google.com/", 1, "", 0},
+    // Clicked a link:
+    {1, 2, "http://images.google.com/", 2, "http://www.google.com/", 1},
+    // Went back, then clicked a link:
+    {1, 3, "http://video.google.com/", 3, "http://www.google.com/", 1},
+  };
+
+  VisitTracker tracker;
+  RunTest(&tracker, test_simple, arraysize(test_simple));
+}
+
+// Test that referrer is properly computed when there are different frame
+// navigations happening.
+TEST(VisitTracker, Frames) {
+  VisitToTest test_frames[] = {
+    // Started here:
+    {1, 1, "http://foo.com/", 1, "", 0},
+    // Which had an auto-loaded subframe:
+    {1, 1, "http://foo.com/ad.html", 2, "http://foo.com/", 1},
+    // ...and another auto-loaded subframe:
+    {1, 1, "http://foo.com/ad2.html", 3, "http://foo.com/", 1},
+    // ...and the user navigated the first subframe to somewhere else
+    {1, 2, "http://bar.com/", 4, "http://foo.com/ad.html", 2},
+    // ...and then the second subframe somewhere else
+    {1, 3, "http://fud.com/", 5, "http://foo.com/ad2.html", 3},
+    // ...and then the main frame somewhere else.
+    {1, 4, "http://www.google.com/", 6, "http://foo.com/", 1},
+  };
+
+  VisitTracker tracker;
+  RunTest(&tracker, test_frames, arraysize(test_frames));
+}
+
+// Test frame navigation to make sure that the referrer is properly computed
+// when there are multiple processes navigating the same pages.
+TEST(VisitTracker, MultiProcess) {
+  VisitToTest test_processes[] = {
+    // Process 1 and 2 start here:
+    {1, 1, "http://foo.com/", 1, "", 0},
+    {2, 1, "http://foo.com/", 2, "", 0},
+    // They have some subframes:
+    {1, 1, "http://foo.com/ad.html", 3, "http://foo.com/", 1},
+    {2, 1, "http://foo.com/ad.html", 4, "http://foo.com/", 2},
+    // Subframes are navigated:
+    {1, 2, "http://bar.com/", 5, "http://foo.com/ad.html", 3},
+    {2, 2, "http://bar.com/", 6, "http://foo.com/ad.html", 4},
+    // Main frame is navigated:
+    {1, 3, "http://www.google.com/", 7, "http://foo.com/", 1},
+    {2, 3, "http://www.google.com/", 8, "http://foo.com/", 2},
+  };
+
+  VisitTracker tracker;
+  RunTest(&tracker, test_processes, arraysize(test_processes));
+}
+
+// Test that processes get removed properly.
+TEST(VisitTracker, ProcessRemove) {
+  // Simple navigation from one process.
+  VisitToTest part1[] = {
+    {1, 1, "http://www.google.com/", 1, "", 0},
+    {1, 2, "http://images.google.com/", 2, "http://www.google.com/", 1},
+  };
+
+  VisitTracker tracker;
+  RunTest(&tracker, part1, arraysize(part1));
+
+  // Say that context has been invalidated.
+  tracker.ClearCachedDataForContextID(reinterpret_cast<ContextID>(1));
+
+  // Simple navigation from a new process with the same ID, it should not find
+  // a referrer.
+  VisitToTest part2[] = {
+    {1, 1, "http://images.google.com/", 2, "http://www.google.com/", 0},
+  };
+  RunTest(&tracker, part2, arraysize(part2));
+}
diff --git a/components/history/core/browser/visitsegment_database.cc b/components/history/core/browser/visitsegment_database.cc
new file mode 100644
index 0000000..7beabd0
--- /dev/null
+++ b/components/history/core/browser/visitsegment_database.cc
@@ -0,0 +1,327 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/history/core/browser/visitsegment_database.h"
+
+#include <math.h>
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#include "base/logging.h"
+#include "base/stl_util.h"
+#include "base/strings/string_util.h"
+#include "base/strings/utf_string_conversions.h"
+#include "components/history/core/browser/page_usage_data.h"
+#include "sql/statement.h"
+#include "sql/transaction.h"
+
+// The following tables are used to store url segment information.
+//
+// segments
+//   id                 Primary key
+//   name               A unique string to represent that segment. (URL derived)
+//   url_id             ID of the url currently used to represent this segment.
+//
+// segment_usage
+//   id                 Primary key
+//   segment_id         Corresponding segment id
+//   time_slot          time stamp identifying for what day this entry is about
+//   visit_count        Number of visits in the segment
+//
+
+namespace history {
+
+VisitSegmentDatabase::VisitSegmentDatabase() {
+}
+
+VisitSegmentDatabase::~VisitSegmentDatabase() {
+}
+
+bool VisitSegmentDatabase::InitSegmentTables() {
+  // Segments table. NOTE(review): url_id says "NON NULL", not "NOT NULL".
+  if (!GetDB().DoesTableExist("segments")) {
+    if (!GetDB().Execute("CREATE TABLE segments ("
+        "id INTEGER PRIMARY KEY,"
+        "name VARCHAR,"
+        "url_id INTEGER NON NULL)")) {
+      return false;
+    }
+
+    if (!GetDB().Execute(
+        "CREATE INDEX segments_name ON segments(name)")) {
+      return false;
+    }
+  }
+
+  // This was added later, so we need to try to create it even if the table
+  // already exists.
+  if (!GetDB().Execute("CREATE INDEX IF NOT EXISTS segments_url_id ON "
+                       "segments(url_id)"))
+    return false;
+
+  // Segment usage table.
+  if (!GetDB().DoesTableExist("segment_usage")) {
+    if (!GetDB().Execute("CREATE TABLE segment_usage ("
+        "id INTEGER PRIMARY KEY,"
+        "segment_id INTEGER NOT NULL,"
+        "time_slot INTEGER NOT NULL,"
+        "visit_count INTEGER DEFAULT 0 NOT NULL)")) {
+      return false;
+    }
+    if (!GetDB().Execute(
+        "CREATE INDEX segment_usage_time_slot_segment_id ON "
+        "segment_usage(time_slot, segment_id)")) {
+      return false;
+    }
+  }
+
+  // Added in a later version, so we always need to try to create this index.
+  if (!GetDB().Execute("CREATE INDEX IF NOT EXISTS segments_usage_seg_id "
+                       "ON segment_usage(segment_id)"))
+    return false;
+
+  return true;
+}
+
+bool VisitSegmentDatabase::DropSegmentTables() {
+  // Dropping the tables will implicitly delete the indices.
+  return GetDB().Execute("DROP TABLE segments") &&
+         GetDB().Execute("DROP TABLE segment_usage");
+}
+
+// Note: the segment name is derived from the URL but is not a URL. It is
+// a string that can be easily recreated from various URLs. Maybe this should
+// be an MD5 to limit the length.
+//
+// static
+std::string VisitSegmentDatabase::ComputeSegmentName(const GURL& url) {
+  // TODO(brettw) this should probably use the registry controlled
+  // domains service.
+  GURL::Replacements r;
+  const char kWWWDot[] = "www.";
+  const int kWWWDotLen = arraysize(kWWWDot) - 1;
+
+  std::string host = url.host();
+  const char* host_c = host.c_str();
+  // Remove www. to avoid some dups.
+  if (static_cast<int>(host.size()) > kWWWDotLen &&
+      LowerCaseEqualsASCII(host_c, host_c + kWWWDotLen, kWWWDot)) {
+    r.SetHost(host.c_str(),
+              url::Component(kWWWDotLen,
+                  static_cast<int>(host.size()) - kWWWDotLen));
+  }
+  // Remove other stuff we don't want.
+  r.ClearUsername();
+  r.ClearPassword();
+  r.ClearQuery();
+  r.ClearRef();
+  r.ClearPort();
+
+  return url.ReplaceComponents(r).spec();
+}
+
+SegmentID VisitSegmentDatabase::GetSegmentNamed(
+    const std::string& segment_name) {
+  sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+      "SELECT id FROM segments WHERE name = ?"));
+  statement.BindString(0, segment_name);
+
+  if (statement.Step())
+    return statement.ColumnInt64(0);
+  return 0;
+}
+
+bool VisitSegmentDatabase::UpdateSegmentRepresentationURL(SegmentID segment_id,
+                                                          URLID url_id) {
+  sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+      "UPDATE segments SET url_id = ? WHERE id = ?"));
+  statement.BindInt64(0, url_id);
+  statement.BindInt64(1, segment_id);
+
+  return statement.Run();
+}
+
+URLID VisitSegmentDatabase::GetSegmentRepresentationURL(SegmentID segment_id) {
+  sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+      "SELECT url_id FROM segments WHERE id = ?"));
+  statement.BindInt64(0, segment_id);
+
+  if (statement.Step())
+    return statement.ColumnInt64(0);
+  return 0;
+}
+
+SegmentID VisitSegmentDatabase::CreateSegment(URLID url_id,
+    const std::string& segment_name) {
+  sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+      "INSERT INTO segments (name, url_id) VALUES (?,?)"));
+  statement.BindString(0, segment_name);
+  statement.BindInt64(1, url_id);
+
+  if (statement.Run())
+    return GetDB().GetLastInsertRowId();
+  return 0;
+}
+
+bool VisitSegmentDatabase::IncreaseSegmentVisitCount(SegmentID segment_id,
+                                                     base::Time ts,
+                                                     int amount) {
+  base::Time t = ts.LocalMidnight();
+
+  sql::Statement select(GetDB().GetCachedStatement(SQL_FROM_HERE,
+      "SELECT id, visit_count FROM segment_usage "
+      "WHERE time_slot = ? AND segment_id = ?"));
+  select.BindInt64(0, t.ToInternalValue());
+  select.BindInt64(1, segment_id);
+
+  if (!select.is_valid())
+    return false;
+
+  if (select.Step()) {
+    sql::Statement update(GetDB().GetCachedStatement(SQL_FROM_HERE,
+        "UPDATE segment_usage SET visit_count = ? WHERE id = ?"));
+    update.BindInt64(0, select.ColumnInt64(1) + static_cast<int64>(amount));
+    update.BindInt64(1, select.ColumnInt64(0));
+
+    return update.Run();
+  } else {
+    sql::Statement insert(GetDB().GetCachedStatement(SQL_FROM_HERE,
+        "INSERT INTO segment_usage "
+        "(segment_id, time_slot, visit_count) VALUES (?, ?, ?)"));
+    insert.BindInt64(0, segment_id);
+    insert.BindInt64(1, t.ToInternalValue());
+    insert.BindInt64(2, static_cast<int64>(amount));
+
+    return insert.Run();
+  }
+}
+
+void VisitSegmentDatabase::QuerySegmentUsage(
+    base::Time from_time,
+    int max_result_count,
+    std::vector<PageUsageData*>* results) {
+  // This function gathers the highest-ranked segments in two queries.
+  // The first gathers scores for all segments.
+  // The second gathers segment data (url, title, etc.) for the highest-ranked
+  // segments.
+
+  // Gather all the segment scores.
+  sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+      "SELECT segment_id, time_slot, visit_count "
+      "FROM segment_usage WHERE time_slot >= ? "
+      "ORDER BY segment_id"));
+  if (!statement.is_valid())
+    return;
+
+  base::Time ts = from_time.LocalMidnight();
+  statement.BindInt64(0, ts.ToInternalValue());
+
+  base::Time now = base::Time::Now();
+  SegmentID last_segment_id = 0;
+  PageUsageData* pud = NULL;
+  float score = 0;
+  while (statement.Step()) {
+    SegmentID segment_id = statement.ColumnInt64(0);
+    if (segment_id != last_segment_id) {
+      if (pud) {
+        pud->SetScore(score);
+        results->push_back(pud);
+      }
+
+      pud = new PageUsageData(segment_id);
+      score = 0;
+      last_segment_id = segment_id;
+    }
+
+    base::Time timeslot =
+        base::Time::FromInternalValue(statement.ColumnInt64(1));
+    int visit_count = statement.ColumnInt(2);
+    int days_ago = (now - timeslot).InDays();
+
+    // Score for this day in isolation.
+    float day_visits_score = 1.0f + log(static_cast<float>(visit_count));
+    // Recent visits count more than historical ones, so we multiply in a boost
+    // related to how long ago this day was.
+    // This boost is a curve that smoothly goes through these values:
+    // Today gets 3x, a week ago 2x, three weeks ago 1.5x, falling off to 1x
+    // at the limit of how far we reach into the past.
+    float recency_boost = 1.0f + (2.0f * (1.0f / (1.0f + days_ago/7.0f)));
+    score += recency_boost * day_visits_score;
+  }
+
+  if (pud) {
+    pud->SetScore(score);
+    results->push_back(pud);
+  }
+
+  // Limit to the top |max_result_count| results, deleting the trimmed ones.
+  std::sort(results->begin(), results->end(), PageUsageData::Predicate);
+  if (static_cast<int>(results->size()) > max_result_count) {
+    STLDeleteContainerPointers(results->begin() + max_result_count,
+                               results->end());
+    results->resize(max_result_count);
+  }
+
+  // Now fetch the details about the entries we care about.
+  sql::Statement statement2(GetDB().GetCachedStatement(SQL_FROM_HERE,
+      "SELECT urls.url, urls.title FROM urls "
+      "JOIN segments ON segments.url_id = urls.id "
+      "WHERE segments.id = ?"));
+
+  if (!statement2.is_valid())
+    return;
+
+  for (size_t i = 0; i < results->size(); ++i) {
+    PageUsageData* pud = (*results)[i];
+    statement2.BindInt64(0, pud->GetID());
+    if (statement2.Step()) {
+      pud->SetURL(GURL(statement2.ColumnString(0)));
+      pud->SetTitle(statement2.ColumnString16(1));
+    }
+    statement2.Reset(true);
+  }
+}
+
+bool VisitSegmentDatabase::DeleteSegmentData(base::Time older_than) {
+  sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
+      "DELETE FROM segment_usage WHERE time_slot < ?"));
+  statement.BindInt64(0, older_than.LocalMidnight().ToInternalValue());
+
+  return statement.Run();
+}
+
+bool VisitSegmentDatabase::DeleteSegmentForURL(URLID url_id) {
+  sql::Statement delete_usage(GetDB().GetCachedStatement(SQL_FROM_HERE,
+      "DELETE FROM segment_usage WHERE segment_id IN "
+      "(SELECT id FROM segments WHERE url_id = ?)"));
+  delete_usage.BindInt64(0, url_id);
+
+  if (!delete_usage.Run())
+    return false;
+
+  sql::Statement delete_seg(GetDB().GetCachedStatement(SQL_FROM_HERE,
+      "DELETE FROM segments WHERE url_id = ?"));
+  delete_seg.BindInt64(0, url_id);
+
+  return delete_seg.Run();
+}
+
+bool VisitSegmentDatabase::MigratePresentationIndex() {
+  sql::Transaction transaction(&GetDB());
+  return transaction.Begin() &&
+      GetDB().Execute("DROP TABLE presentation") &&
+      GetDB().Execute("CREATE TABLE segments_tmp ("
+                      "id INTEGER PRIMARY KEY,"
+                      "name VARCHAR,"
+                      "url_id INTEGER NON NULL)") &&
+      GetDB().Execute("INSERT INTO segments_tmp SELECT "
+                      "id, name, url_id FROM segments") &&
+      GetDB().Execute("DROP TABLE segments") &&
+      GetDB().Execute("ALTER TABLE segments_tmp RENAME TO segments") &&
+      transaction.Commit();
+}
+
+}  // namespace history
diff --git a/components/history/core/browser/visitsegment_database.h
b/components/history/core/browser/visitsegment_database.h
new file mode 100644
index 0000000..a2aa156d
--- /dev/null
+++ b/components/history/core/browser/visitsegment_database.h
@@ -0,0 +1,99 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_HISTORY_CORE_BROWSER_VISITSEGMENT_DATABASE_H_
+#define COMPONENTS_HISTORY_CORE_BROWSER_VISITSEGMENT_DATABASE_H_
+
+#include <string>
+#include <vector>
+
+#include "base/basictypes.h"
+#include "components/history/core/browser/history_types.h"
+
+class PageUsageData;
+
+namespace sql {
+class Connection;
+}
+
+namespace history {
+
+// Tracks pages used for the most visited view.
+class VisitSegmentDatabase {
+ public:
+  // Must call InitSegmentTables before using any other part of this class.
+  VisitSegmentDatabase();
+  virtual ~VisitSegmentDatabase();
+
+  // Compute a segment name given a URL. The segment name is currently the
+  // source url spec less some information such as query strings.
+  static std::string ComputeSegmentName(const GURL& url);
+
+  // Returns the ID of the segment with the corresponding name, or 0 if there
+  // is no segment with that name.
+  SegmentID GetSegmentNamed(const std::string& segment_name);
+
+  // Update the segment identified by |segment_id| with the provided URL ID.
+  // The URL identifies the page that will now represent the segment. Returns
+  // true on success.
+  bool UpdateSegmentRepresentationURL(SegmentID segment_id,
+                                      URLID url_id);
+
+  // Return the ID of the URL currently used to represent this segment or 0 if
+  // an error occurred.
+  URLID GetSegmentRepresentationURL(SegmentID segment_id);
+
+  // Create a segment for the provided URL ID with the given name. Returns the
+  // ID of the newly created segment, or 0 on failure.
+  SegmentID CreateSegment(URLID url_id, const std::string& segment_name);
+
+  // Increase the segment visit count by the provided amount. Return true on
+  // success.
+  bool IncreaseSegmentVisitCount(SegmentID segment_id, base::Time ts,
+                                 int amount);
+
+  // Compute the segment usage since |from_time|. A PageUsageData is appended
+  // to |result| for each of the highest-scored segments, up to
+  // |max_result_count| entries.
+  // NOTE(review): the retained objects are heap-allocated and are not freed
+  // here; presumably the caller takes ownership -- confirm.
+  void QuerySegmentUsage(base::Time from_time,
+                         int max_result_count,
+                         std::vector<PageUsageData*>* result);
+
+  // Delete all the segment usage data which is older than the provided time
+  // stamp.
+  bool DeleteSegmentData(base::Time older_than);
+
+  // Change the presentation id for the segment identified by |segment_id|.
+  // NOTE(review): visitsegment_database.cc does not define this method; it
+  // looks like a leftover from the removed presentation table -- confirm.
+  bool SetSegmentPresentationIndex(SegmentID segment_id, int index);
+
+  // Delete the segment currently using the provided url for representation.
+  // This will also delete any associated segment usage data.
+  bool DeleteSegmentForURL(URLID url_id);
+
+ protected:
+  // Returns the database for the functions in this interface.
+  virtual sql::Connection& GetDB() = 0;
+
+  // Creates the tables used by this class if necessary. Returns true on
+  // success.
+  bool InitSegmentTables();
+
+  // Deletes all the segment tables, returning true on success.
+  bool DropSegmentTables();
+
+  // Rebuilds the segments table without the 'pres_index' column and drops the
+  // presentation table entirely. Returns true on success.
+  bool MigratePresentationIndex();
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(VisitSegmentDatabase);
+};
+
+}  // namespace history
+
+#endif  // COMPONENTS_HISTORY_CORE_BROWSER_VISITSEGMENT_DATABASE_H_
-- 
cgit v1.1