diff options
-rw-r--r-- | chrome/browser/autocomplete/history_quick_provider_unittest.cc | 102 | ||||
-rw-r--r-- | chrome/browser/autocomplete/history_url_provider.cc | 3 | ||||
-rw-r--r-- | chrome/browser/history/history_database.h | 3 | ||||
-rw-r--r-- | chrome/browser/history/history_service.h | 1 | ||||
-rw-r--r-- | chrome/browser/history/in_memory_url_index_unittest.cc | 18 | ||||
-rw-r--r-- | chrome/browser/history/scored_history_match.cc | 220 | ||||
-rw-r--r-- | chrome/browser/history/scored_history_match.h | 53 | ||||
-rw-r--r-- | chrome/browser/history/scored_history_match_unittest.cc | 220 | ||||
-rw-r--r-- | chrome/browser/omnibox/omnibox_field_trial.cc | 14 | ||||
-rw-r--r-- | chrome/browser/omnibox/omnibox_field_trial.h | 18 |
10 files changed, 122 insertions, 530 deletions
diff --git a/chrome/browser/autocomplete/history_quick_provider_unittest.cc b/chrome/browser/autocomplete/history_quick_provider_unittest.cc index 22527a8..e1c805e 100644 --- a/chrome/browser/autocomplete/history_quick_provider_unittest.cc +++ b/chrome/browser/autocomplete/history_quick_provider_unittest.cc @@ -10,13 +10,17 @@ #include <string> #include <vector> +#include "base/format_macros.h" #include "base/memory/scoped_ptr.h" #include "base/message_loop.h" #include "base/prefs/pref_service.h" +#include "base/strings/stringprintf.h" #include "base/strings/utf_string_conversions.h" #include "chrome/browser/autocomplete/autocomplete_provider_listener.h" #include "chrome/browser/autocomplete/autocomplete_result.h" #include "chrome/browser/autocomplete/history_url_provider.h" +#include "chrome/browser/history/history_backend.h" +#include "chrome/browser/history/history_database.h" #include "chrome/browser/history/history_service.h" #include "chrome/browser/history/history_service_factory.h" #include "chrome/browser/history/in_memory_url_index.h" @@ -30,6 +34,7 @@ #include "chrome/test/base/testing_profile.h" #include "chrome/test/base/ui_test_utils.h" #include "content/public/test/test_browser_thread.h" +#include "sql/transaction.h" #include "testing/gtest/include/gtest/gtest.h" using base::Time; @@ -52,19 +57,19 @@ struct TestURLInfo { {"http://news.google.com/", "Google News", 1, 1, 0}, {"http://foo.com/", "Dir", 200, 100, 0}, {"http://foo.com/dir/", "Dir", 2, 1, 10}, - {"http://foo.com/dir/another/", "Dir", 5, 10, 0}, + {"http://foo.com/dir/another/", "Dir", 10, 5, 0}, {"http://foo.com/dir/another/again/", "Dir", 5, 1, 0}, - {"http://foo.com/dir/another/again/myfile.html", "File", 3, 2, 0}, + {"http://foo.com/dir/another/again/myfile.html", "File", 3, 1, 0}, {"http://visitedest.com/y/a", "VA", 10, 1, 20}, {"http://visitedest.com/y/b", "VB", 9, 1, 20}, {"http://visitedest.com/x/c", "VC", 8, 1, 20}, {"http://visitedest.com/x/d", "VD", 7, 1, 20}, {"http://visitedest.com/y/e", "VE", 6, 1, 20}, - {"http://typeredest.com/y/a", "TA", 3, 5, 0}, - {"http://typeredest.com/y/b", "TB", 3, 4, 0}, - {"http://typeredest.com/x/c", "TC", 3, 3, 0}, - {"http://typeredest.com/x/d", "TD", 3, 2, 0}, - {"http://typeredest.com/y/e", "TE", 3, 1, 0}, + {"http://typeredest.com/y/a", "TA", 5, 5, 0}, + {"http://typeredest.com/y/b", "TB", 5, 4, 0}, + {"http://typeredest.com/x/c", "TC", 5, 3, 0}, + {"http://typeredest.com/x/d", "TD", 5, 2, 0}, + {"http://typeredest.com/y/e", "TE", 5, 1, 0}, {"http://daysagoest.com/y/a", "DA", 1, 1, 0}, {"http://daysagoest.com/y/b", "DB", 1, 1, 1}, {"http://daysagoest.com/x/c", "DC", 1, 1, 2}, @@ -132,9 +137,6 @@ class HistoryQuickProviderTest : public testing::Test, bool can_inline_top_result, string16 expected_fill_into_edit); - // Pass-through functions to simplify our friendship with URLIndexPrivateData. - bool UpdateURL(const history::URLRow& row); - base::MessageLoopForUI message_loop_; content::TestBrowserThread ui_thread_; content::TestBrowserThread file_thread_; @@ -161,24 +163,14 @@ void HistoryQuickProviderTest::SetUp() { TemplateURLServiceFactory::GetInstance()->SetTestingFactoryAndUse( profile_.get(), &HistoryQuickProviderTest::CreateTemplateURLService); FillData(); + provider_->GetIndex()->RebuildFromHistory( + history_service_->history_backend_->db()); } void HistoryQuickProviderTest::TearDown() { provider_ = NULL; } -bool HistoryQuickProviderTest::UpdateURL(const history::URLRow& row) { - history::URLDatabase* db = history_service_->InMemoryDatabase(); - DCHECK(db); - EXPECT_NE(db->AddURL(row), 0); - history::InMemoryURLIndex* index = provider_->GetIndex(); - DCHECK(index); - history::URLIndexPrivateData* private_data = index->private_data(); - DCHECK(private_data); - return private_data->UpdateURL(history_service_, row, index->languages_, - index->scheme_whitelist_); -} - void HistoryQuickProviderTest::GetTestData(size_t* data_count, TestURLInfo** test_data) { DCHECK(data_count); @@ -188,24 +180,48 @@ void HistoryQuickProviderTest::GetTestData(size_t* data_count, } void HistoryQuickProviderTest::FillData() { - history::URLDatabase* db = history_service_->InMemoryDatabase(); - ASSERT_TRUE(db != NULL); + sql::Connection& db(history_service_->history_backend_->db()->GetDB()); + ASSERT_TRUE(db.is_open()); + size_t data_count = 0; TestURLInfo* test_data = NULL; GetTestData(&data_count, &test_data); + size_t visit_id = 1; for (size_t i = 0; i < data_count; ++i) { const TestURLInfo& cur(test_data[i]); - const GURL current_url(cur.url); Time visit_time = Time::Now() - TimeDelta::FromDays(cur.days_from_now); - - history::URLRow url_info(current_url); - url_info.set_id(i + 5000); - url_info.set_title(UTF8ToUTF16(cur.title)); - url_info.set_visit_count(cur.visit_count); - url_info.set_typed_count(cur.typed_count); - url_info.set_last_visit(visit_time); - url_info.set_hidden(false); - UpdateURL(url_info); + sql::Transaction transaction(&db); + + // Add URL. + transaction.Begin(); + std::string sql_cmd_line = base::StringPrintf( + "INSERT INTO \"urls\" VALUES(%" PRIuS ", \'%s\', \'%s\', %d, %d, %" + PRId64 ", 0, 0)", + i + 1, cur.url.c_str(), cur.title.c_str(), cur.visit_count, + cur.typed_count, visit_time.ToInternalValue()); + std::string sql_cmd(sql_cmd_line); + sql::Statement sql_stmt(db.GetUniqueStatement(sql_cmd_line.c_str())); + EXPECT_TRUE(sql_stmt.Run()); + transaction.Commit(); + + // Add visits. + for (int j = 0; j < cur.visit_count; ++j) { + // Assume earlier visits are at one-day intervals. + visit_time -= TimeDelta::FromDays(1); + transaction.Begin(); + // Mark the most recent |cur.typed_count| visits as typed. + std::string sql_cmd_line = base::StringPrintf( + "INSERT INTO \"visits\" VALUES(%" PRIuS ", %" PRIuS ", %" PRId64 + ", 0, %d, 0, 0, 1)", + visit_id++, i + 1, visit_time.ToInternalValue(), + (j < cur.typed_count) ? content::PAGE_TRANSITION_TYPED : + content::PAGE_TRANSITION_LINK); + + std::string sql_cmd(sql_cmd_line); + sql::Statement sql_stmt(db.GetUniqueStatement(sql_cmd_line.c_str())); + EXPECT_TRUE(sql_stmt.Run()); + transaction.Commit(); + } } } @@ -327,19 +343,10 @@ TEST_F(HistoryQuickProviderTest, MultiMatch) { TEST_F(HistoryQuickProviderTest, StartRelativeMatch) { std::vector<std::string> expected_urls; expected_urls.push_back("http://xyzabcdefghijklmnopqrstuvw.com/a"); - RunTest(ASCIIToUTF16("xyz"), expected_urls, true, + RunTest(ASCIIToUTF16("xyza"), expected_urls, true, ASCIIToUTF16("xyzabcdefghijklmnopqrstuvw.com/a")); } -TEST_F(HistoryQuickProviderTest, PrefixOnlyMatch) { - std::vector<std::string> expected_urls; - expected_urls.push_back("http://foo.com/"); - expected_urls.push_back("http://popularsitewithroot.com/"); - expected_urls.push_back("http://slashdot.org/favorite_page.html"); - RunTest(ASCIIToUTF16("http://"), expected_urls, true, - ASCIIToUTF16("http://foo.com")); -} - TEST_F(HistoryQuickProviderTest, EncodingMatch) { std::vector<std::string> expected_urls; expected_urls.push_back("http://spaces.com/path%20with%20spaces/foo.html"); @@ -495,15 +502,14 @@ TEST_F(HistoryQuickProviderTest, PreventBeatingURLWhatYouTypedMatch) { RunTest(ASCIIToUTF16("popularsitewithpathonly.com"), expected_urls, true, ASCIIToUTF16("popularsitewithpathonly.com/moo")); EXPECT_LT(ac_matches_[0].relevance, - HistoryURLProvider::kScoreForWhatYouTypedResult); + HistoryURLProvider::kScoreForUnvisitedIntranetResult); // Verify the same thing happens if the user adds a / to end of the // hostname. RunTest(ASCIIToUTF16("popularsitewithpathonly.com/"), expected_urls, true, ASCIIToUTF16("popularsitewithpathonly.com/moo")); EXPECT_LT(ac_matches_[0].relevance, - HistoryURLProvider::kScoreForWhatYouTypedResult); - + HistoryURLProvider::kScoreForUnvisitedIntranetResult); // Check that if the user didn't quite enter the full hostname, this // page would've normally scored above the URL-what-you-typed match. @@ -576,7 +582,7 @@ TestURLInfo ordering_test_db[] = { 2}, {"http://store.steampowered.com/", "the steam summer camp sale", 6, 6, 1}, {"http://www.teamliquid.net/tlpd/korean/players", "tlpd - bw korean - player " - "index", 100, 45, 219}, + "index", 25, 7, 219}, {"http://slashdot.org/", "slashdot: news for nerds, stuff that matters", 3, 3, 6}, {"http://translate.google.com/", "google translate", 3, 3, 0}, diff --git a/chrome/browser/autocomplete/history_url_provider.cc b/chrome/browser/autocomplete/history_url_provider.cc index d59eb5f..9bd2e0b 100644 --- a/chrome/browser/autocomplete/history_url_provider.cc +++ b/chrome/browser/autocomplete/history_url_provider.cc @@ -323,8 +323,7 @@ HistoryURLProvider::HistoryURLProvider(AutocompleteProviderListener* listener, !OmniboxFieldTrial::InHUPCreateShorterMatchFieldTrial() || !OmniboxFieldTrial:: InHUPCreateShorterMatchFieldTrialExperimentGroup()), - search_url_database_( - !OmniboxFieldTrial::InHQPReplaceHUPScoringExperimentGroup()) { + search_url_database_(true) { } // static diff --git a/chrome/browser/history/history_database.h b/chrome/browser/history/history_database.h index d29d44b..eecdc87 100644 --- a/chrome/browser/history/history_database.h +++ b/chrome/browser/history/history_database.h @@ -27,6 +27,8 @@ namespace base { class FilePath; } +class HistoryQuickProviderTest; + namespace history { // Encapsulates the SQL connection for the history database. This class holds @@ -170,6 +172,7 @@ class HistoryDatabase : public DownloadDatabase, friend class AndroidProviderBackend; FRIEND_TEST_ALL_PREFIXES(AndroidURLsMigrationTest, MigrateToVersion22); #endif + friend class ::HistoryQuickProviderTest; friend class InMemoryURLIndexTest; FRIEND_TEST_ALL_PREFIXES(IconMappingMigrationTest, TestIconMappingMigration); diff --git a/chrome/browser/history/history_service.h b/chrome/browser/history/history_service.h index 9a672c5..2a0cb810 100644 --- a/chrome/browser/history/history_service.h +++ b/chrome/browser/history/history_service.h @@ -643,6 +643,7 @@ class HistoryService : public CancelableRequestProvider, friend class history::HistoryBackend; friend class history::HistoryQueryTest; friend class HistoryOperation; + friend class HistoryQuickProviderTest; friend class HistoryURLProvider; friend class HistoryURLProviderTest; friend class history::InMemoryURLIndexTest; diff --git a/chrome/browser/history/in_memory_url_index_unittest.cc b/chrome/browser/history/in_memory_url_index_unittest.cc index cd1fbf0..8a2d1de 100644 --- a/chrome/browser/history/in_memory_url_index_unittest.cc +++ b/chrome/browser/history/in_memory_url_index_unittest.cc @@ -472,7 +472,7 @@ TEST_F(InMemoryURLIndexTest, Retrieval) { // Search which should result in nearly perfect result. matches = url_index_->HistoryItemsForTerms( - ASCIIToUTF16("https NearlyPerfectResult"), string16::npos); + ASCIIToUTF16("Nearly Perfect Result"), string16::npos); ASSERT_EQ(1U, matches.size()); // The results should have a very high score. EXPECT_GT(matches[0].raw_score, 900); @@ -570,8 +570,8 @@ TEST_F(InMemoryURLIndexTest, URLPrefixMatching) { ASSERT_EQ(1U, matches.size()); EXPECT_TRUE(matches[0].can_inline); - // "http://drudgere" - found, can inline - matches = url_index_->HistoryItemsForTerms(ASCIIToUTF16("http://drudgere"), + // "drudgere" - found, can inline + matches = url_index_->HistoryItemsForTerms(ASCIIToUTF16("drudgere"), string16::npos); ASSERT_EQ(1U, matches.size()); EXPECT_TRUE(matches[0].can_inline); @@ -593,8 +593,8 @@ TEST_F(InMemoryURLIndexTest, URLPrefixMatching) { ASSERT_EQ(1U, matches.size()); EXPECT_TRUE(matches[0].can_inline); - // "http://view.atdmt" - found, can inline - matches = url_index_->HistoryItemsForTerms(ASCIIToUTF16("http://view.atdmt"), + // "view.atdmt" - found, can inline + matches = url_index_->HistoryItemsForTerms(ASCIIToUTF16("view.atdmt"), string16::npos); ASSERT_EQ(1U, matches.size()); EXPECT_TRUE(matches[0].can_inline); @@ -612,14 +612,14 @@ TEST_F(InMemoryURLIndexTest, URLPrefixMatching) { ASSERT_EQ(1U, matches.size()); EXPECT_TRUE(matches[0].can_inline); - // "ww.cnn.com" - not found because we don't allow ww as a mid-term match + // "ww.cnn.com" - found because we allow mid-term matches in hostnames matches = url_index_->HistoryItemsForTerms(ASCIIToUTF16("ww.cnn.com"), string16::npos); - ASSERT_EQ(0U, matches.size()); + ASSERT_EQ(1U, matches.size()); - // "http://www.cnn.com" - found, can inline + // "www.cnn.com" - found, can inline matches = - url_index_->HistoryItemsForTerms(ASCIIToUTF16("http://www.cnn.com"), + url_index_->HistoryItemsForTerms(ASCIIToUTF16("www.cnn.com"), string16::npos); ASSERT_EQ(1U, matches.size()); EXPECT_TRUE(matches[0].can_inline); diff --git a/chrome/browser/history/scored_history_match.cc b/chrome/browser/history/scored_history_match.cc index 895aa1a..3563a9d 100644 --- a/chrome/browser/history/scored_history_match.cc +++ b/chrome/browser/history/scored_history_match.cc @@ -18,27 +18,14 @@ #include "chrome/browser/autocomplete/history_url_provider.h" #include "chrome/browser/autocomplete/url_prefix.h" #include "chrome/browser/bookmarks/bookmark_service.h" -#include "chrome/browser/omnibox/omnibox_field_trial.h" #include "chrome/common/chrome_switches.h" #include "content/public/browser/browser_thread.h" namespace history { -// The maximum score any candidate match can achieve. -const int kMaxTotalScore = 1425; - -// Score ranges used to get a 'base' score for each of the scoring factors -// (such as recency of last visit, times visited, times the URL was typed, -// and the quality of the string match). There is a matching value range for -// each of these scores for each factor. Note that the top score is greater -// than |kMaxTotalScore|. The score for each candidate will be capped in the -// final calculation. -const int kScoreRank[] = { 1450, 1200, 900, 400 }; - // ScoredHistoryMatch ---------------------------------------------------------- bool ScoredHistoryMatch::initialized_ = false; -bool ScoredHistoryMatch::use_new_scoring = false; const size_t ScoredHistoryMatch::kMaxVisitsToScore = 10u; bool ScoredHistoryMatch::also_do_hup_like_scoring = false; int ScoredHistoryMatch::max_assigned_score_for_non_inlineable_matches = -1; @@ -47,7 +34,6 @@ ScoredHistoryMatch::ScoredHistoryMatch() : raw_score(0), can_inline(false) { if (!initialized_) { - InitializeNewScoringField(); InitializeAlsoDoHUPLikeScoringFieldAndMaxScoreField(); initialized_ = true; } @@ -65,7 +51,6 @@ ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& row, raw_score(0), can_inline(false) { if (!initialized_) { - InitializeNewScoringField(); InitializeAlsoDoHUPLikeScoringFieldAndMaxScoreField(); initialized_ = true; } @@ -158,66 +143,12 @@ ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& row, num_components_in_best_prefix); } - // Determine if the associated URLs is referenced by any bookmarks. - float bookmark_boost = - (bookmark_service && bookmark_service->IsBookmarked(gurl)) ? 10.0 : 0.0; - - if (use_new_scoring) { - const float topicality_score = GetTopicalityScore( - terms.size(), url, url_matches, title_matches, word_starts); - const float frecency_score = GetFrecency(now, visits); - raw_score = GetFinalRelevancyScore(topicality_score, frecency_score); - raw_score = - (raw_score <= kint32max) ? static_cast<int>(raw_score) : kint32max; - } else { // "old" scoring - // Get partial scores based on term matching. Note that the score for - // each of the URL and title are adjusted by the fraction of the - // terms appearing in each. - int url_score = - ScoreComponentForMatches(url_matches, word_starts.url_word_starts_, - url.length()) * - std::min(url_matches.size(), terms.size()) / terms.size(); - int title_score = - ScoreComponentForMatches(title_matches, word_starts.title_word_starts_, - title.length()) * - std::min(title_matches.size(), terms.size()) / terms.size(); - // Arbitrarily pick the best. - // TODO(mrossetti): It might make sense that a term which appears in both - // the URL and the Title should boost the score a bit. - int term_score = std::max(url_score, title_score); - if (term_score == 0) - return; - - // Determine scoring factors for the recency of visit, visit count and typed - // count attributes of the URLRow. - const int kDaysAgoLevel[] = { 1, 10, 20, 30 }; - int days_ago_value = ScoreForValue((base::Time::Now() - - row.last_visit()).InDays(), kDaysAgoLevel); - const int kVisitCountLevel[] = { 50, 30, 10, 5 }; - int visit_count_value = ScoreForValue(row.visit_count(), kVisitCountLevel); - const int kTypedCountLevel[] = { 50, 30, 10, 5 }; - int typed_count_value = ScoreForValue(row.typed_count() + bookmark_boost, - kTypedCountLevel); - - // The final raw score is calculated by: - // - multiplying each factor by a 'relevance' - // - calculating the average. - // Note that visit_count is reduced by typed_count because both are bumped - // when a typed URL is recorded thus giving visit_count too much weight. - const int kTermScoreRelevance = 4; - const int kDaysAgoRelevance = 2; - const int kVisitCountRelevance = 2; - const int kTypedCountRelevance = 5; - int effective_visit_count_value = - std::max(0, visit_count_value - typed_count_value); - raw_score = term_score * kTermScoreRelevance + - days_ago_value * kDaysAgoRelevance + - effective_visit_count_value * kVisitCountRelevance + - typed_count_value * kTypedCountRelevance; - raw_score /= (kTermScoreRelevance + kDaysAgoRelevance + - kVisitCountRelevance + kTypedCountRelevance); - raw_score = std::min(kMaxTotalScore, raw_score); - } + const float topicality_score = GetTopicalityScore( + terms.size(), url, url_matches, title_matches, word_starts); + const float frecency_score = GetFrecency(now, visits); + raw_score = GetFinalRelevancyScore(topicality_score, frecency_score); + raw_score = + (raw_score <= kint32max) ? static_cast<int>(raw_score) : kint32max; if (also_do_hup_like_scoring && can_inline) { // HistoryURL-provider-like scoring gives any match that is @@ -273,125 +204,6 @@ ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& row, ScoredHistoryMatch::~ScoredHistoryMatch() {} -// std::accumulate helper function to add up TermMatches' lengths as used in -// ScoreComponentForMatches -int AccumulateMatchLength(int total, const TermMatch& match) { - return total + match.length; -} - -// static -int ScoredHistoryMatch::ScoreComponentForMatches( - const TermMatches& provided_matches, - const WordStarts& word_starts, - size_t max_length) { - if (provided_matches.empty() || (max_length == 0)) - return 0; - - // The actual matches we'll use for matching. This is |provided_matches| - // with all the matches not at a word boundary removed. - TermMatches matches; - MakeTermMatchesOnlyAtWordBoundaries(provided_matches, word_starts, - &matches); - - if (matches.empty()) - return 0; - - // Score component for whether the input terms (if more than one) were found - // in the same order in the match. Start with kOrderMaxValue points divided - // equally among (number of terms - 1); then discount each of those terms that - // is out-of-order in the match. - const int kOrderMaxValue = 1000; - int order_value = kOrderMaxValue; - if (matches.size() > 1) { - int max_possible_out_of_order = matches.size() - 1; - int out_of_order = 0; - for (size_t i = 1; i < matches.size(); ++i) { - if (matches[i - 1].term_num > matches[i].term_num) - ++out_of_order; - } - order_value = (max_possible_out_of_order - out_of_order) * kOrderMaxValue / - max_possible_out_of_order; - } - - // Score component for how early in the match string the first search term - // appears. Start with kStartMaxValue points and discount by - // kStartMaxValue/kMaxSignificantChars points for each character later than - // the first at which the term begins. No points are earned if the start of - // the match occurs at or after kMaxSignificantChars. - const int kStartMaxValue = 1000; - int start_value = (kMaxSignificantChars - - std::min(kMaxSignificantChars, matches[0].offset)) * kStartMaxValue / - kMaxSignificantChars; - - // Score component for how much of the matched string the input terms cover. - // kCompleteMaxValue points times the fraction of the URL/page title string - // that was matched. - size_t term_length_total = std::accumulate(matches.begin(), matches.end(), - 0, AccumulateMatchLength); - const size_t kMaxSignificantLength = 50; - size_t max_significant_length = - std::min(max_length, std::max(term_length_total, kMaxSignificantLength)); - const int kCompleteMaxValue = 1000; - int complete_value = - term_length_total * kCompleteMaxValue / max_significant_length; - - const int kOrderRelevance = 1; - const int kStartRelevance = 6; - const int kCompleteRelevance = 3; - int raw_score = order_value * kOrderRelevance + - start_value * kStartRelevance + - complete_value * kCompleteRelevance; - raw_score /= (kOrderRelevance + kStartRelevance + kCompleteRelevance); - - // Scale the raw score into a single score component in the same manner as - // used in ScoredMatchForURL(). - const int kTermScoreLevel[] = { 1000, 750, 500, 200 }; - return ScoreForValue(raw_score, kTermScoreLevel); -} - -// static -void ScoredHistoryMatch::MakeTermMatchesOnlyAtWordBoundaries( - const TermMatches& provided_matches, - const WordStarts& word_starts, - TermMatches* matches_at_word_boundaries) { - matches_at_word_boundaries->clear(); - // Resize it to an upper-bound estimate of the correct size. - matches_at_word_boundaries->reserve(provided_matches.size()); - WordStarts::const_iterator next_word_starts = word_starts.begin(); - for (TermMatches::const_iterator iter = provided_matches.begin(); - iter != provided_matches.end(); ++iter) { - // Advance next_word_starts until it's >= the position of the term - // we're considering. - while ((next_word_starts != word_starts.end()) && - (*next_word_starts < iter->offset)) { - ++next_word_starts; - } - if ((next_word_starts != word_starts.end()) && - (*next_word_starts == iter->offset)) { - // At word boundary: copy this element into |matches_at_word_boundaries|. - matches_at_word_boundaries->push_back(*iter); - } - } -} - -// static -int ScoredHistoryMatch::ScoreForValue(int value, const int* value_ranks) { - int i = 0; - int rank_count = arraysize(kScoreRank); - while ((i < rank_count) && ((value_ranks[0] < value_ranks[1]) ? - (value > value_ranks[i]) : (value < value_ranks[i]))) - ++i; - if (i >= rank_count) - return 0; - int score = kScoreRank[i]; - if (i > 0) { - score += (value - value_ranks[i]) * - (kScoreRank[i - 1] - kScoreRank[i]) / - (value_ranks[i - 1] - value_ranks[i]); - } - return score; -} - // Comparison function for sorting ScoredMatches by their scores with // intelligent tie-breaking. bool ScoredHistoryMatch::MatchScoreGreater(const ScoredHistoryMatch& m1, @@ -647,6 +459,8 @@ float ScoredHistoryMatch::GetFrecency(const base::Time& now, // how many visits there were in order to penalize a match that has // fewer visits than kMaxVisitsToScore. const int total_sampled_visits = std::min(visits.size(), kMaxVisitsToScore); + if (total_sampled_visits == 0) + return 0.0f; float summed_visit_points = 0; for (int i = 0; i < total_sampled_visits; ++i) { const int value_of_transition = @@ -699,24 +513,8 @@ float ScoredHistoryMatch::GetFinalRelevancyScore(float topicality_score, return std::min(1399.0, 1300 + slope * (intermediate_score - 12.0)); } -void ScoredHistoryMatch::InitializeNewScoringField() { - // For the field trial stuff to work correctly, we must be running - // on the same thread as the thread that created the field trial, - // which happens via a call to OmniboxFieldTrial::Active in - // chrome_browser_main.cc on the main thread. Let's check this to - // be sure. We check "if we've heard of the UI thread then we'd better - // be on it." The first part is necessary so unit tests pass. (Many - // unit tests don't set up the threading naming system; hence - // CurrentlyOn(UI thread) will fail.) - DCHECK(!content::BrowserThread::IsWellKnownThread( - content::BrowserThread::UI) || - content::BrowserThread::CurrentlyOn(content::BrowserThread::UI)); - use_new_scoring = OmniboxFieldTrial::InHQPNewScoringExperimentGroup(); -} - void ScoredHistoryMatch::InitializeAlsoDoHUPLikeScoringFieldAndMaxScoreField() { - also_do_hup_like_scoring = - OmniboxFieldTrial::InHQPReplaceHUPScoringExperimentGroup(); + also_do_hup_like_scoring = false; // When doing HUP-like scoring, don't allow a non-inlineable match // to beat the score of good inlineable matches. This is a problem // because if a non-inlineable match ends up with the highest score diff --git a/chrome/browser/history/scored_history_match.h b/chrome/browser/history/scored_history_match.h index 7f5a463..46cfa87 100644 --- a/chrome/browser/history/scored_history_match.h +++ b/chrome/browser/history/scored_history_match.h @@ -43,51 +43,12 @@ struct ScoredHistoryMatch : public history::HistoryMatch { BookmarkService* bookmark_service); ~ScoredHistoryMatch(); - // Calculates a component score based on position, ordering, word - // boundaries, and total substring match size using metrics recorded - // in |matches| and |word_starts|. |max_length| is the length of - // the string against which the terms are being searched. - // |provided_matches| should already be sorted and de-duped, and - // |word_starts| must be sorted. - static int ScoreComponentForMatches(const TermMatches& provided_matches, - const WordStarts& word_starts, - size_t max_length); - - // Given a set of term matches |provided_matches| and word boundaries - // |word_starts|, fills in |matches_at_word_boundaries| with only the - // matches in |provided_matches| that are at word boundaries. - // |provided_matches| should already be sorted and de-duped, and - // |word_starts| must be sorted. - static void MakeTermMatchesOnlyAtWordBoundaries( - const TermMatches& provided_matches, - const WordStarts& word_starts, - TermMatches* matches_at_word_boundaries); - - // Converts a raw value for some particular scoring factor into a score - // component for that factor. The conversion function is piecewise linear, - // with input values provided in |value_ranks| and resulting output scores - // from |kScoreRank| (mathematically, f(value_rank[i]) = kScoreRank[i]). A - // score cannot be higher than kScoreRank[0], and drops directly to 0 if - // lower than kScoreRank[3]. - // - // For example, take |value| == 70 and |value_ranks| == { 100, 50, 30, 10 }. - // Because 70 falls between ranks 0 (100) and 1 (50), the score is given by - // the linear function: - // score = m * value + b, where - // m = (kScoreRank[0] - kScoreRank[1]) / (value_ranks[0] - value_ranks[1]) - // b = value_ranks[1] - // Any value higher than 100 would be scored as if it were 100, and any value - // lower than 10 scored 0. - static int ScoreForValue(int value, const int* value_ranks); - // Compares two matches by score. Functor supporting URLIndexPrivateData's // HistoryItemsForTerms function. Looks at particular fields within // with url_info to make tie-breaking a bit smarter. static bool MatchScoreGreater(const ScoredHistoryMatch& m1, const ScoredHistoryMatch& m2); - // Start of functions used only in "new" scoring ------------------------ - // Return a topicality score based on how many matches appear in the // |url| and the page's title and where they are (e.g., at word // boundaries). |url_matches| and |title_matches| provide details @@ -127,17 +88,11 @@ struct ScoredHistoryMatch : public history::HistoryMatch { float topicality_score, float frecency_score); - // Sets use_new_scoring based on command line flags and/or - // field trial state. - static void InitializeNewScoringField(); - // Sets also_do_hup_like_scoring and // max_assigned_score_for_non_inlineable_matches based on the field // trial state. static void InitializeAlsoDoHUPLikeScoringFieldAndMaxScoreField(); - // End of functions used only in "new" scoring -------------------------- - // An interim score taking into consideration location and completeness // of the match. int raw_score; @@ -168,14 +123,6 @@ struct ScoredHistoryMatch : public history::HistoryMatch { // Used so we initialize static variables only once (on first use). static bool initialized_; - // Whether to use new-scoring or old-scoring. Set in the - // constructor by examining command line flags and field trial - // state. Note that new-scoring has to do with a new version of the - // ordinary scoring done here. It has nothing to do with and no - // affect on HistoryURLProvider-like scoring that can happen in this - // class as well (see boolean below). - static bool use_new_scoring; - // The maximum number of recent visits to examine in GetFrecency(). static const size_t kMaxVisitsToScore; diff --git a/chrome/browser/history/scored_history_match_unittest.cc b/chrome/browser/history/scored_history_match_unittest.cc index 7466f5d..7c23ff3 100644 --- a/chrome/browser/history/scored_history_match_unittest.cc +++ b/chrome/browser/history/scored_history_match_unittest.cc @@ -12,6 +12,21 @@ namespace history { +// Returns a VisitInfoVector that includes |num_visits| spread over the +// last |frecency|*|num_visits| days (relative to |now|). A frequency of +// one means one visit each day, two means every other day, etc. +VisitInfoVector CreateVisitInfoVector(int num_visits, + int frequency, + base::Time now) { + VisitInfoVector visits; + for (int i = 0; i < num_visits; ++i) { + visits.push_back( + std::make_pair(now - base::TimeDelta::FromDays(i * frequency), + content::PAGE_TRANSITION_LINK)); + } + return visits; +} + class ScoredHistoryMatchTest : public testing::Test { protected: // Convenience function to create a URLRow with basic data for |url|, |title|, @@ -89,161 +104,73 @@ float ScoredHistoryMatchTest::GetTopicalityScoreOfTermAgainstURLAndTitle( 1, url, url_matches, title_matches, word_starts); } -TEST_F(ScoredHistoryMatchTest, MakeTermMatchesOnlyAtWordBoundaries) { - TermMatches matches, matches_at_word_boundaries; - WordStarts word_starts; - - // no matches but some word starts -> no matches at word boundary - matches.clear(); - word_starts.clear(); - word_starts.push_back(2); - word_starts.push_back(5); - word_starts.push_back(10); - ScoredHistoryMatch::MakeTermMatchesOnlyAtWordBoundaries( - matches, word_starts, &matches_at_word_boundaries); - EXPECT_EQ(0u, matches_at_word_boundaries.size()); - - // matches but no word starts -> no matches at word boundary - matches.clear(); - matches.push_back(TermMatch(0, 1, 2)); // 2-character match at pos 1 - matches.push_back(TermMatch(0, 7, 2)); // 2-character match at pos 7 - word_starts.clear(); - ScoredHistoryMatch::MakeTermMatchesOnlyAtWordBoundaries( - matches, word_starts, &matches_at_word_boundaries); - EXPECT_EQ(0u, matches_at_word_boundaries.size()); - - // matches and word starts don't overlap -> no matches at word boundary - matches.clear(); - matches.push_back(TermMatch(0, 1, 2)); // 2-character match at pos 1 - matches.push_back(TermMatch(0, 7, 2)); // 2-character match at pos 7 - word_starts.clear(); - word_starts.push_back(2); - word_starts.push_back(5); - word_starts.push_back(10); - ScoredHistoryMatch::MakeTermMatchesOnlyAtWordBoundaries( - matches, word_starts, &matches_at_word_boundaries); - EXPECT_EQ(0u, matches_at_word_boundaries.size()); - - // some matches are at word boundary and some aren't - matches.clear(); - matches.push_back(TermMatch(0, 1, 2)); // 2-character match at pos 1 - matches.push_back(TermMatch(1, 6, 3)); // 3-character match at pos 6 - matches.push_back(TermMatch(0, 8, 2)); // 2-character match at pos 8 - matches.push_back(TermMatch(2, 15, 7)); // 7-character match at pos 15 - matches.push_back(TermMatch(1, 26, 3)); // 3-character match at pos 26 - word_starts.clear(); - word_starts.push_back(0); - word_starts.push_back(6); - word_starts.push_back(9); - word_starts.push_back(15); - word_starts.push_back(24); - ScoredHistoryMatch::MakeTermMatchesOnlyAtWordBoundaries( - matches, word_starts, &matches_at_word_boundaries); - EXPECT_EQ(2u, matches_at_word_boundaries.size()); - EXPECT_EQ(1, matches_at_word_boundaries[0].term_num); - EXPECT_EQ(6u, matches_at_word_boundaries[0].offset); - EXPECT_EQ(3u, matches_at_word_boundaries[0].length); - EXPECT_EQ(2, matches_at_word_boundaries[1].term_num); - EXPECT_EQ(15u, matches_at_word_boundaries[1].offset); - EXPECT_EQ(7u, matches_at_word_boundaries[1].length); - - // all matches are at word boundary - matches.clear(); - matches.push_back(TermMatch(0, 2, 2)); // 2-character match at pos 2 - matches.push_back(TermMatch(1, 9, 3)); // 3-character match at pos 9 - word_starts.clear(); - word_starts.push_back(0); - word_starts.push_back(2); - word_starts.push_back(6); - word_starts.push_back(9); - word_starts.push_back(15); - ScoredHistoryMatch::MakeTermMatchesOnlyAtWordBoundaries( - matches, word_starts, &matches_at_word_boundaries); - EXPECT_EQ(2u, matches_at_word_boundaries.size()); - EXPECT_EQ(0, matches_at_word_boundaries[0].term_num); - EXPECT_EQ(2u, matches_at_word_boundaries[0].offset); - EXPECT_EQ(2u, matches_at_word_boundaries[0].length); - EXPECT_EQ(1, matches_at_word_boundaries[1].term_num); - EXPECT_EQ(9u, matches_at_word_boundaries[1].offset); - EXPECT_EQ(3u, matches_at_word_boundaries[1].length); -} - TEST_F(ScoredHistoryMatchTest, Scoring) { - URLRow row_a(MakeURLRow("http://fedcba", "abcd bcd", 3, 30, 1)); - RowWordStarts word_starts_a; - PopulateWordStarts(row_a, &word_starts_a); // We use NowFromSystemTime() because MakeURLRow uses the same function // to calculate last visit time when building a row. base::Time now = base::Time::NowFromSystemTime(); - VisitInfoVector visits; - - // Test scores based on position. - ScoredHistoryMatch scored_a(row_a, visits, std::string(), - ASCIIToUTF16("abc"), Make1Term("abc"), - word_starts_a, now, NULL); - ScoredHistoryMatch scored_b(row_a, visits, std::string(), - ASCIIToUTF16("bcd"), Make1Term("bcd"), - word_starts_a, now, NULL); - EXPECT_GT(scored_a.raw_score, scored_b.raw_score); - - // Test scores based on length. - ScoredHistoryMatch scored_c(row_a, visits, std::string(), - ASCIIToUTF16("abcd"), Make1Term("abcd"), - word_starts_a, now, NULL); - EXPECT_LT(scored_a.raw_score, scored_c.raw_score); - // Test scores based on order. - ScoredHistoryMatch scored_d(row_a, visits, std::string(), - ASCIIToUTF16("abc bcd"), Make2Terms("abc", "bcd"), - word_starts_a, now, NULL); - ScoredHistoryMatch scored_e(row_a, visits, std::string(), - ASCIIToUTF16("bcd abc"), Make2Terms("bcd", "abc"), - word_starts_a, now, NULL); - EXPECT_GT(scored_d.raw_score, scored_e.raw_score); + URLRow row_a(MakeURLRow("http://fedcba", "abcd bcd", 3, 30, 1)); + RowWordStarts word_starts_a; + PopulateWordStarts(row_a, &word_starts_a); + VisitInfoVector visits_a = CreateVisitInfoVector(3, 30, now); + // Mark one visit as typed. + visits_a[0].second = content::PAGE_TRANSITION_TYPED; + ScoredHistoryMatch scored_a(row_a, visits_a, std::string(), + ASCIIToUTF16("abc"), Make1Term("abc"), + word_starts_a, now, NULL); // Test scores based on visit_count. URLRow row_b(MakeURLRow("http://abcdef", "abcd bcd", 10, 30, 1)); RowWordStarts word_starts_b; PopulateWordStarts(row_b, &word_starts_b); - ScoredHistoryMatch scored_f(row_b, visits, std::string(), + VisitInfoVector visits_b = CreateVisitInfoVector(10, 30, now); + visits_b[0].second = content::PAGE_TRANSITION_TYPED; + ScoredHistoryMatch scored_b(row_b, visits_b, std::string(), ASCIIToUTF16("abc"), Make1Term("abc"), word_starts_b, now, NULL); - EXPECT_GT(scored_f.raw_score, scored_a.raw_score); + EXPECT_GT(scored_b.raw_score, scored_a.raw_score); // Test scores based on last_visit. URLRow row_c(MakeURLRow("http://abcdef", "abcd bcd", 3, 10, 1)); RowWordStarts word_starts_c; PopulateWordStarts(row_c, &word_starts_c); - ScoredHistoryMatch scored_g(row_c, visits, std::string(), + VisitInfoVector visits_c = CreateVisitInfoVector(3, 10, now); + visits_c[0].second = content::PAGE_TRANSITION_TYPED; + ScoredHistoryMatch scored_c(row_c, visits_c, std::string(), ASCIIToUTF16("abc"), Make1Term("abc"), word_starts_c, now, NULL); - EXPECT_GT(scored_g.raw_score, scored_a.raw_score); + EXPECT_GT(scored_c.raw_score, scored_a.raw_score); // Test scores based on typed_count. - URLRow row_d(MakeURLRow("http://abcdef", "abcd bcd", 3, 30, 10)); + URLRow row_d(MakeURLRow("http://abcdef", "abcd bcd", 3, 30, 3)); RowWordStarts word_starts_d; PopulateWordStarts(row_d, &word_starts_d); - ScoredHistoryMatch scored_h(row_d, visits, std::string(), + VisitInfoVector visits_d = CreateVisitInfoVector(3, 30, now); + visits_d[0].second = content::PAGE_TRANSITION_TYPED; + visits_d[2].second = content::PAGE_TRANSITION_TYPED; + visits_d[3].second = content::PAGE_TRANSITION_TYPED; + ScoredHistoryMatch scored_d(row_d, visits_d, std::string(), ASCIIToUTF16("abc"), Make1Term("abc"), word_starts_d, now, NULL); - EXPECT_GT(scored_h.raw_score, scored_a.raw_score); + EXPECT_GT(scored_d.raw_score, scored_a.raw_score); // Test scores based on a terms appearing multiple times. URLRow row_e(MakeURLRow("http://csi.csi.csi/csi_csi", - "CSI Guide to CSI Las Vegas, CSI New York, CSI Provo", 3, 30, 10)); + "CSI Guide to CSI Las Vegas, CSI New York, CSI Provo", 3, 30, 3)); RowWordStarts word_starts_e; PopulateWordStarts(row_e, &word_starts_e); - ScoredHistoryMatch scored_i(row_e, visits, std::string(), + const VisitInfoVector visits_e = visits_d; + ScoredHistoryMatch scored_e(row_e, visits_e, std::string(), ASCIIToUTF16("csi"), Make1Term("csi"), word_starts_e, now, NULL); - EXPECT_LT(scored_i.raw_score, 1400); + EXPECT_LT(scored_e.raw_score, 1400); // Test that a result with only a mid-term match (i.e., not at a word // boundary) scores 0. - ScoredHistoryMatch scored_j(row_a, visits, std::string(), + ScoredHistoryMatch scored_f(row_a, visits_a, std::string(), ASCIIToUTF16("cd"), Make1Term("cd"), word_starts_a, now, NULL); - EXPECT_EQ(scored_j.raw_score, 0); + EXPECT_EQ(scored_f.raw_score, 0); } TEST_F(ScoredHistoryMatchTest, Inlining) { @@ -316,63 +243,6 @@ TEST_F(ScoredHistoryMatchTest, Inlining) { } } -class BookmarkServiceMock : public BookmarkService { - public: - explicit BookmarkServiceMock(const GURL& url); - virtual ~BookmarkServiceMock() {} - - // Returns true if the given |url| is the same as |url_|. - virtual bool IsBookmarked(const GURL& url) OVERRIDE; - - // Required but unused. - virtual void GetBookmarks(std::vector<URLAndTitle>* bookmarks) OVERRIDE {} - virtual void BlockTillLoaded() OVERRIDE {} - - private: - const GURL url_; - - DISALLOW_COPY_AND_ASSIGN(BookmarkServiceMock); -}; - -BookmarkServiceMock::BookmarkServiceMock(const GURL& url) - : BookmarkService(), - url_(url) { -} - -bool BookmarkServiceMock::IsBookmarked(const GURL& url) { - return url == url_; -} - -TEST_F(ScoredHistoryMatchTest, ScoringWithBookmarks) { - const GURL url("http://www.nanny.org"); - BookmarkServiceMock bookmark_model_mock(url); - URLRow row_a(MakeURLRow("http://www.nanny.org", "Nanny", 3, 30, 1)); - RowWordStarts word_starts_a; - PopulateWordStarts(row_a, &word_starts_a); - // We use NowFromSystemTime() because MakeURLRow uses the same function - // to calculate last visit time when building a row. - base::Time now = base::Time::NowFromSystemTime(); - VisitInfoVector visits; - - // Identical queries but the first should be boosted by having a bookmark. - ScoredHistoryMatch scored_a(row_a, visits, std::string(), - ASCIIToUTF16("nanny"), Make1Term("nanny"), - word_starts_a, now, &bookmark_model_mock); - ScoredHistoryMatch scored_b(row_a, visits, std::string(), - ASCIIToUTF16("nanny"), Make1Term("nanny"), - word_starts_a, now, NULL); - EXPECT_GT(scored_a.raw_score, scored_b.raw_score); - - // Identical queries, neither should be boosted by having a bookmark. - ScoredHistoryMatch scored_c(row_a, visits, std::string(), - ASCIIToUTF16("stick"), Make1Term("stick"), - word_starts_a, now, &bookmark_model_mock); - ScoredHistoryMatch scored_d(row_a, visits, std::string(), - ASCIIToUTF16("stick"), Make1Term("stick"), - word_starts_a, now, NULL); - EXPECT_EQ(scored_c.raw_score, scored_d.raw_score); -} - TEST_F(ScoredHistoryMatchTest, GetTopicalityScoreTrailingSlash) { const float hostname = GetTopicalityScoreOfTermAgainstURLAndTitle( ASCIIToUTF16("def"), diff --git a/chrome/browser/omnibox/omnibox_field_trial.cc b/chrome/browser/omnibox/omnibox_field_trial.cc index 4568264..4ec2c5c 100644 --- a/chrome/browser/omnibox/omnibox_field_trial.cc +++ b/chrome/browser/omnibox/omnibox_field_trial.cc @@ -18,8 +18,6 @@ namespace { // Field trial names. const char kDisallowInlineHQPFieldTrialName[] = "OmniboxDisallowInlineHQP"; -const char kHQPReplaceHUPAndNewScoringFieldTrialName[] = - "OmniboxReplaceHUPAndNewScoring"; const char kHUPCullRedirectsFieldTrialName[] = "OmniboxHUPCullRedirects"; const char kHUPCreateShorterMatchFieldTrialName[] = "OmniboxHUPCreateShorterMatch"; @@ -193,18 +191,6 @@ void OmniboxFieldTrial::GetActiveSuggestFieldTrialHashes( } } -bool OmniboxFieldTrial::InHQPNewScoringExperimentGroup() { - return base::FieldTrialList::FindFullName( - kHQPReplaceHUPAndNewScoringFieldTrialName).find( - "NewScoring") != std::string::npos; -} - -bool OmniboxFieldTrial::InHQPReplaceHUPScoringExperimentGroup() { - return base::FieldTrialList::FindFullName( - kHQPReplaceHUPAndNewScoringFieldTrialName).find( - "HQPReplaceHUP") != std::string::npos; -} - bool OmniboxFieldTrial::InHUPCullRedirectsFieldTrial() { return base::FieldTrialList::TrialExists(kHUPCullRedirectsFieldTrialName); } diff --git a/chrome/browser/omnibox/omnibox_field_trial.h b/chrome/browser/omnibox/omnibox_field_trial.h index 6fe5149..e6f0345 100644 --- a/chrome/browser/omnibox/omnibox_field_trial.h +++ b/chrome/browser/omnibox/omnibox_field_trial.h @@ -55,24 +55,6 @@ class OmniboxFieldTrial { std::vector<uint32>* field_trial_hash); // --------------------------------------------------------- - // For the HistoryQuick provider field trial that combines replacing - // the HistoryURL provider and turning on "new scoring" in HistoryQuick - // provider. - - // Returns whether the user should get "new scoring" in HistoryQuick - // provider or the default scoring. "New scoring" is based on the - // frequency of recent visits to the URL, a.k.a. "frecency" - // scoring). - static bool InHQPNewScoringExperimentGroup(); - - // Returns whether the user experiment the replace HUP behavior or - // the default behavior. The experiment group simultaneously - // disables HistoryURL provider from searching the URL database and - // directs HistoryQuick provider to calculate both HUP-style and - // HQP-style scores for matches, then return whichever is larger. - static bool InHQPReplaceHUPScoringExperimentGroup(); - - // --------------------------------------------------------- // For the HistoryURL provider disable culling redirects field trial. // Returns whether the user is in any group for this field trial. |