diff options
author | mrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-04-20 04:24:55 +0000 |
---|---|---|
committer | mrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-04-20 04:24:55 +0000 |
commit | c99037d1bad96b729e015a84ca19e5a144689ccd (patch) | |
tree | 90fde2ca30a50cd28b60e8368e4a4ca94650e32f /chrome | |
parent | 456c8047e4c8cea12174f649e209798e11d83781 (diff) | |
download | chromium_src-c99037d1bad96b729e015a84ca19e5a144689ccd.zip chromium_src-c99037d1bad96b729e015a84ca19e5a144689ccd.tar.gz chromium_src-c99037d1bad96b729e015a84ca19e5a144689ccd.tar.bz2 |
Multi-highlight and scoring improvements.
Incorporate multi-offset highlighting into HQP. Update results scoring. Switch to integer scoring operations from float.
BUG=79271,78153
TEST=Added unit tests. Updated unit tests.
Review URL: http://codereview.chromium.org/6859005
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@82237 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome')
-rw-r--r-- | chrome/app/generated_resources.grd | 6 | ||||
-rw-r--r-- | chrome/browser/autocomplete/autocomplete.cc | 4 | ||||
-rw-r--r-- | chrome/browser/autocomplete/history_quick_provider.cc | 59 | ||||
-rw-r--r-- | chrome/browser/autocomplete/history_quick_provider.h | 4 | ||||
-rw-r--r-- | chrome/browser/autocomplete/history_quick_provider_unittest.cc | 26 | ||||
-rw-r--r-- | chrome/browser/history/in_memory_url_index.cc | 199 | ||||
-rw-r--r-- | chrome/browser/history/in_memory_url_index.h | 15 | ||||
-rw-r--r-- | chrome/browser/history/in_memory_url_index_unittest.cc | 45 | ||||
-rw-r--r-- | chrome/test/data/History/url_history_provider_test.db.txt | 53 |
9 files changed, 273 insertions, 138 deletions
diff --git a/chrome/app/generated_resources.grd b/chrome/app/generated_resources.grd index fddb05c..2d44c7d 100644 --- a/chrome/app/generated_resources.grd +++ b/chrome/app/generated_resources.grd @@ -4189,6 +4189,12 @@ Keep your key file in a safe place. You will need it to create new versions of y <message name="IDS_FLAGS_DISABLE_INTERACTIVE_FORM_VALIDATION_DESCRIPTION" desc="Description of the 'Disable HTML5 interactive form validation' lab."> Disable showing validation messages and preventing form submission. </message> + <message name="IDS_FLAGS_ENABLE_HISTORY_QUICK_PROVIDER" desc="Name of the 'Enable better visit history matching in the omnibox' lab."> + Better omnibox history matching + </message> + <message name="IDS_FLAGS_ENABLE_HISTORY_QUICK_PROVIDER_DESCRIPTION" desc="Description of the 'Enable better visit history matching in the omnibox' lab."> + Enables substring and multi-fragment matching within URLs from history. + </message> <message name="IDS_FLAGS_NEW_TAB_PAGE_4_NAME" desc="Name of the new tab page 4 lab."> Experimental new tab page </message> diff --git a/chrome/browser/autocomplete/autocomplete.cc b/chrome/browser/autocomplete/autocomplete.cc index c022f96..f072c0c 100644 --- a/chrome/browser/autocomplete/autocomplete.cc +++ b/chrome/browser/autocomplete/autocomplete.cc @@ -788,7 +788,9 @@ AutocompleteController::AutocompleteController( in_start_(false) { search_provider_ = new SearchProvider(this, profile); providers_.push_back(search_provider_); - if (!CommandLine::ForCurrentProcess()->HasSwitch( + if (CommandLine::ForCurrentProcess()->HasSwitch( + switches::kEnableHistoryQuickProvider) && + !CommandLine::ForCurrentProcess()->HasSwitch( switches::kDisableHistoryQuickProvider)) providers_.push_back(new HistoryQuickProvider(this, profile)); if (!CommandLine::ForCurrentProcess()->HasSwitch( diff --git a/chrome/browser/autocomplete/history_quick_provider.cc b/chrome/browser/autocomplete/history_quick_provider.cc index 8a7046a..8e31343 100644 --- a/chrome/browser/autocomplete/history_quick_provider.cc +++ b/chrome/browser/autocomplete/history_quick_provider.cc @@ -4,6 +4,8 @@ #include "chrome/browser/autocomplete/history_quick_provider.h" +#include <vector> + #include "base/basictypes.h" #include "base/i18n/break_iterator.h" #include "base/logging.h" @@ -94,13 +96,20 @@ void HistoryQuickProvider::DoAutocomplete() { if (matches.empty()) return; + // Artificially reduce the score of high-scoring results which should not be + // inline autocompletd. Each such result gets the next available + // |next_dont_inline_score|. Upon use of next_dont_inline_score it is + // decremented. + int next_dont_inline_score = 1199; size_t match_num = matches.size() - 1; for (ScoredHistoryMatches::const_iterator match_iter = matches.begin(); match_iter != matches.end(); ++match_iter, --match_num) { const ScoredHistoryMatch& history_match(*match_iter); if (history_match.raw_score > 0) { AutocompleteMatch ac_match = - QuickMatchToACMatch(history_match, match_num); + QuickMatchToACMatch(history_match, match_num, + autocomplete_input_.prevent_inline_autocomplete(), + &next_dont_inline_score); matches_.push_back(ac_match); } } @@ -108,35 +117,57 @@ void HistoryQuickProvider::DoAutocomplete() { AutocompleteMatch HistoryQuickProvider::QuickMatchToACMatch( const ScoredHistoryMatch& history_match, - size_t match_number) { + size_t match_number, + bool prevent_inline_autocomplete, + int* next_dont_inline_score) { + DCHECK(next_dont_inline_score); const history::URLRow& info = history_match.url_info; int score = CalculateRelevance(history_match.raw_score, autocomplete_input_.type(), NORMAL, match_number); + + // Discount a very high score when a) a match doesn't start at the beginning + // of the URL, or b) there are more than one substring matches in the URL, or + // c) the type of request does not allow inline autocompletion. This prevents + // the URL from being offered as an inline completion. + const int kMaxDontInlineScore = 1199; + if (score > kMaxDontInlineScore && + (prevent_inline_autocomplete || history_match.url_matches.size() > 1 || + history_match.url_matches[0].offset > 0)) { + score = std::min(*next_dont_inline_score, score); + --*next_dont_inline_score; + } + AutocompleteMatch match(this, score, !!info.visit_count(), history_match.url_matches.empty() ? AutocompleteMatch::HISTORY_URL : AutocompleteMatch::HISTORY_TITLE); + match.destination_url = info.url(); DCHECK(match.destination_url.is_valid()); + + // Format the fill_into_edit and determine its offset. size_t inline_autocomplete_offset = history_match.input_location + autocomplete_input_.text().length(); - const net::FormatUrlTypes format_types = net::kFormatUrlOmitAll; match.fill_into_edit = AutocompleteInput::FormattedStringWithEquivalentMeaning(info.url(), - net::FormatUrl(info.url(), languages_, format_types, - UnescapeRule::SPACES, NULL, NULL, - &inline_autocomplete_offset)); + net::FormatUrl(info.url(), languages_, net::kFormatUrlOmitAll, + UnescapeRule::SPACES, NULL, NULL, &inline_autocomplete_offset)); if (!autocomplete_input_.prevent_inline_autocomplete()) match.inline_autocomplete_offset = inline_autocomplete_offset; DCHECK((match.inline_autocomplete_offset == string16::npos) || (match.inline_autocomplete_offset <= match.fill_into_edit.length())); // Format the URL autocomplete presentation. - match.contents = net::FormatUrl(info.url(), languages_, format_types, - UnescapeRule::SPACES, NULL, NULL, NULL); - match.contents_class = SpansFromTermMatch(history_match.url_matches, - match.contents.size()); + std::vector<size_t> offsets = + InMemoryURLIndex::OffsetsFromTermMatches(history_match.url_matches); + match.contents = + net::FormatUrlWithOffsets(info.url(), languages_, net::kFormatUrlOmitAll, + UnescapeRule::SPACES, NULL, NULL, &offsets); + history::TermMatches new_matches = + InMemoryURLIndex::ReplaceOffsetsInTermMatches(history_match.url_matches, + offsets); + match.contents_class = SpansFromTermMatch(new_matches, match.contents.size()); // Format the description autocomplete presentation. match.description = info.title(); @@ -177,7 +208,7 @@ int HistoryQuickProvider::CalculateRelevance(int raw_score, return 1200; default: - return 400 + raw_score; + return raw_score; } } @@ -196,12 +227,6 @@ ACMatchClassifications HistoryQuickProvider::SpansFromTermMatch( size_t match_count = matches.size(); for (size_t i = 0; i < match_count;) { size_t offset = matches[i].offset; - // TODO(mrossetti): Remove the following 'if' when http://crbug.com/77210 - // has been properly fixed. This guards against trying to highlight - // substrings which fall off the end of the string as a result of having - // encoded characters in the string. - if (offset >= text_length) - return spans; spans.push_back(ACMatchClassification(offset, ACMatchClassification::MATCH)); // Skip all adjacent matches. diff --git a/chrome/browser/autocomplete/history_quick_provider.h b/chrome/browser/autocomplete/history_quick_provider.h index 3b0277a..996e4be 100644 --- a/chrome/browser/autocomplete/history_quick_provider.h +++ b/chrome/browser/autocomplete/history_quick_provider.h @@ -46,7 +46,9 @@ class HistoryQuickProvider : public HistoryProvider { AutocompleteMatch QuickMatchToACMatch( const history::ScoredHistoryMatch& history_match, - size_t match_number); + size_t match_number, + bool prevent_inline_autocomplete, + int* next_dont_inline_score); // Determines the relevance for some input, given its type and which match it // is. If |match_type| is NORMAL, |match_number| is a number diff --git a/chrome/browser/autocomplete/history_quick_provider_unittest.cc b/chrome/browser/autocomplete/history_quick_provider_unittest.cc index 4ecf7f1..985340d 100644 --- a/chrome/browser/autocomplete/history_quick_provider_unittest.cc +++ b/chrome/browser/autocomplete/history_quick_provider_unittest.cc @@ -43,7 +43,7 @@ struct TestURLInfo { {"http://news.google.com/?ned=us&topic=n", "Google News - U.S.", 2, 2, 0}, {"http://news.google.com/", "Google News", 1, 1, 0}, {"http://foo.com/", "Dir", 5, 5, 0}, - {"http://foo.com/dir/", "Dir", 2, 2, 0}, + {"http://foo.com/dir/", "Dir", 2, 1, 10}, {"http://foo.com/dir/another/", "Dir", 5, 1, 0}, {"http://foo.com/dir/another/again/", "Dir", 10, 0, 0}, {"http://foo.com/dir/another/again/myfile.html", "File", 10, 2, 0}, @@ -62,12 +62,12 @@ struct TestURLInfo { {"http://daysagoest.com/x/c", "DC", 1, 1, 2}, {"http://daysagoest.com/x/d", "DD", 1, 1, 3}, {"http://daysagoest.com/y/e", "DE", 1, 1, 4}, - {"http://abcdefghixyzjklmnopqrstuvw.com/a", "", 1, 1, 0}, + {"http://abcdefghixyzjklmnopqrstuvw.com/a", "", 3, 1, 0}, {"http://spaces.com/path%20with%20spaces/foo.html", "Spaces", 2, 2, 0}, - {"http://abcdefghijklxyzmnopqrstuvw.com/a", "", 1, 1, 0}, - {"http://abcdefxyzghijklmnopqrstuvw.com/a", "", 1, 1, 0}, - {"http://abcxyzdefghijklmnopqrstuvw.com/a", "", 1, 1, 0}, - {"http://xyzabcdefghijklmnopqrstuvw.com/a", "", 1, 1, 0}, + {"http://abcdefghijklxyzmnopqrstuvw.com/a", "", 3, 1, 0}, + {"http://abcdefxyzghijklmnopqrstuvw.com/a", "", 3, 1, 0}, + {"http://abcxyzdefghijklmnopqrstuvw.com/a", "", 3, 1, 0}, + {"http://xyzabcdefghijklmnopqrstuvw.com/a", "", 3, 1, 0}, {"http://cda.com/Dogs%20Cats%20Gorillas%20Sea%20Slugs%20and%20Mice", "Dogs & Cats & Mice", 1, 1, 0}, }; @@ -220,11 +220,11 @@ TEST_F(HistoryQuickProviderTest, SimpleSingleMatch) { TEST_F(HistoryQuickProviderTest, MultiMatch) { string16 text(ASCIIToUTF16("foo")); std::vector<std::string> expected_urls; - // Scores high because of completion length. + // Scores high because of typed_count. expected_urls.push_back("http://foo.com/"); // Scores high because of visit count. - expected_urls.push_back("http://foo.com/dir/another/again/"); - // Scores high because of visit count but less match span. + expected_urls.push_back("http://foo.com/dir/another/"); + // Scores high because of high visit count. expected_urls.push_back("http://foo.com/dir/another/again/myfile.html"); RunTest(text, expected_urls, "http://foo.com/"); } @@ -278,12 +278,12 @@ TEST_F(HistoryQuickProviderTest, EncodingLimitMatch) { const size_t max_offset = url.size() - ((6 * 2) + 7); for (ACMatchClassifications::const_iterator citer = content.begin(); citer != content.end(); ++citer) - EXPECT_GT(max_offset, citer->offset); + EXPECT_LT(citer->offset, max_offset); ACMatchClassifications description(ac_matches_[0].description_class); std::string page_title("Dogs & Cats & Mice"); - for (ACMatchClassifications::const_iterator diter = content.begin(); - diter != content.end(); ++diter) - EXPECT_GT(page_title.size(), diter->offset); + for (ACMatchClassifications::const_iterator diter = description.begin(); + diter != description.end(); ++diter) + EXPECT_LT(diter->offset, page_title.size()); } TEST_F(HistoryQuickProviderTest, Spans) { diff --git a/chrome/browser/history/in_memory_url_index.cc b/chrome/browser/history/in_memory_url_index.cc index 67546a2..9085853 100644 --- a/chrome/browser/history/in_memory_url_index.cc +++ b/chrome/browser/history/in_memory_url_index.cc @@ -49,20 +49,11 @@ typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry const size_t InMemoryURLIndex::kNoCachedResultForTerm = -1; -// Scoring constants. -const float kOrderMaxValue = 50.0; -const float kStartMaxValue = 50.0; -const size_t kMaxSignificantStart = 20; -const float kCompleteMaxValue = 100.0; -const float kLastVisitMaxValue = 200.0; -const base::TimeDelta kMaxSignificantDay = base::TimeDelta::FromDays(30); -const float kVisitCountMaxValue = 100.0; -const int kMaxSignificantVisits = 20; -const float kTypedCountMaxValue = 300.0; -const int kMaxSignificantTyped = 20; -const float kMaxRawScore = kOrderMaxValue + kStartMaxValue + kCompleteMaxValue + - kLastVisitMaxValue + kVisitCountMaxValue + kTypedCountMaxValue; -const float kMaxNormalizedRawScore = 1000.0; +// Score ranges used to get a 'base' score for each of the scoring factors +// (such as recency of last visit, times visited, times the URL was typed, +// and the quality of the string match). There is a matching value range for +// each of these scores for each factor. +const int kScoreRank[] = { 1425, 1200, 900, 400 }; ScoredHistoryMatch::ScoredHistoryMatch() : raw_score(0), @@ -111,6 +102,38 @@ int AccumulateMatchLength(int total, const TermMatch& match) { return total + match.length; } +// Converts a raw value for some particular scoring factor into a score +// component for that factor. The conversion function is piecewise linear, with +// input values provided in |value_ranks| and resulting output scores from +// |kScoreRank| (mathematically, f(value_rank[i]) = kScoreRank[i]). A score +// cannot be higher than kScoreRank[0], and drops directly to 0 if lower than +// kScoreRank[3]. +// +// For example, take |value| == 70 and |value_ranks| == { 100, 50, 30, 10 }. +// Because 70 falls between ranks 0 (100) and 1 (50), the score is given by the +// linear function: +// score = m * value + b, where +// m = (kScoreRank[0] - kScoreRank[1]) / (value_ranks[0] - value_ranks[1]) +// b = value_ranks[1] +// Any value higher than 100 would be scored as if it were 100, and any value +// lower than 10 scored 0. +int ScoreForValue(int value, const int* value_ranks) { + int i = 0; + int rank_count = arraysize(kScoreRank); + while ((i < rank_count) && ((value_ranks[0] < value_ranks[1]) ? + (value > value_ranks[i]) : (value < value_ranks[i]))) + ++i; + if (i >= rank_count) + return 0; + int score = kScoreRank[i]; + if (i > 0) { + score += (value - value_ranks[i]) * + (kScoreRank[i - 1] - kScoreRank[i]) / + (value_ranks[i - 1] - value_ranks[i]); + } + return score; +} + InMemoryURLIndex::InMemoryURLIndex(const FilePath& history_dir) : history_dir_(history_dir), history_item_count_(0) { @@ -342,6 +365,7 @@ ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms( // substring match, inserting those which pass in order by score. scored_items = std::for_each(history_id_set.begin(), history_id_set.end(), AddHistoryMatch(*this, lower_terms)).ScoredMatches(); + // Select and sort only the top kMaxMatches results. if (scored_items.size() > AutocompleteProvider::kMaxMatches) { std::partial_sort(scored_items.begin(), @@ -648,6 +672,33 @@ TermMatches InMemoryURLIndex::SortAndDeoverlap(const TermMatches& matches) { } // static +std::vector<size_t> InMemoryURLIndex::OffsetsFromTermMatches( + const TermMatches& matches) { + std::vector<size_t> offsets; + for (TermMatches::const_iterator i = matches.begin(); i != matches.end(); ++i) + offsets.push_back(i->offset); + return offsets; +} + +// static +TermMatches InMemoryURLIndex::ReplaceOffsetsInTermMatches( + const TermMatches& matches, + const std::vector<size_t>& offsets) { + DCHECK_EQ(matches.size(), offsets.size()); + TermMatches new_matches; + std::vector<size_t>::const_iterator offset_iter = offsets.begin(); + for (TermMatches::const_iterator term_iter = matches.begin(); + term_iter != matches.end(); ++term_iter, ++offset_iter) { + if (*offset_iter != string16::npos) { + TermMatch new_match(*term_iter); + new_match.offset = *offset_iter; + new_matches.push_back(new_match); + } + } + return new_matches; +} + +// static ScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL( const URLRow& row, const String16Vector& terms) { @@ -688,52 +739,61 @@ ScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL( // Get partial scores based on term matching. Note that the score for // each of the URL and title are adjusted by the fraction of the // terms appearing in each. - int url_score = RawScoreForMatches(match.url_matches, url.size()) * + int url_score = ScoreComponentForMatches(match.url_matches, url.size()) * match.url_matches.size() / terms.size(); - int title_score = RawScoreForMatches(match.title_matches, title.size()) * - match.title_matches.size() / terms.size(); + int title_score = + ScoreComponentForMatches(match.title_matches, title.size()) * + static_cast<int>(match.title_matches.size()) / + static_cast<int>(terms.size()); // Arbitrarily pick the best. // TODO(mrossetti): It might make sense that a term which appears in both the // URL and the Title should boost the score a bit. int term_score = std::max(url_score, title_score); + if (term_score == 0) + return match; + + // Factor in recency of visit, visit count and typed count attributes of the + // URLRow. + const int kDaysAgoLevel[] = { 0, 10, 20, 30 }; + int score = ScoreForValue((base::Time::Now() - + row.last_visit()).InDays(), kDaysAgoLevel); + const int kVisitCountLevel[] = { 30, 10, 5, 3 }; + int visit_count_value = ScoreForValue(row.visit_count(), kVisitCountLevel); + const int kTypedCountLevel[] = { 10, 5, 3, 1 }; + int typed_count_value = ScoreForValue(row.typed_count(), kTypedCountLevel); + + // Determine how many of the factors comprising the final score are + // significant by summing the relative factors for each and subtracting how + // many will be 'discarded' even if they are low. + const int kVisitCountMultiplier = 2; + const int kTypedCountMultiplier = 3; + const int kSignificantFactors = + kVisitCountMultiplier + // Visit count factor plus + kTypedCountMultiplier + // typed count factor plus + 2 - // one each for string match and last visit + 2; // minus 2 insignificant factors. + // The following, in effect, discards up to |kSignificantFactors| low scoring + // elements which contribute little to the score but which can inordinately + // drag down an otherwise good score. + match.raw_score = std::min(kScoreRank[0], (term_score + score + + (visit_count_value * kVisitCountMultiplier) + (typed_count_value * + kTypedCountMultiplier)) / kSignificantFactors); - // Factor in attributes of the URLRow. - // Items which have been visited recently score higher. - int64 delta_time = (kMaxSignificantDay - - std::min((base::Time::Now() - row.last_visit()), - kMaxSignificantDay)).ToInternalValue(); - float last_visit_value = - (static_cast<float>(delta_time) / - static_cast<float>(kMaxSignificantDay.ToInternalValue())) * - kLastVisitMaxValue; - - float visit_count_value = - (static_cast<float>(std::min(row.visit_count(), - kMaxSignificantVisits))) / static_cast<float>(kMaxSignificantVisits) * - kVisitCountMaxValue; - - float typed_count_value = - (static_cast<float>(std::min(row.typed_count(), - kMaxSignificantTyped))) / static_cast<float>(kMaxSignificantTyped) * - kTypedCountMaxValue; - - float raw_score = term_score + last_visit_value + visit_count_value + - typed_count_value; - - // Normalize the score. - match.raw_score = static_cast<int>((raw_score / kMaxRawScore) * - kMaxNormalizedRawScore); return match; } -int InMemoryURLIndex::RawScoreForMatches(const TermMatches& matches, - size_t max_length) { +int InMemoryURLIndex::ScoreComponentForMatches(const TermMatches& matches, + size_t max_length) { // TODO(mrossetti): This is good enough for now but must be fine-tuned. if (matches.empty()) return 0; - // Search terms appearing in order score higher. - float order_value = 10.0; + // Score component for whether the input terms (if more than one) were found + // in the same order in the match. Start with kOrderMaxValue points divided + // equally among (number of terms - 1); then discount each of those terms that + // is out-of-order in the match. + const int kOrderMaxValue = 250; + int order_value = kOrderMaxValue; if (matches.size() > 1) { int max_possible_out_of_order = matches.size() - 1; int out_of_order = 0; @@ -741,33 +801,42 @@ int InMemoryURLIndex::RawScoreForMatches(const TermMatches& matches, if (matches[i - 1].term_num > matches[i].term_num) ++out_of_order; } - order_value = - (static_cast<float>(max_possible_out_of_order - out_of_order) / - max_possible_out_of_order) * kOrderMaxValue; + order_value = (max_possible_out_of_order - out_of_order) * kOrderMaxValue / + max_possible_out_of_order; } - // Search terms which appear earlier score higher. - // No points if the first search term does not appear in the first - // kMaxSignificantStart chars. - float start_value = - (static_cast<float>(kMaxSignificantStart - - std::min(kMaxSignificantStart, matches[0].offset))) / - static_cast<float>(kMaxSignificantStart) * kStartMaxValue; - - // Search terms which comprise a greater portion score higher. + // Score component for how early in the match string the first search term + // appears. Start with kStartMaxValue points and discount by + // 1/kMaxSignificantStart points for each character later than the first at + // which the term begins. No points are earned if the start of the match + // occurs at or after kMaxSignificantStart. + const size_t kMaxSignificantStart = 20; + const int kStartMaxValue = 250; + int start_value = (kMaxSignificantStart - + std::min(kMaxSignificantStart, matches[0].offset)) * kStartMaxValue / + kMaxSignificantStart; + + // Score component for how much of the matched string the input terms cover. + // kCompleteMaxValue points times the fraction of the URL/page title string + // that was matched. size_t term_length_total = std::accumulate(matches.begin(), matches.end(), 0, AccumulateMatchLength); - float complete_value = - (static_cast<float>(term_length_total) / static_cast<float>(max_length)) * - kStartMaxValue; - return static_cast<int>(order_value + start_value + complete_value); + const int kCompleteMaxValue = 500; + int complete_value = term_length_total * kCompleteMaxValue / max_length; + + int raw_score = order_value + start_value + complete_value; + const int kTermScoreLevel[] = { 1000, 650, 500, 200 }; + + // Scale the sum of the three components above into a single score component + // on the same scale as that used in ScoredMatchForURL(). + return ScoreForValue(raw_score, kTermScoreLevel); } InMemoryURLIndex::AddHistoryMatch::AddHistoryMatch( const InMemoryURLIndex& index, const String16Vector& lower_terms) - : index_(index), - lower_terms_(lower_terms) { + : index_(index), + lower_terms_(lower_terms) { } InMemoryURLIndex::AddHistoryMatch::~AddHistoryMatch() {} diff --git a/chrome/browser/history/in_memory_url_index.h b/chrome/browser/history/in_memory_url_index.h index e50b07f..cf02970 100644 --- a/chrome/browser/history/in_memory_url_index.h +++ b/chrome/browser/history/in_memory_url_index.h @@ -165,6 +165,15 @@ class InMemoryURLIndex { static String16Vector WordVectorFromString16(const string16& uni_string, bool break_on_space); + // Extract and return the offsets from |matches|. + static std::vector<size_t> OffsetsFromTermMatches(const TermMatches& matches); + + // Replace the offsets in |matches| with those given in |offsets|, deleting + // any which are npos, and return the updated list of matches. + static TermMatches ReplaceOffsetsInTermMatches( + const TermMatches& matches, + const std::vector<size_t>& offsets); + private: friend class AddHistoryMatch; FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization); @@ -329,11 +338,11 @@ class InMemoryURLIndex { const URLRow& row, const String16Vector& terms_vector); - // Calculates a partial raw score based on position, ordering and total + // Calculates a component score based on position, ordering and total // substring match size using metrics recorded in |matches|. |max_length| // is the length of the string against which the terms are being searched. - static int RawScoreForMatches(const TermMatches& matches, - size_t max_length); + static int ScoreComponentForMatches(const TermMatches& matches, + size_t max_length); // Sorts and removes overlapping substring matches from |matches| and // returns the cleaned up matches. diff --git a/chrome/browser/history/in_memory_url_index_unittest.cc b/chrome/browser/history/in_memory_url_index_unittest.cc index dee196c..37e64fb 100644 --- a/chrome/browser/history/in_memory_url_index_unittest.cc +++ b/chrome/browser/history/in_memory_url_index_unittest.cc @@ -204,7 +204,7 @@ TEST_F(InMemoryURLIndexTest, Retrieval) { matches = url_index_->HistoryItemsForTerms(terms); ASSERT_EQ(2U, matches.size()); // The results should be in descending score order. - EXPECT_GT(matches[0].raw_score, matches[1].raw_score); + EXPECT_GE(matches[0].raw_score, matches[1].raw_score); // Search which should result in nearly perfect result. terms.clear(); @@ -228,7 +228,7 @@ TEST_F(InMemoryURLIndexTest, Retrieval) { matches = url_index_->HistoryItemsForTerms(terms); ASSERT_EQ(1U, matches.size()); // The results should have a poor score. - EXPECT_LT(matches[0].raw_score, 200); + EXPECT_LT(matches[0].raw_score, 500); EXPECT_EQ(33, matches[0].url_info.id()); EXPECT_EQ("http://quiteuselesssearchresultxyz.com/", matches[0].url_info.url().spec()); // Note: URL gets lowercased. @@ -263,7 +263,7 @@ TEST_F(InMemoryURLIndexTest, TitleSearch) { url_index_.reset(new InMemoryURLIndex()); url_index_->Init(this, "en,ja,hi,zh"); // Signal if someone has changed the test DB. - EXPECT_EQ(28U, url_index_->history_info_map_.size()); + EXPECT_EQ(25U, url_index_->history_info_map_.size()); InMemoryURLIndex::String16Vector terms; // Ensure title is being searched. @@ -367,6 +367,31 @@ TEST_F(InMemoryURLIndexTest, StaticFunctions) { EXPECT_EQ(expected_offsets[i], matches_c[i].offset); } +TEST_F(InMemoryURLIndexTest, OffsetsAndTermMatches) { + // Test OffsetsFromTermMatches + history::TermMatches matches_a; + matches_a.push_back(history::TermMatch(1, 1, 2)); + matches_a.push_back(history::TermMatch(2, 4, 3)); + matches_a.push_back(history::TermMatch(3, 9, 1)); + matches_a.push_back(history::TermMatch(3, 10, 1)); + matches_a.push_back(history::TermMatch(4, 14, 5)); + std::vector<size_t> offsets = + InMemoryURLIndex::OffsetsFromTermMatches(matches_a); + const size_t expected_offsets_a[] = {1, 4, 9, 10, 14}; + ASSERT_EQ(offsets.size(), arraysize(expected_offsets_a)); + for (size_t i = 0; i < offsets.size(); ++i) + EXPECT_EQ(expected_offsets_a[i], offsets[i]); + + // Test ReplaceOffsetsInTermMatches + offsets[2] = string16::npos; + history::TermMatches matches_b = + InMemoryURLIndex::ReplaceOffsetsInTermMatches(matches_a, offsets); + const size_t expected_offsets_b[] = {1, 4, 10, 14}; + ASSERT_EQ(arraysize(expected_offsets_b), matches_b.size()); + for (size_t i = 0; i < matches_b.size(); ++i) + EXPECT_EQ(expected_offsets_b[i], matches_b[i].offset); +} + TEST_F(InMemoryURLIndexTest, TypedCharacterCaching) { // Verify that match results for previously typed characters are retained // (in the term_char_word_set_cache_) and reused, if possible, in future @@ -416,7 +441,7 @@ TEST_F(InMemoryURLIndexTest, TypedCharacterCaching) { } TEST_F(InMemoryURLIndexTest, Scoring) { - URLRow row_a(MakeURLRow("http://abcdef", "fedcba", 20, 0, 20)); + URLRow row_a(MakeURLRow("http://abcdef", "fedcba", 3, 30, 1)); // Test scores based on position. ScoredHistoryMatch scored_a( InMemoryURLIndex::ScoredMatchForURL(row_a, Make1Term("abc"))); @@ -434,20 +459,20 @@ TEST_F(InMemoryURLIndexTest, Scoring) { InMemoryURLIndex::ScoredMatchForURL(row_a, Make2Terms("def", "abc"))); EXPECT_GT(scored_d.raw_score, scored_e.raw_score); // Test scores based on visit_count. - URLRow row_b(MakeURLRow("http://abcdef", "fedcba", 10, 0, 20)); + URLRow row_b(MakeURLRow("http://abcdef", "fedcba", 10, 30, 1)); ScoredHistoryMatch scored_f( InMemoryURLIndex::ScoredMatchForURL(row_b, Make1Term("abc"))); - EXPECT_GT(scored_a.raw_score, scored_f.raw_score); + EXPECT_GT(scored_f.raw_score, scored_a.raw_score); // Test scores based on last_visit. - URLRow row_c(MakeURLRow("http://abcdef", "fedcba", 20, 2, 20)); + URLRow row_c(MakeURLRow("http://abcdef", "fedcba", 3, 10, 1)); ScoredHistoryMatch scored_g( InMemoryURLIndex::ScoredMatchForURL(row_c, Make1Term("abc"))); - EXPECT_GT(scored_a.raw_score, scored_g.raw_score); + EXPECT_GT(scored_g.raw_score, scored_a.raw_score); // Test scores based on typed_count. - URLRow row_d(MakeURLRow("http://abcdef", "fedcba", 20, 0, 10)); + URLRow row_d(MakeURLRow("http://abcdef", "fedcba", 3, 30, 10)); ScoredHistoryMatch scored_h( InMemoryURLIndex::ScoredMatchForURL(row_d, Make1Term("abc"))); - EXPECT_GT(scored_a.raw_score, scored_h.raw_score); + EXPECT_GT(scored_h.raw_score, scored_a.raw_score); } TEST_F(InMemoryURLIndexTest, AddNewRows) { diff --git a/chrome/test/data/History/url_history_provider_test.db.txt b/chrome/test/data/History/url_history_provider_test.db.txt index 26be362..5fd7821 100644 --- a/chrome/test/data/History/url_history_provider_test.db.txt +++ b/chrome/test/data/History/url_history_provider_test.db.txt @@ -26,36 +26,33 @@ The ordering, URLs and titles must be kept in sync with the unit tests found in in_memory_url_index_unittest.cc. */ -INSERT INTO "urls" VALUES(1,'http://www.reuters.com/article/idUSN0839880620100708','UPDATE 1-US 30-yr mortgage rate drops to new record low | Reuters',3,1,2,0,29); +INSERT INTO "urls" VALUES(1,'http://www.reuters.com/article/idUSN0839880620100708','UPDATE 1-US 30-yr mortgage rate drops to new record low | Reuters',3,1,2,0,29); // Qualifies INSERT INTO "urls" VALUES(2,'http://www.golfweek.com/news/2010/jul/08/goydos-opens-john-deere-classic-59/','Goydos opens John Deere Classic with 59',3,1,4,0,27); -INSERT INTO "urls" VALUES(3,'http://www.businessandmedia.org/articles/2010/20100708120415.aspx','LeBronomics: Could High Taxes Influence James'' Team Decision?',4,1,2,0,28); -INSERT INTO "urls" VALUES(4,'http://www.realclearmarkets.com/articles/2010/07/08/diversity_in_the_financial_sector_98562.html','RealClearMarkets - Racial, Gender Quotas in the Financial Bill?',4,1,4,0,0); -INSERT INTO "urls" VALUES(5,'http://drudgereport.com/','DRUDGE REPORT 2010',3,2,2,0,0); -INSERT INTO "urls" VALUES(6,'http://totalfinder.binaryage.com/','TotalFinder brings tabs to your native Finder and more!',3,2,4,0,26); -INSERT INTO "urls" VALUES(7,'http://www.clickgamer.com/products/pid_5269/screenshots/j2me/large/ingame5.png','ingame5.png 176×208 pixels',4,2,2,0,21); -INSERT INTO "urls" VALUES(8,'http://getsharekit.com/','ShareKit : Drop-in Share Features for all iOS Apps',4,2,4,0,20); +INSERT INTO "urls" VALUES(3,'http://www.businessandmedia.org/articles/2010/20100708120415.aspx','LeBronomics: Could High Taxes Influence James'' Team Decision?',4,1,2,0,28); // Qualifies +INSERT INTO "urls" VALUES(4,'http://www.realclearmarkets.com/articles/2010/07/08/diversity_in_the_financial_sector_98562.html','RealClearMarkets - Racial, Gender Quotas in the Financial Bill?',4,1,4,0,0); // Qualifies +INSERT INTO "urls" VALUES(5,'http://drudgereport.com/','DRUDGE REPORT 2010',3,2,2,0,0); // Qualifies +INSERT INTO "urls" VALUES(6,'http://totalfinder.binaryage.com/','TotalFinder brings tabs to your native Finder and more!',3,2,4,0,26); // Qualifies +INSERT INTO "urls" VALUES(8,'http://getsharekit.com/','ShareKit : Drop-in Share Features for all iOS Apps',4,2,4,0,20); // Qualifies INSERT INTO "urls" VALUES(9,'http://en.wikipedia.org/wiki/Control-Z','Control-Z - Wikipedia, the free encyclopedia',0,0,6,0,0); INSERT INTO "urls" VALUES(10,'http://vmware.com/info?id=724','VMware Account Management Login',1,0,6,0,0); INSERT INTO "urls" VALUES(11,'http://www.tech-recipes.com/rx/2621/os_x_change_path_environment_variable/','OS X: Change your PATH environment variable | Mac system administration | Tech-Recipes',0,1,6,0,14); -INSERT INTO "urls" VALUES(12,'http://view.atdmt.com/PPJ/iview/194841301/direct;wi.160;hi.600/01?click=','',6,6,0,1,0); -INSERT INTO "urls" VALUES(13,'http://www.itmedia.co.jp/news/','ITmedia News/ITの今が見える、明日が分かる',6,6,0,0,25); -INSERT INTO "urls" VALUES(14,'http://www.nikkei.co.jp/','NIKKEI NET(日経ネット)は日本経済新聞 電子版に生まれ変わりました',6,6,0,0,73); -INSERT INTO "urls" VALUES(15,'http://www.cnn.com/','CNN.com International - Breaking, World, Business, Sports, Entertainment and Video News',6,6,0,0,89); -INSERT INTO "urls" VALUES(16,'http://www.zdnet.com/','Technology News, Analysis, Comments and Product Reviews for IT Professionals | ZDNet',6,6,0,0,652); -INSERT INTO "urls" VALUES(17,'http://www.crash.net/','Crash.Net | Formula 1 & MotoGP | Motorsport News',6,6,0,0,239); -INSERT INTO "urls" VALUES(18,'http://www.theinquirer.net/','THE INQUIRER - Microprocessor, Server, Memory, PCS, Graphics, Networking, Storage',6,6,0,0,79); -INSERT INTO "urls" VALUES(19,'http://www.theregister.co.uk/','The Register: Sci/Tech News for the World',6,6,0,0,74); -INSERT INTO "urls" VALUES(20,'http://blogs.technet.com/markrussinovich/','Mark''s Blog - Site Home - TechNet Blogs',6,6,0,0,685); -INSERT INTO "urls" VALUES(21,'http://www.icu-project.org/','ICU Home Page (ICU - International Components for Unicode)',6,6,0,0,445); -INSERT INTO "urls" VALUES(22,'http://site.icu-project.org/','ICU Home Page (ICU - International Components for Unicode)',6,6,0,0,445); -INSERT INTO "urls" VALUES(23,'http://icu-project.org/apiref/icu4c/','ICU 4.2: Main Page',6,6,0,0,212); -INSERT INTO "urls" VALUES(24,'http://www.danilatos.com/event-test/ExperimentTest.html','Experimentation Harness',6,6,0,0,0); -INSERT INTO "urls" VALUES(25,'http://www.codeguru.com/','CodeGuru : codeguru',6,6,0,0,110); -INSERT INTO "urls" VALUES(26,'http://www.codeproject.com/','Your Development Resource - CodeProject',6,6,0,0,369); -INSERT INTO "urls" VALUES(27,'http://www.tomshardware.com/us/#redir','Tom''s Hardware: Hardware News, Tests and Reviews',6,6,0,0,65); -INSERT INTO "urls" VALUES(28,'http://www.ddj.com/windows/184416623','Dr. Dobb''s | Avoiding the Visual C++ Runtime Library | 2 1, 2003',6,6,0,0,0); -INSERT INTO "urls" VALUES(29,'http://svcs.cnn.com/weather/getForecast?time=34&mode=json_html&zipCode=336736767676&locCode=EGLL&celcius=true&csiID=csi2','',6,6,0,1,0); -INSERT INTO "urls" VALUES(30,'http://www.drudgery.com/Dogs%20Cats%20Gorillas%20Dolphine%20Sea%20Slugsand%20Mice','Life in the Slow Lane',3,2,2,0,0); +INSERT INTO "urls" VALUES(12,'http://view.atdmt.com/PPJ/iview/194841301/direct;wi.160;hi.600/01?click=','',6,6,0,1,0); // Qualifies +INSERT INTO "urls" VALUES(15,'http://www.cnn.com/','CNN.com International - Breaking, World, Business, Sports, Entertainment and Video News',6,6,0,0,89); // Qualifies +INSERT INTO "urls" VALUES(16,'http://www.zdnet.com/','Technology News, Analysis, Comments and Product Reviews for IT Professionals | ZDNet',6,6,0,0,652); // Qualifies +INSERT INTO "urls" VALUES(17,'http://www.crash.net/','Crash.Net | Formula 1 & MotoGP | Motorsport News',6,6,0,0,239); // Qualifies +INSERT INTO "urls" VALUES(18,'http://www.theinquirer.net/','THE INQUIRER - Microprocessor, Server, Memory, PCS, Graphics, Networking, Storage',6,6,0,0,79); // Qualifies +INSERT INTO "urls" VALUES(19,'http://www.theregister.co.uk/','The Register: Sci/Tech News for the World',6,6,0,0,74); // Qualifies +INSERT INTO "urls" VALUES(20,'http://blogs.technet.com/markrussinovich/','Mark''s Blog - Site Home - TechNet Blogs',6,6,0,0,685); // Qualifies +INSERT INTO "urls" VALUES(21,'http://www.icu-project.org/','ICU Home Page (ICU - International Components for Unicode)',6,6,0,0,445); // Qualifies +INSERT INTO "urls" VALUES(22,'http://site.icu-project.org/','ICU Home Page (ICU - International Components for Unicode)',6,6,0,0,445); // Qualifies +INSERT INTO "urls" VALUES(23,'http://icu-project.org/apiref/icu4c/','ICU 4.2: Main Page',6,6,0,0,212); // Qualifies +INSERT INTO "urls" VALUES(24,'http://www.danilatos.com/event-test/ExperimentTest.html','Experimentation Harness',6,6,0,0,0); // Qualifies +INSERT INTO "urls" VALUES(25,'http://www.codeguru.com/','CodeGuru : codeguru',6,6,0,0,110); // Qualifies +INSERT INTO "urls" VALUES(26,'http://www.codeproject.com/','Your Development Resource - CodeProject',6,6,0,0,369); // Qualifies +INSERT INTO "urls" VALUES(27,'http://www.tomshardware.com/us/#redir','Tom''s Hardware: Hardware News, Tests and Reviews',6,6,0,0,65); // Qualifies +INSERT INTO "urls" VALUES(28,'http://www.ddj.com/windows/184416623','Dr. Dobb''s | Avoiding the Visual C++ Runtime Library | 2 1, 2003',6,6,0,0,0); // Qualifies +INSERT INTO "urls" VALUES(29,'http://svcs.cnn.com/weather/getForecast?time=34&mode=json_html&zipCode=336736767676&locCode=EGLL&celcius=true&csiID=csi2','',6,6,0,1,0); // Qualifies +INSERT INTO "urls" VALUES(30,'http://www.drudgery.com/Dogs%20and%20Mice','Life in the Slow Lane',8,2,2,0,0); // Qualifies INSERT INTO "urls" VALUES(31,'http://www.redrudgerydo.com/','Music of the Wild Landscape',0,0,6,0,0); -INSERT INTO "urls" VALUES(32,'https://NearlyPerfectResult.com/','Practically Perfect Search Result',99,99,0,0,0); -INSERT INTO "urls" VALUES(33,'http://QuiteUselessSearchResultxyz.com/','Practically Useless Search Result',4,0,99,0,0); +INSERT INTO "urls" VALUES(32,'https://NearlyPerfectResult.com/','Practically Perfect Search Result',99,99,0,0,0); // Qualifies +INSERT INTO "urls" VALUES(33,'http://QuiteUselessSearchResultxyz.com/','Practically Useless Search Result',4,0,99,0,0); // Qualifies |