summaryrefslogtreecommitdiffstats
path: root/chrome
diff options
context:
space:
mode:
authormrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-04-20 04:24:55 +0000
committermrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-04-20 04:24:55 +0000
commitc99037d1bad96b729e015a84ca19e5a144689ccd (patch)
tree90fde2ca30a50cd28b60e8368e4a4ca94650e32f /chrome
parent456c8047e4c8cea12174f649e209798e11d83781 (diff)
downloadchromium_src-c99037d1bad96b729e015a84ca19e5a144689ccd.zip
chromium_src-c99037d1bad96b729e015a84ca19e5a144689ccd.tar.gz
chromium_src-c99037d1bad96b729e015a84ca19e5a144689ccd.tar.bz2
Multi-highlight and scoring improvements.
Incorporate multi-offset highlighting into HQP. Update results scoring. Switch to integer scoring operations from float. BUG=79271,78153 TEST=Added unit tests. Updated unit tests. Review URL: http://codereview.chromium.org/6859005 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@82237 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome')
-rw-r--r--chrome/app/generated_resources.grd6
-rw-r--r--chrome/browser/autocomplete/autocomplete.cc4
-rw-r--r--chrome/browser/autocomplete/history_quick_provider.cc59
-rw-r--r--chrome/browser/autocomplete/history_quick_provider.h4
-rw-r--r--chrome/browser/autocomplete/history_quick_provider_unittest.cc26
-rw-r--r--chrome/browser/history/in_memory_url_index.cc199
-rw-r--r--chrome/browser/history/in_memory_url_index.h15
-rw-r--r--chrome/browser/history/in_memory_url_index_unittest.cc45
-rw-r--r--chrome/test/data/History/url_history_provider_test.db.txt53
9 files changed, 273 insertions, 138 deletions
diff --git a/chrome/app/generated_resources.grd b/chrome/app/generated_resources.grd
index fddb05c..2d44c7d 100644
--- a/chrome/app/generated_resources.grd
+++ b/chrome/app/generated_resources.grd
@@ -4189,6 +4189,12 @@ Keep your key file in a safe place. You will need it to create new versions of y
<message name="IDS_FLAGS_DISABLE_INTERACTIVE_FORM_VALIDATION_DESCRIPTION" desc="Description of the 'Disable HTML5 interactive form validation' lab.">
Disable showing validation messages and preventing form submission.
</message>
+ <message name="IDS_FLAGS_ENABLE_HISTORY_QUICK_PROVIDER" desc="Name of the 'Enable better visit history matching in the omnibox' lab.">
+ Better omnibox history matching
+ </message>
+ <message name="IDS_FLAGS_ENABLE_HISTORY_QUICK_PROVIDER_DESCRIPTION" desc="Description of the 'Enable better visit history matching in the omnibox' lab.">
+ Enables substring and multi-fragment matching within URLs from history.
+ </message>
<message name="IDS_FLAGS_NEW_TAB_PAGE_4_NAME" desc="Name of the new tab page 4 lab.">
Experimental new tab page
</message>
diff --git a/chrome/browser/autocomplete/autocomplete.cc b/chrome/browser/autocomplete/autocomplete.cc
index c022f96..f072c0c 100644
--- a/chrome/browser/autocomplete/autocomplete.cc
+++ b/chrome/browser/autocomplete/autocomplete.cc
@@ -788,7 +788,9 @@ AutocompleteController::AutocompleteController(
in_start_(false) {
search_provider_ = new SearchProvider(this, profile);
providers_.push_back(search_provider_);
- if (!CommandLine::ForCurrentProcess()->HasSwitch(
+ if (CommandLine::ForCurrentProcess()->HasSwitch(
+ switches::kEnableHistoryQuickProvider) &&
+ !CommandLine::ForCurrentProcess()->HasSwitch(
switches::kDisableHistoryQuickProvider))
providers_.push_back(new HistoryQuickProvider(this, profile));
if (!CommandLine::ForCurrentProcess()->HasSwitch(
diff --git a/chrome/browser/autocomplete/history_quick_provider.cc b/chrome/browser/autocomplete/history_quick_provider.cc
index 8a7046a..8e31343 100644
--- a/chrome/browser/autocomplete/history_quick_provider.cc
+++ b/chrome/browser/autocomplete/history_quick_provider.cc
@@ -4,6 +4,8 @@
#include "chrome/browser/autocomplete/history_quick_provider.h"
+#include <vector>
+
#include "base/basictypes.h"
#include "base/i18n/break_iterator.h"
#include "base/logging.h"
@@ -94,13 +96,20 @@ void HistoryQuickProvider::DoAutocomplete() {
if (matches.empty())
return;
+ // Artificially reduce the score of high-scoring results which should not be
+ // inline autocompletd. Each such result gets the next available
+ // |next_dont_inline_score|. Upon use of next_dont_inline_score it is
+ // decremented.
+ int next_dont_inline_score = 1199;
size_t match_num = matches.size() - 1;
for (ScoredHistoryMatches::const_iterator match_iter = matches.begin();
match_iter != matches.end(); ++match_iter, --match_num) {
const ScoredHistoryMatch& history_match(*match_iter);
if (history_match.raw_score > 0) {
AutocompleteMatch ac_match =
- QuickMatchToACMatch(history_match, match_num);
+ QuickMatchToACMatch(history_match, match_num,
+ autocomplete_input_.prevent_inline_autocomplete(),
+ &next_dont_inline_score);
matches_.push_back(ac_match);
}
}
@@ -108,35 +117,57 @@ void HistoryQuickProvider::DoAutocomplete() {
AutocompleteMatch HistoryQuickProvider::QuickMatchToACMatch(
const ScoredHistoryMatch& history_match,
- size_t match_number) {
+ size_t match_number,
+ bool prevent_inline_autocomplete,
+ int* next_dont_inline_score) {
+ DCHECK(next_dont_inline_score);
const history::URLRow& info = history_match.url_info;
int score = CalculateRelevance(history_match.raw_score,
autocomplete_input_.type(),
NORMAL, match_number);
+
+ // Discount a very high score when a) a match doesn't start at the beginning
+ // of the URL, or b) there are more than one substring matches in the URL, or
+ // c) the type of request does not allow inline autocompletion. This prevents
+ // the URL from being offered as an inline completion.
+ const int kMaxDontInlineScore = 1199;
+ if (score > kMaxDontInlineScore &&
+ (prevent_inline_autocomplete || history_match.url_matches.size() > 1 ||
+ history_match.url_matches[0].offset > 0)) {
+ score = std::min(*next_dont_inline_score, score);
+ --*next_dont_inline_score;
+ }
+
AutocompleteMatch match(this, score, !!info.visit_count(),
history_match.url_matches.empty() ?
AutocompleteMatch::HISTORY_URL :
AutocompleteMatch::HISTORY_TITLE);
+
match.destination_url = info.url();
DCHECK(match.destination_url.is_valid());
+
+ // Format the fill_into_edit and determine its offset.
size_t inline_autocomplete_offset =
history_match.input_location + autocomplete_input_.text().length();
- const net::FormatUrlTypes format_types = net::kFormatUrlOmitAll;
match.fill_into_edit =
AutocompleteInput::FormattedStringWithEquivalentMeaning(info.url(),
- net::FormatUrl(info.url(), languages_, format_types,
- UnescapeRule::SPACES, NULL, NULL,
- &inline_autocomplete_offset));
+ net::FormatUrl(info.url(), languages_, net::kFormatUrlOmitAll,
+ UnescapeRule::SPACES, NULL, NULL, &inline_autocomplete_offset));
if (!autocomplete_input_.prevent_inline_autocomplete())
match.inline_autocomplete_offset = inline_autocomplete_offset;
DCHECK((match.inline_autocomplete_offset == string16::npos) ||
(match.inline_autocomplete_offset <= match.fill_into_edit.length()));
// Format the URL autocomplete presentation.
- match.contents = net::FormatUrl(info.url(), languages_, format_types,
- UnescapeRule::SPACES, NULL, NULL, NULL);
- match.contents_class = SpansFromTermMatch(history_match.url_matches,
- match.contents.size());
+ std::vector<size_t> offsets =
+ InMemoryURLIndex::OffsetsFromTermMatches(history_match.url_matches);
+ match.contents =
+ net::FormatUrlWithOffsets(info.url(), languages_, net::kFormatUrlOmitAll,
+ UnescapeRule::SPACES, NULL, NULL, &offsets);
+ history::TermMatches new_matches =
+ InMemoryURLIndex::ReplaceOffsetsInTermMatches(history_match.url_matches,
+ offsets);
+ match.contents_class = SpansFromTermMatch(new_matches, match.contents.size());
// Format the description autocomplete presentation.
match.description = info.title();
@@ -177,7 +208,7 @@ int HistoryQuickProvider::CalculateRelevance(int raw_score,
return 1200;
default:
- return 400 + raw_score;
+ return raw_score;
}
}
@@ -196,12 +227,6 @@ ACMatchClassifications HistoryQuickProvider::SpansFromTermMatch(
size_t match_count = matches.size();
for (size_t i = 0; i < match_count;) {
size_t offset = matches[i].offset;
- // TODO(mrossetti): Remove the following 'if' when http://crbug.com/77210
- // has been properly fixed. This guards against trying to highlight
- // substrings which fall off the end of the string as a result of having
- // encoded characters in the string.
- if (offset >= text_length)
- return spans;
spans.push_back(ACMatchClassification(offset,
ACMatchClassification::MATCH));
// Skip all adjacent matches.
diff --git a/chrome/browser/autocomplete/history_quick_provider.h b/chrome/browser/autocomplete/history_quick_provider.h
index 3b0277a..996e4be 100644
--- a/chrome/browser/autocomplete/history_quick_provider.h
+++ b/chrome/browser/autocomplete/history_quick_provider.h
@@ -46,7 +46,9 @@ class HistoryQuickProvider : public HistoryProvider {
AutocompleteMatch QuickMatchToACMatch(
const history::ScoredHistoryMatch& history_match,
- size_t match_number);
+ size_t match_number,
+ bool prevent_inline_autocomplete,
+ int* next_dont_inline_score);
// Determines the relevance for some input, given its type and which match it
// is. If |match_type| is NORMAL, |match_number| is a number
diff --git a/chrome/browser/autocomplete/history_quick_provider_unittest.cc b/chrome/browser/autocomplete/history_quick_provider_unittest.cc
index 4ecf7f1..985340d 100644
--- a/chrome/browser/autocomplete/history_quick_provider_unittest.cc
+++ b/chrome/browser/autocomplete/history_quick_provider_unittest.cc
@@ -43,7 +43,7 @@ struct TestURLInfo {
{"http://news.google.com/?ned=us&topic=n", "Google News - U.S.", 2, 2, 0},
{"http://news.google.com/", "Google News", 1, 1, 0},
{"http://foo.com/", "Dir", 5, 5, 0},
- {"http://foo.com/dir/", "Dir", 2, 2, 0},
+ {"http://foo.com/dir/", "Dir", 2, 1, 10},
{"http://foo.com/dir/another/", "Dir", 5, 1, 0},
{"http://foo.com/dir/another/again/", "Dir", 10, 0, 0},
{"http://foo.com/dir/another/again/myfile.html", "File", 10, 2, 0},
@@ -62,12 +62,12 @@ struct TestURLInfo {
{"http://daysagoest.com/x/c", "DC", 1, 1, 2},
{"http://daysagoest.com/x/d", "DD", 1, 1, 3},
{"http://daysagoest.com/y/e", "DE", 1, 1, 4},
- {"http://abcdefghixyzjklmnopqrstuvw.com/a", "", 1, 1, 0},
+ {"http://abcdefghixyzjklmnopqrstuvw.com/a", "", 3, 1, 0},
{"http://spaces.com/path%20with%20spaces/foo.html", "Spaces", 2, 2, 0},
- {"http://abcdefghijklxyzmnopqrstuvw.com/a", "", 1, 1, 0},
- {"http://abcdefxyzghijklmnopqrstuvw.com/a", "", 1, 1, 0},
- {"http://abcxyzdefghijklmnopqrstuvw.com/a", "", 1, 1, 0},
- {"http://xyzabcdefghijklmnopqrstuvw.com/a", "", 1, 1, 0},
+ {"http://abcdefghijklxyzmnopqrstuvw.com/a", "", 3, 1, 0},
+ {"http://abcdefxyzghijklmnopqrstuvw.com/a", "", 3, 1, 0},
+ {"http://abcxyzdefghijklmnopqrstuvw.com/a", "", 3, 1, 0},
+ {"http://xyzabcdefghijklmnopqrstuvw.com/a", "", 3, 1, 0},
{"http://cda.com/Dogs%20Cats%20Gorillas%20Sea%20Slugs%20and%20Mice",
"Dogs & Cats & Mice", 1, 1, 0},
};
@@ -220,11 +220,11 @@ TEST_F(HistoryQuickProviderTest, SimpleSingleMatch) {
TEST_F(HistoryQuickProviderTest, MultiMatch) {
string16 text(ASCIIToUTF16("foo"));
std::vector<std::string> expected_urls;
- // Scores high because of completion length.
+ // Scores high because of typed_count.
expected_urls.push_back("http://foo.com/");
// Scores high because of visit count.
- expected_urls.push_back("http://foo.com/dir/another/again/");
- // Scores high because of visit count but less match span.
+ expected_urls.push_back("http://foo.com/dir/another/");
+ // Scores high because of high visit count.
expected_urls.push_back("http://foo.com/dir/another/again/myfile.html");
RunTest(text, expected_urls, "http://foo.com/");
}
@@ -278,12 +278,12 @@ TEST_F(HistoryQuickProviderTest, EncodingLimitMatch) {
const size_t max_offset = url.size() - ((6 * 2) + 7);
for (ACMatchClassifications::const_iterator citer = content.begin();
citer != content.end(); ++citer)
- EXPECT_GT(max_offset, citer->offset);
+ EXPECT_LT(citer->offset, max_offset);
ACMatchClassifications description(ac_matches_[0].description_class);
std::string page_title("Dogs & Cats & Mice");
- for (ACMatchClassifications::const_iterator diter = content.begin();
- diter != content.end(); ++diter)
- EXPECT_GT(page_title.size(), diter->offset);
+ for (ACMatchClassifications::const_iterator diter = description.begin();
+ diter != description.end(); ++diter)
+ EXPECT_LT(diter->offset, page_title.size());
}
TEST_F(HistoryQuickProviderTest, Spans) {
diff --git a/chrome/browser/history/in_memory_url_index.cc b/chrome/browser/history/in_memory_url_index.cc
index 67546a2..9085853 100644
--- a/chrome/browser/history/in_memory_url_index.cc
+++ b/chrome/browser/history/in_memory_url_index.cc
@@ -49,20 +49,11 @@ typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry
const size_t InMemoryURLIndex::kNoCachedResultForTerm = -1;
-// Scoring constants.
-const float kOrderMaxValue = 50.0;
-const float kStartMaxValue = 50.0;
-const size_t kMaxSignificantStart = 20;
-const float kCompleteMaxValue = 100.0;
-const float kLastVisitMaxValue = 200.0;
-const base::TimeDelta kMaxSignificantDay = base::TimeDelta::FromDays(30);
-const float kVisitCountMaxValue = 100.0;
-const int kMaxSignificantVisits = 20;
-const float kTypedCountMaxValue = 300.0;
-const int kMaxSignificantTyped = 20;
-const float kMaxRawScore = kOrderMaxValue + kStartMaxValue + kCompleteMaxValue +
- kLastVisitMaxValue + kVisitCountMaxValue + kTypedCountMaxValue;
-const float kMaxNormalizedRawScore = 1000.0;
+// Score ranges used to get a 'base' score for each of the scoring factors
+// (such as recency of last visit, times visited, times the URL was typed,
+// and the quality of the string match). There is a matching value range for
+// each of these scores for each factor.
+const int kScoreRank[] = { 1425, 1200, 900, 400 };
ScoredHistoryMatch::ScoredHistoryMatch()
: raw_score(0),
@@ -111,6 +102,38 @@ int AccumulateMatchLength(int total, const TermMatch& match) {
return total + match.length;
}
+// Converts a raw value for some particular scoring factor into a score
+// component for that factor. The conversion function is piecewise linear, with
+// input values provided in |value_ranks| and resulting output scores from
+// |kScoreRank| (mathematically, f(value_rank[i]) = kScoreRank[i]). A score
+// cannot be higher than kScoreRank[0], and drops directly to 0 if lower than
+// kScoreRank[3].
+//
+// For example, take |value| == 70 and |value_ranks| == { 100, 50, 30, 10 }.
+// Because 70 falls between ranks 0 (100) and 1 (50), the score is given by the
+// linear function:
+// score = m * value + b, where
+// m = (kScoreRank[0] - kScoreRank[1]) / (value_ranks[0] - value_ranks[1])
+// b = value_ranks[1]
+// Any value higher than 100 would be scored as if it were 100, and any value
+// lower than 10 scored 0.
+int ScoreForValue(int value, const int* value_ranks) {
+ int i = 0;
+ int rank_count = arraysize(kScoreRank);
+ while ((i < rank_count) && ((value_ranks[0] < value_ranks[1]) ?
+ (value > value_ranks[i]) : (value < value_ranks[i])))
+ ++i;
+ if (i >= rank_count)
+ return 0;
+ int score = kScoreRank[i];
+ if (i > 0) {
+ score += (value - value_ranks[i]) *
+ (kScoreRank[i - 1] - kScoreRank[i]) /
+ (value_ranks[i - 1] - value_ranks[i]);
+ }
+ return score;
+}
+
InMemoryURLIndex::InMemoryURLIndex(const FilePath& history_dir)
: history_dir_(history_dir),
history_item_count_(0) {
@@ -342,6 +365,7 @@ ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms(
// substring match, inserting those which pass in order by score.
scored_items = std::for_each(history_id_set.begin(), history_id_set.end(),
AddHistoryMatch(*this, lower_terms)).ScoredMatches();
+
// Select and sort only the top kMaxMatches results.
if (scored_items.size() > AutocompleteProvider::kMaxMatches) {
std::partial_sort(scored_items.begin(),
@@ -648,6 +672,33 @@ TermMatches InMemoryURLIndex::SortAndDeoverlap(const TermMatches& matches) {
}
// static
+std::vector<size_t> InMemoryURLIndex::OffsetsFromTermMatches(
+ const TermMatches& matches) {
+ std::vector<size_t> offsets;
+ for (TermMatches::const_iterator i = matches.begin(); i != matches.end(); ++i)
+ offsets.push_back(i->offset);
+ return offsets;
+}
+
+// static
+TermMatches InMemoryURLIndex::ReplaceOffsetsInTermMatches(
+ const TermMatches& matches,
+ const std::vector<size_t>& offsets) {
+ DCHECK_EQ(matches.size(), offsets.size());
+ TermMatches new_matches;
+ std::vector<size_t>::const_iterator offset_iter = offsets.begin();
+ for (TermMatches::const_iterator term_iter = matches.begin();
+ term_iter != matches.end(); ++term_iter, ++offset_iter) {
+ if (*offset_iter != string16::npos) {
+ TermMatch new_match(*term_iter);
+ new_match.offset = *offset_iter;
+ new_matches.push_back(new_match);
+ }
+ }
+ return new_matches;
+}
+
+// static
ScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL(
const URLRow& row,
const String16Vector& terms) {
@@ -688,52 +739,61 @@ ScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL(
// Get partial scores based on term matching. Note that the score for
// each of the URL and title are adjusted by the fraction of the
// terms appearing in each.
- int url_score = RawScoreForMatches(match.url_matches, url.size()) *
+ int url_score = ScoreComponentForMatches(match.url_matches, url.size()) *
match.url_matches.size() / terms.size();
- int title_score = RawScoreForMatches(match.title_matches, title.size()) *
- match.title_matches.size() / terms.size();
+ int title_score =
+ ScoreComponentForMatches(match.title_matches, title.size()) *
+ static_cast<int>(match.title_matches.size()) /
+ static_cast<int>(terms.size());
// Arbitrarily pick the best.
// TODO(mrossetti): It might make sense that a term which appears in both the
// URL and the Title should boost the score a bit.
int term_score = std::max(url_score, title_score);
+ if (term_score == 0)
+ return match;
+
+ // Factor in recency of visit, visit count and typed count attributes of the
+ // URLRow.
+ const int kDaysAgoLevel[] = { 0, 10, 20, 30 };
+ int score = ScoreForValue((base::Time::Now() -
+ row.last_visit()).InDays(), kDaysAgoLevel);
+ const int kVisitCountLevel[] = { 30, 10, 5, 3 };
+ int visit_count_value = ScoreForValue(row.visit_count(), kVisitCountLevel);
+ const int kTypedCountLevel[] = { 10, 5, 3, 1 };
+ int typed_count_value = ScoreForValue(row.typed_count(), kTypedCountLevel);
+
+ // Determine how many of the factors comprising the final score are
+ // significant by summing the relative factors for each and subtracting how
+ // many will be 'discarded' even if they are low.
+ const int kVisitCountMultiplier = 2;
+ const int kTypedCountMultiplier = 3;
+ const int kSignificantFactors =
+ kVisitCountMultiplier + // Visit count factor plus
+ kTypedCountMultiplier + // typed count factor plus
+ 2 - // one each for string match and last visit
+ 2; // minus 2 insignificant factors.
+ // The following, in effect, discards up to |kSignificantFactors| low scoring
+ // elements which contribute little to the score but which can inordinately
+ // drag down an otherwise good score.
+ match.raw_score = std::min(kScoreRank[0], (term_score + score +
+ (visit_count_value * kVisitCountMultiplier) + (typed_count_value *
+ kTypedCountMultiplier)) / kSignificantFactors);
- // Factor in attributes of the URLRow.
- // Items which have been visited recently score higher.
- int64 delta_time = (kMaxSignificantDay -
- std::min((base::Time::Now() - row.last_visit()),
- kMaxSignificantDay)).ToInternalValue();
- float last_visit_value =
- (static_cast<float>(delta_time) /
- static_cast<float>(kMaxSignificantDay.ToInternalValue())) *
- kLastVisitMaxValue;
-
- float visit_count_value =
- (static_cast<float>(std::min(row.visit_count(),
- kMaxSignificantVisits))) / static_cast<float>(kMaxSignificantVisits) *
- kVisitCountMaxValue;
-
- float typed_count_value =
- (static_cast<float>(std::min(row.typed_count(),
- kMaxSignificantTyped))) / static_cast<float>(kMaxSignificantTyped) *
- kTypedCountMaxValue;
-
- float raw_score = term_score + last_visit_value + visit_count_value +
- typed_count_value;
-
- // Normalize the score.
- match.raw_score = static_cast<int>((raw_score / kMaxRawScore) *
- kMaxNormalizedRawScore);
return match;
}
-int InMemoryURLIndex::RawScoreForMatches(const TermMatches& matches,
- size_t max_length) {
+int InMemoryURLIndex::ScoreComponentForMatches(const TermMatches& matches,
+ size_t max_length) {
// TODO(mrossetti): This is good enough for now but must be fine-tuned.
if (matches.empty())
return 0;
- // Search terms appearing in order score higher.
- float order_value = 10.0;
+ // Score component for whether the input terms (if more than one) were found
+ // in the same order in the match. Start with kOrderMaxValue points divided
+ // equally among (number of terms - 1); then discount each of those terms that
+ // is out-of-order in the match.
+ const int kOrderMaxValue = 250;
+ int order_value = kOrderMaxValue;
if (matches.size() > 1) {
int max_possible_out_of_order = matches.size() - 1;
int out_of_order = 0;
@@ -741,33 +801,42 @@ int InMemoryURLIndex::RawScoreForMatches(const TermMatches& matches,
if (matches[i - 1].term_num > matches[i].term_num)
++out_of_order;
}
- order_value =
- (static_cast<float>(max_possible_out_of_order - out_of_order) /
- max_possible_out_of_order) * kOrderMaxValue;
+ order_value = (max_possible_out_of_order - out_of_order) * kOrderMaxValue /
+ max_possible_out_of_order;
}
- // Search terms which appear earlier score higher.
- // No points if the first search term does not appear in the first
- // kMaxSignificantStart chars.
- float start_value =
- (static_cast<float>(kMaxSignificantStart -
- std::min(kMaxSignificantStart, matches[0].offset))) /
- static_cast<float>(kMaxSignificantStart) * kStartMaxValue;
-
- // Search terms which comprise a greater portion score higher.
+ // Score component for how early in the match string the first search term
+ // appears. Start with kStartMaxValue points and discount by
+ // 1/kMaxSignificantStart points for each character later than the first at
+ // which the term begins. No points are earned if the start of the match
+ // occurs at or after kMaxSignificantStart.
+ const size_t kMaxSignificantStart = 20;
+ const int kStartMaxValue = 250;
+ int start_value = (kMaxSignificantStart -
+ std::min(kMaxSignificantStart, matches[0].offset)) * kStartMaxValue /
+ kMaxSignificantStart;
+
+ // Score component for how much of the matched string the input terms cover.
+ // kCompleteMaxValue points times the fraction of the URL/page title string
+ // that was matched.
size_t term_length_total = std::accumulate(matches.begin(), matches.end(),
0, AccumulateMatchLength);
- float complete_value =
- (static_cast<float>(term_length_total) / static_cast<float>(max_length)) *
- kStartMaxValue;
- return static_cast<int>(order_value + start_value + complete_value);
+ const int kCompleteMaxValue = 500;
+ int complete_value = term_length_total * kCompleteMaxValue / max_length;
+
+ int raw_score = order_value + start_value + complete_value;
+ const int kTermScoreLevel[] = { 1000, 650, 500, 200 };
+
+ // Scale the sum of the three components above into a single score component
+ // on the same scale as that used in ScoredMatchForURL().
+ return ScoreForValue(raw_score, kTermScoreLevel);
}
InMemoryURLIndex::AddHistoryMatch::AddHistoryMatch(
const InMemoryURLIndex& index,
const String16Vector& lower_terms)
- : index_(index),
- lower_terms_(lower_terms) {
+ : index_(index),
+ lower_terms_(lower_terms) {
}
InMemoryURLIndex::AddHistoryMatch::~AddHistoryMatch() {}
diff --git a/chrome/browser/history/in_memory_url_index.h b/chrome/browser/history/in_memory_url_index.h
index e50b07f..cf02970 100644
--- a/chrome/browser/history/in_memory_url_index.h
+++ b/chrome/browser/history/in_memory_url_index.h
@@ -165,6 +165,15 @@ class InMemoryURLIndex {
static String16Vector WordVectorFromString16(const string16& uni_string,
bool break_on_space);
+ // Extract and return the offsets from |matches|.
+ static std::vector<size_t> OffsetsFromTermMatches(const TermMatches& matches);
+
+ // Replace the offsets in |matches| with those given in |offsets|, deleting
+ // any which are npos, and return the updated list of matches.
+ static TermMatches ReplaceOffsetsInTermMatches(
+ const TermMatches& matches,
+ const std::vector<size_t>& offsets);
+
private:
friend class AddHistoryMatch;
FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization);
@@ -329,11 +338,11 @@ class InMemoryURLIndex {
const URLRow& row,
const String16Vector& terms_vector);
- // Calculates a partial raw score based on position, ordering and total
+ // Calculates a component score based on position, ordering and total
// substring match size using metrics recorded in |matches|. |max_length|
// is the length of the string against which the terms are being searched.
- static int RawScoreForMatches(const TermMatches& matches,
- size_t max_length);
+ static int ScoreComponentForMatches(const TermMatches& matches,
+ size_t max_length);
// Sorts and removes overlapping substring matches from |matches| and
// returns the cleaned up matches.
diff --git a/chrome/browser/history/in_memory_url_index_unittest.cc b/chrome/browser/history/in_memory_url_index_unittest.cc
index dee196c..37e64fb 100644
--- a/chrome/browser/history/in_memory_url_index_unittest.cc
+++ b/chrome/browser/history/in_memory_url_index_unittest.cc
@@ -204,7 +204,7 @@ TEST_F(InMemoryURLIndexTest, Retrieval) {
matches = url_index_->HistoryItemsForTerms(terms);
ASSERT_EQ(2U, matches.size());
// The results should be in descending score order.
- EXPECT_GT(matches[0].raw_score, matches[1].raw_score);
+ EXPECT_GE(matches[0].raw_score, matches[1].raw_score);
// Search which should result in nearly perfect result.
terms.clear();
@@ -228,7 +228,7 @@ TEST_F(InMemoryURLIndexTest, Retrieval) {
matches = url_index_->HistoryItemsForTerms(terms);
ASSERT_EQ(1U, matches.size());
// The results should have a poor score.
- EXPECT_LT(matches[0].raw_score, 200);
+ EXPECT_LT(matches[0].raw_score, 500);
EXPECT_EQ(33, matches[0].url_info.id());
EXPECT_EQ("http://quiteuselesssearchresultxyz.com/",
matches[0].url_info.url().spec()); // Note: URL gets lowercased.
@@ -263,7 +263,7 @@ TEST_F(InMemoryURLIndexTest, TitleSearch) {
url_index_.reset(new InMemoryURLIndex());
url_index_->Init(this, "en,ja,hi,zh");
// Signal if someone has changed the test DB.
- EXPECT_EQ(28U, url_index_->history_info_map_.size());
+ EXPECT_EQ(25U, url_index_->history_info_map_.size());
InMemoryURLIndex::String16Vector terms;
// Ensure title is being searched.
@@ -367,6 +367,31 @@ TEST_F(InMemoryURLIndexTest, StaticFunctions) {
EXPECT_EQ(expected_offsets[i], matches_c[i].offset);
}
+TEST_F(InMemoryURLIndexTest, OffsetsAndTermMatches) {
+ // Test OffsetsFromTermMatches
+ history::TermMatches matches_a;
+ matches_a.push_back(history::TermMatch(1, 1, 2));
+ matches_a.push_back(history::TermMatch(2, 4, 3));
+ matches_a.push_back(history::TermMatch(3, 9, 1));
+ matches_a.push_back(history::TermMatch(3, 10, 1));
+ matches_a.push_back(history::TermMatch(4, 14, 5));
+ std::vector<size_t> offsets =
+ InMemoryURLIndex::OffsetsFromTermMatches(matches_a);
+ const size_t expected_offsets_a[] = {1, 4, 9, 10, 14};
+ ASSERT_EQ(offsets.size(), arraysize(expected_offsets_a));
+ for (size_t i = 0; i < offsets.size(); ++i)
+ EXPECT_EQ(expected_offsets_a[i], offsets[i]);
+
+ // Test ReplaceOffsetsInTermMatches
+ offsets[2] = string16::npos;
+ history::TermMatches matches_b =
+ InMemoryURLIndex::ReplaceOffsetsInTermMatches(matches_a, offsets);
+ const size_t expected_offsets_b[] = {1, 4, 10, 14};
+ ASSERT_EQ(arraysize(expected_offsets_b), matches_b.size());
+ for (size_t i = 0; i < matches_b.size(); ++i)
+ EXPECT_EQ(expected_offsets_b[i], matches_b[i].offset);
+}
+
TEST_F(InMemoryURLIndexTest, TypedCharacterCaching) {
// Verify that match results for previously typed characters are retained
// (in the term_char_word_set_cache_) and reused, if possible, in future
@@ -416,7 +441,7 @@ TEST_F(InMemoryURLIndexTest, TypedCharacterCaching) {
}
TEST_F(InMemoryURLIndexTest, Scoring) {
- URLRow row_a(MakeURLRow("http://abcdef", "fedcba", 20, 0, 20));
+ URLRow row_a(MakeURLRow("http://abcdef", "fedcba", 3, 30, 1));
// Test scores based on position.
ScoredHistoryMatch scored_a(
InMemoryURLIndex::ScoredMatchForURL(row_a, Make1Term("abc")));
@@ -434,20 +459,20 @@ TEST_F(InMemoryURLIndexTest, Scoring) {
InMemoryURLIndex::ScoredMatchForURL(row_a, Make2Terms("def", "abc")));
EXPECT_GT(scored_d.raw_score, scored_e.raw_score);
// Test scores based on visit_count.
- URLRow row_b(MakeURLRow("http://abcdef", "fedcba", 10, 0, 20));
+ URLRow row_b(MakeURLRow("http://abcdef", "fedcba", 10, 30, 1));
ScoredHistoryMatch scored_f(
InMemoryURLIndex::ScoredMatchForURL(row_b, Make1Term("abc")));
- EXPECT_GT(scored_a.raw_score, scored_f.raw_score);
+ EXPECT_GT(scored_f.raw_score, scored_a.raw_score);
// Test scores based on last_visit.
- URLRow row_c(MakeURLRow("http://abcdef", "fedcba", 20, 2, 20));
+ URLRow row_c(MakeURLRow("http://abcdef", "fedcba", 3, 10, 1));
ScoredHistoryMatch scored_g(
InMemoryURLIndex::ScoredMatchForURL(row_c, Make1Term("abc")));
- EXPECT_GT(scored_a.raw_score, scored_g.raw_score);
+ EXPECT_GT(scored_g.raw_score, scored_a.raw_score);
// Test scores based on typed_count.
- URLRow row_d(MakeURLRow("http://abcdef", "fedcba", 20, 0, 10));
+ URLRow row_d(MakeURLRow("http://abcdef", "fedcba", 3, 30, 10));
ScoredHistoryMatch scored_h(
InMemoryURLIndex::ScoredMatchForURL(row_d, Make1Term("abc")));
- EXPECT_GT(scored_a.raw_score, scored_h.raw_score);
+ EXPECT_GT(scored_h.raw_score, scored_a.raw_score);
}
TEST_F(InMemoryURLIndexTest, AddNewRows) {
diff --git a/chrome/test/data/History/url_history_provider_test.db.txt b/chrome/test/data/History/url_history_provider_test.db.txt
index 26be362..5fd7821 100644
--- a/chrome/test/data/History/url_history_provider_test.db.txt
+++ b/chrome/test/data/History/url_history_provider_test.db.txt
@@ -26,36 +26,33 @@
The ordering, URLs and titles must be kept in sync with the unit tests found
in in_memory_url_index_unittest.cc.
*/
-INSERT INTO "urls" VALUES(1,'http://www.reuters.com/article/idUSN0839880620100708','UPDATE 1-US 30-yr mortgage rate drops to new record low | Reuters',3,1,2,0,29);
+INSERT INTO "urls" VALUES(1,'http://www.reuters.com/article/idUSN0839880620100708','UPDATE 1-US 30-yr mortgage rate drops to new record low | Reuters',3,1,2,0,29); // Qualifies
INSERT INTO "urls" VALUES(2,'http://www.golfweek.com/news/2010/jul/08/goydos-opens-john-deere-classic-59/','Goydos opens John Deere Classic with 59',3,1,4,0,27);
-INSERT INTO "urls" VALUES(3,'http://www.businessandmedia.org/articles/2010/20100708120415.aspx','LeBronomics: Could High Taxes Influence James'' Team Decision?',4,1,2,0,28);
-INSERT INTO "urls" VALUES(4,'http://www.realclearmarkets.com/articles/2010/07/08/diversity_in_the_financial_sector_98562.html','RealClearMarkets - Racial, Gender Quotas in the Financial Bill?',4,1,4,0,0);
-INSERT INTO "urls" VALUES(5,'http://drudgereport.com/','DRUDGE REPORT 2010',3,2,2,0,0);
-INSERT INTO "urls" VALUES(6,'http://totalfinder.binaryage.com/','TotalFinder brings tabs to your native Finder and more!',3,2,4,0,26);
-INSERT INTO "urls" VALUES(7,'http://www.clickgamer.com/products/pid_5269/screenshots/j2me/large/ingame5.png','ingame5.png 176×208 pixels',4,2,2,0,21);
-INSERT INTO "urls" VALUES(8,'http://getsharekit.com/','ShareKit : Drop-in Share Features for all iOS Apps',4,2,4,0,20);
+INSERT INTO "urls" VALUES(3,'http://www.businessandmedia.org/articles/2010/20100708120415.aspx','LeBronomics: Could High Taxes Influence James'' Team Decision?',4,1,2,0,28); // Qualifies
+INSERT INTO "urls" VALUES(4,'http://www.realclearmarkets.com/articles/2010/07/08/diversity_in_the_financial_sector_98562.html','RealClearMarkets - Racial, Gender Quotas in the Financial Bill?',4,1,4,0,0); // Qualifies
+INSERT INTO "urls" VALUES(5,'http://drudgereport.com/','DRUDGE REPORT 2010',3,2,2,0,0); // Qualifies
+INSERT INTO "urls" VALUES(6,'http://totalfinder.binaryage.com/','TotalFinder brings tabs to your native Finder and more!',3,2,4,0,26); // Qualifies
+INSERT INTO "urls" VALUES(8,'http://getsharekit.com/','ShareKit : Drop-in Share Features for all iOS Apps',4,2,4,0,20); // Qualifies
INSERT INTO "urls" VALUES(9,'http://en.wikipedia.org/wiki/Control-Z','Control-Z - Wikipedia, the free encyclopedia',0,0,6,0,0);
INSERT INTO "urls" VALUES(10,'http://vmware.com/info?id=724','VMware Account Management Login',1,0,6,0,0);
INSERT INTO "urls" VALUES(11,'http://www.tech-recipes.com/rx/2621/os_x_change_path_environment_variable/','OS X: Change your PATH environment variable | Mac system administration | Tech-Recipes',0,1,6,0,14);
-INSERT INTO "urls" VALUES(12,'http://view.atdmt.com/PPJ/iview/194841301/direct;wi.160;hi.600/01?click=','',6,6,0,1,0);
-INSERT INTO "urls" VALUES(13,'http://www.itmedia.co.jp/news/','ITmedia News/ITの今が見える、明日が分かる',6,6,0,0,25);
-INSERT INTO "urls" VALUES(14,'http://www.nikkei.co.jp/','NIKKEI NET(日経ネット)は日本経済新聞 電子版に生まれ変わりました',6,6,0,0,73);
-INSERT INTO "urls" VALUES(15,'http://www.cnn.com/','CNN.com International - Breaking, World, Business, Sports, Entertainment and Video News',6,6,0,0,89);
-INSERT INTO "urls" VALUES(16,'http://www.zdnet.com/','Technology News, Analysis, Comments and Product Reviews for IT Professionals | ZDNet',6,6,0,0,652);
-INSERT INTO "urls" VALUES(17,'http://www.crash.net/','Crash.Net | Formula 1 & MotoGP | Motorsport News',6,6,0,0,239);
-INSERT INTO "urls" VALUES(18,'http://www.theinquirer.net/','THE INQUIRER - Microprocessor, Server, Memory, PCS, Graphics, Networking, Storage',6,6,0,0,79);
-INSERT INTO "urls" VALUES(19,'http://www.theregister.co.uk/','The Register: Sci/Tech News for the World',6,6,0,0,74);
-INSERT INTO "urls" VALUES(20,'http://blogs.technet.com/markrussinovich/','Mark''s Blog - Site Home - TechNet Blogs',6,6,0,0,685);
-INSERT INTO "urls" VALUES(21,'http://www.icu-project.org/','ICU Home Page (ICU - International Components for Unicode)',6,6,0,0,445);
-INSERT INTO "urls" VALUES(22,'http://site.icu-project.org/','ICU Home Page (ICU - International Components for Unicode)',6,6,0,0,445);
-INSERT INTO "urls" VALUES(23,'http://icu-project.org/apiref/icu4c/','ICU 4.2: Main Page',6,6,0,0,212);
-INSERT INTO "urls" VALUES(24,'http://www.danilatos.com/event-test/ExperimentTest.html','Experimentation Harness',6,6,0,0,0);
-INSERT INTO "urls" VALUES(25,'http://www.codeguru.com/','CodeGuru : codeguru',6,6,0,0,110);
-INSERT INTO "urls" VALUES(26,'http://www.codeproject.com/','Your Development Resource - CodeProject',6,6,0,0,369);
-INSERT INTO "urls" VALUES(27,'http://www.tomshardware.com/us/#redir','Tom''s Hardware: Hardware News, Tests and Reviews',6,6,0,0,65);
-INSERT INTO "urls" VALUES(28,'http://www.ddj.com/windows/184416623','Dr. Dobb''s | Avoiding the Visual C++ Runtime Library | 2 1, 2003',6,6,0,0,0);
-INSERT INTO "urls" VALUES(29,'http://svcs.cnn.com/weather/getForecast?time=34&mode=json_html&zipCode=336736767676&locCode=EGLL&celcius=true&csiID=csi2','',6,6,0,1,0);
-INSERT INTO "urls" VALUES(30,'http://www.drudgery.com/Dogs%20Cats%20Gorillas%20Dolphine%20Sea%20Slugsand%20Mice','Life in the Slow Lane',3,2,2,0,0);
+INSERT INTO "urls" VALUES(12,'http://view.atdmt.com/PPJ/iview/194841301/direct;wi.160;hi.600/01?click=','',6,6,0,1,0); // Qualifies
+INSERT INTO "urls" VALUES(15,'http://www.cnn.com/','CNN.com International - Breaking, World, Business, Sports, Entertainment and Video News',6,6,0,0,89); // Qualifies
+INSERT INTO "urls" VALUES(16,'http://www.zdnet.com/','Technology News, Analysis, Comments and Product Reviews for IT Professionals | ZDNet',6,6,0,0,652); // Qualifies
+INSERT INTO "urls" VALUES(17,'http://www.crash.net/','Crash.Net | Formula 1 & MotoGP | Motorsport News',6,6,0,0,239); // Qualifies
+INSERT INTO "urls" VALUES(18,'http://www.theinquirer.net/','THE INQUIRER - Microprocessor, Server, Memory, PCS, Graphics, Networking, Storage',6,6,0,0,79); // Qualifies
+INSERT INTO "urls" VALUES(19,'http://www.theregister.co.uk/','The Register: Sci/Tech News for the World',6,6,0,0,74); // Qualifies
+INSERT INTO "urls" VALUES(20,'http://blogs.technet.com/markrussinovich/','Mark''s Blog - Site Home - TechNet Blogs',6,6,0,0,685); // Qualifies
+INSERT INTO "urls" VALUES(21,'http://www.icu-project.org/','ICU Home Page (ICU - International Components for Unicode)',6,6,0,0,445); // Qualifies
+INSERT INTO "urls" VALUES(22,'http://site.icu-project.org/','ICU Home Page (ICU - International Components for Unicode)',6,6,0,0,445); // Qualifies
+INSERT INTO "urls" VALUES(23,'http://icu-project.org/apiref/icu4c/','ICU 4.2: Main Page',6,6,0,0,212); // Qualifies
+INSERT INTO "urls" VALUES(24,'http://www.danilatos.com/event-test/ExperimentTest.html','Experimentation Harness',6,6,0,0,0); // Qualifies
+INSERT INTO "urls" VALUES(25,'http://www.codeguru.com/','CodeGuru : codeguru',6,6,0,0,110); // Qualifies
+INSERT INTO "urls" VALUES(26,'http://www.codeproject.com/','Your Development Resource - CodeProject',6,6,0,0,369); // Qualifies
+INSERT INTO "urls" VALUES(27,'http://www.tomshardware.com/us/#redir','Tom''s Hardware: Hardware News, Tests and Reviews',6,6,0,0,65); // Qualifies
+INSERT INTO "urls" VALUES(28,'http://www.ddj.com/windows/184416623','Dr. Dobb''s | Avoiding the Visual C++ Runtime Library | 2 1, 2003',6,6,0,0,0); // Qualifies
+INSERT INTO "urls" VALUES(29,'http://svcs.cnn.com/weather/getForecast?time=34&mode=json_html&zipCode=336736767676&locCode=EGLL&celcius=true&csiID=csi2','',6,6,0,1,0); // Qualifies
+INSERT INTO "urls" VALUES(30,'http://www.drudgery.com/Dogs%20and%20Mice','Life in the Slow Lane',8,2,2,0,0); // Qualifies
INSERT INTO "urls" VALUES(31,'http://www.redrudgerydo.com/','Music of the Wild Landscape',0,0,6,0,0);
-INSERT INTO "urls" VALUES(32,'https://NearlyPerfectResult.com/','Practically Perfect Search Result',99,99,0,0,0);
-INSERT INTO "urls" VALUES(33,'http://QuiteUselessSearchResultxyz.com/','Practically Useless Search Result',4,0,99,0,0);
+INSERT INTO "urls" VALUES(32,'https://NearlyPerfectResult.com/','Practically Perfect Search Result',99,99,0,0,0); // Qualifies
+INSERT INTO "urls" VALUES(33,'http://QuiteUselessSearchResultxyz.com/','Practically Useless Search Result',4,0,99,0,0); // Qualifies