Multi-highlight and scoring improvements.

Incorporate multi-offset highlighting into HQP. Update results scoring. Switch to integer scoring operations from float. BUG=79271,78153 TEST=Added unit tests. Updated unit tests. Review URL: http://codereview.chromium.org/6859005 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@82237 0039d316-1c4b-4281-b951-d872f2087c98
author: mrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-04-20 04:24:55 +0000
committer: mrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-04-20 04:24:55 +0000
commit: c99037d1bad96b729e015a84ca19e5a144689ccd (patch)
tree: 90fde2ca30a50cd28b60e8368e4a4ca94650e32f /chrome
parent: 456c8047e4c8cea12174f649e209798e11d83781 (diff)
download: chromium_src-c99037d1bad96b729e015a84ca19e5a144689ccd.zip
chromium_src-c99037d1bad96b729e015a84ca19e5a144689ccd.tar.gz
chromium_src-c99037d1bad96b729e015a84ca19e5a144689ccd.tar.bz2
9 files changed, 273 insertions, 138 deletions
diff --git a/chrome/app/generated_resources.grd b/chrome/app/generated_resources.grd
index fddb05c..2d44c7d 100644
--- a/chrome/app/generated_resources.grd
+++ b/chrome/app/generated_resources.grd
@@ -4189,6 +4189,12 @@ Keep your key file in a safe place. You will need it to create new versions of y
       <message name="IDS_FLAGS_DISABLE_INTERACTIVE_FORM_VALIDATION_DESCRIPTION" desc="Description of the 'Disable HTML5 interactive form validation' lab.">
         Disable showing validation messages and preventing form submission.
       </message>
+      <message name="IDS_FLAGS_ENABLE_HISTORY_QUICK_PROVIDER" desc="Name of the 'Enable better visit history matching in the omnibox' lab.">
+        Better omnibox history matching
+      </message>
+      <message name="IDS_FLAGS_ENABLE_HISTORY_QUICK_PROVIDER_DESCRIPTION" desc="Description of the 'Enable better visit history matching in the omnibox' lab.">
+        Enables substring and multi-fragment matching within URLs from history.
+      </message>
       <message name="IDS_FLAGS_NEW_TAB_PAGE_4_NAME" desc="Name of the new tab page 4 lab.">
         Experimental new tab page
       </message>
diff --git a/chrome/browser/autocomplete/autocomplete.cc b/chrome/browser/autocomplete/autocomplete.cc
index c022f96..f072c0c 100644
--- a/chrome/browser/autocomplete/autocomplete.cc
+++ b/chrome/browser/autocomplete/autocomplete.cc
@@ -788,7 +788,9 @@ AutocompleteController::AutocompleteController(
       in_start_(false) {
   search_provider_ = new SearchProvider(this, profile);
   providers_.push_back(search_provider_);
-  if (!CommandLine::ForCurrentProcess()->HasSwitch(
+  if (CommandLine::ForCurrentProcess()->HasSwitch(
+          switches::kEnableHistoryQuickProvider) &&
+      !CommandLine::ForCurrentProcess()->HasSwitch(
           switches::kDisableHistoryQuickProvider))
     providers_.push_back(new HistoryQuickProvider(this, profile));
   if (!CommandLine::ForCurrentProcess()->HasSwitch(
diff --git a/chrome/browser/autocomplete/history_quick_provider.cc b/chrome/browser/autocomplete/history_quick_provider.cc
index 8a7046a..8e31343 100644
--- a/chrome/browser/autocomplete/history_quick_provider.cc
+++ b/chrome/browser/autocomplete/history_quick_provider.cc
@@ -4,6 +4,8 @@
 
 #include "chrome/browser/autocomplete/history_quick_provider.h"
 
+#include <vector>
+
 #include "base/basictypes.h"
 #include "base/i18n/break_iterator.h"
 #include "base/logging.h"
@@ -94,13 +96,20 @@ void HistoryQuickProvider::DoAutocomplete() {
   if (matches.empty())
     return;
 
+  // Artificially reduce the score of high-scoring results which should not be
+  // inline autocompletd. Each such result gets the next available
+  // |next_dont_inline_score|. Upon use of next_dont_inline_score it is
+  // decremented.
+  int next_dont_inline_score = 1199;
   size_t match_num = matches.size() - 1;
   for (ScoredHistoryMatches::const_iterator match_iter = matches.begin();
        match_iter != matches.end(); ++match_iter, --match_num) {
     const ScoredHistoryMatch& history_match(*match_iter);
     if (history_match.raw_score > 0) {
       AutocompleteMatch ac_match =
-          QuickMatchToACMatch(history_match, match_num);
+          QuickMatchToACMatch(history_match, match_num,
+              autocomplete_input_.prevent_inline_autocomplete(),
+              &next_dont_inline_score);
       matches_.push_back(ac_match);
     }
   }
@@ -108,35 +117,57 @@ void HistoryQuickProvider::DoAutocomplete() {
 
 AutocompleteMatch HistoryQuickProvider::QuickMatchToACMatch(
     const ScoredHistoryMatch& history_match,
-    size_t match_number) {
+    size_t match_number,
+    bool prevent_inline_autocomplete,
+    int* next_dont_inline_score) {
+  DCHECK(next_dont_inline_score);
   const history::URLRow& info = history_match.url_info;
   int score = CalculateRelevance(history_match.raw_score,
                                  autocomplete_input_.type(),
                                  NORMAL, match_number);
+
+  // Discount a very high score when a) a match doesn't start at the beginning
+  // of the URL, or b) there are more than one substring matches in the URL, or
+  // c) the type of request does not allow inline autocompletion. This prevents
+  // the URL from being offered as an inline completion.
+  const int kMaxDontInlineScore = 1199;
+  if (score > kMaxDontInlineScore &&
+      (prevent_inline_autocomplete || history_match.url_matches.size() > 1 ||
+       history_match.url_matches[0].offset > 0)) {
+    score = std::min(*next_dont_inline_score, score);
+    --*next_dont_inline_score;
+  }
+
   AutocompleteMatch match(this, score, !!info.visit_count(),
                           history_match.url_matches.empty() ?
                           AutocompleteMatch::HISTORY_URL :
                           AutocompleteMatch::HISTORY_TITLE);
+
   match.destination_url = info.url();
   DCHECK(match.destination_url.is_valid());
+
+  // Format the fill_into_edit and determine its offset.
   size_t inline_autocomplete_offset =
       history_match.input_location + autocomplete_input_.text().length();
-  const net::FormatUrlTypes format_types = net::kFormatUrlOmitAll;
   match.fill_into_edit =
       AutocompleteInput::FormattedStringWithEquivalentMeaning(info.url(),
-          net::FormatUrl(info.url(), languages_, format_types,
-                         UnescapeRule::SPACES, NULL, NULL,
-                         &inline_autocomplete_offset));
+          net::FormatUrl(info.url(), languages_, net::kFormatUrlOmitAll,
+          UnescapeRule::SPACES, NULL, NULL, &inline_autocomplete_offset));
   if (!autocomplete_input_.prevent_inline_autocomplete())
     match.inline_autocomplete_offset = inline_autocomplete_offset;
   DCHECK((match.inline_autocomplete_offset == string16::npos) ||
          (match.inline_autocomplete_offset <= match.fill_into_edit.length()));
 
   // Format the URL autocomplete presentation.
-  match.contents = net::FormatUrl(info.url(), languages_, format_types,
-                                  UnescapeRule::SPACES, NULL, NULL, NULL);
-  match.contents_class = SpansFromTermMatch(history_match.url_matches,
-                                            match.contents.size());
+  std::vector<size_t> offsets =
+      InMemoryURLIndex::OffsetsFromTermMatches(history_match.url_matches);
+  match.contents =
+      net::FormatUrlWithOffsets(info.url(), languages_, net::kFormatUrlOmitAll,
+                                UnescapeRule::SPACES, NULL, NULL, &offsets);
+  history::TermMatches new_matches =
+      InMemoryURLIndex::ReplaceOffsetsInTermMatches(history_match.url_matches,
+                                                    offsets);
+  match.contents_class = SpansFromTermMatch(new_matches, match.contents.size());
 
   // Format the description autocomplete presentation.
   match.description = info.title();
@@ -177,7 +208,7 @@ int HistoryQuickProvider::CalculateRelevance(int raw_score,
       return 1200;
 
     default:
-      return 400 + raw_score;
+      return raw_score;
   }
 }
 
@@ -196,12 +227,6 @@ ACMatchClassifications HistoryQuickProvider::SpansFromTermMatch(
   size_t match_count = matches.size();
   for (size_t i = 0; i < match_count;) {
     size_t offset = matches[i].offset;
-    // TODO(mrossetti): Remove the following 'if' when http://crbug.com/77210
-    // has been properly fixed. This guards against trying to highlight
-    // substrings which fall off the end of the string as a result of having
-    // encoded characters in the string.
-    if (offset >= text_length)
-      return spans;
     spans.push_back(ACMatchClassification(offset,
                                           ACMatchClassification::MATCH));
     // Skip all adjacent matches.
diff --git a/chrome/browser/autocomplete/history_quick_provider.h b/chrome/browser/autocomplete/history_quick_provider.h
index 3b0277a..996e4be 100644
--- a/chrome/browser/autocomplete/history_quick_provider.h
+++ b/chrome/browser/autocomplete/history_quick_provider.h
@@ -46,7 +46,9 @@ class HistoryQuickProvider : public HistoryProvider {
 
   AutocompleteMatch QuickMatchToACMatch(
       const history::ScoredHistoryMatch& history_match,
-      size_t match_number);
+      size_t match_number,
+      bool prevent_inline_autocomplete,
+      int* next_dont_inline_score);
 
   // Determines the relevance for some input, given its type and which match it
   // is.  If |match_type| is NORMAL, |match_number| is a number
diff --git a/chrome/browser/autocomplete/history_quick_provider_unittest.cc b/chrome/browser/autocomplete/history_quick_provider_unittest.cc
index 4ecf7f1..985340d 100644
--- a/chrome/browser/autocomplete/history_quick_provider_unittest.cc
+++ b/chrome/browser/autocomplete/history_quick_provider_unittest.cc
@@ -43,7 +43,7 @@ struct TestURLInfo {
   {"http://news.google.com/?ned=us&topic=n", "Google News - U.S.", 2, 2, 0},
   {"http://news.google.com/", "Google News", 1, 1, 0},
   {"http://foo.com/", "Dir", 5, 5, 0},
-  {"http://foo.com/dir/", "Dir", 2, 2, 0},
+  {"http://foo.com/dir/", "Dir", 2, 1, 10},
   {"http://foo.com/dir/another/", "Dir", 5, 1, 0},
   {"http://foo.com/dir/another/again/", "Dir", 10, 0, 0},
   {"http://foo.com/dir/another/again/myfile.html", "File", 10, 2, 0},
@@ -62,12 +62,12 @@ struct TestURLInfo {
   {"http://daysagoest.com/x/c", "DC", 1, 1, 2},
   {"http://daysagoest.com/x/d", "DD", 1, 1, 3},
   {"http://daysagoest.com/y/e", "DE", 1, 1, 4},
-  {"http://abcdefghixyzjklmnopqrstuvw.com/a", "", 1, 1, 0},
+  {"http://abcdefghixyzjklmnopqrstuvw.com/a", "", 3, 1, 0},
   {"http://spaces.com/path%20with%20spaces/foo.html", "Spaces", 2, 2, 0},
-  {"http://abcdefghijklxyzmnopqrstuvw.com/a", "", 1, 1, 0},
-  {"http://abcdefxyzghijklmnopqrstuvw.com/a", "", 1, 1, 0},
-  {"http://abcxyzdefghijklmnopqrstuvw.com/a", "", 1, 1, 0},
-  {"http://xyzabcdefghijklmnopqrstuvw.com/a", "", 1, 1, 0},
+  {"http://abcdefghijklxyzmnopqrstuvw.com/a", "", 3, 1, 0},
+  {"http://abcdefxyzghijklmnopqrstuvw.com/a", "", 3, 1, 0},
+  {"http://abcxyzdefghijklmnopqrstuvw.com/a", "", 3, 1, 0},
+  {"http://xyzabcdefghijklmnopqrstuvw.com/a", "", 3, 1, 0},
   {"http://cda.com/Dogs%20Cats%20Gorillas%20Sea%20Slugs%20and%20Mice",
    "Dogs & Cats & Mice", 1, 1, 0},
 };
@@ -220,11 +220,11 @@ TEST_F(HistoryQuickProviderTest, SimpleSingleMatch) {
 TEST_F(HistoryQuickProviderTest, MultiMatch) {
   string16 text(ASCIIToUTF16("foo"));
   std::vector<std::string> expected_urls;
-  // Scores high because of completion length.
+  // Scores high because of typed_count.
   expected_urls.push_back("http://foo.com/");
   // Scores high because of visit count.
-  expected_urls.push_back("http://foo.com/dir/another/again/");
-  // Scores high because of visit count but less match span.
+  expected_urls.push_back("http://foo.com/dir/another/");
+  // Scores high because of high visit count.
   expected_urls.push_back("http://foo.com/dir/another/again/myfile.html");
   RunTest(text, expected_urls, "http://foo.com/");
 }
@@ -278,12 +278,12 @@ TEST_F(HistoryQuickProviderTest, EncodingLimitMatch) {
   const size_t max_offset = url.size() - ((6 * 2) + 7);
   for (ACMatchClassifications::const_iterator citer = content.begin();
        citer != content.end(); ++citer)
-    EXPECT_GT(max_offset, citer->offset);
+    EXPECT_LT(citer->offset, max_offset);
   ACMatchClassifications description(ac_matches_[0].description_class);
   std::string page_title("Dogs & Cats & Mice");
-  for (ACMatchClassifications::const_iterator diter = content.begin();
-       diter != content.end(); ++diter)
-    EXPECT_GT(page_title.size(), diter->offset);
+  for (ACMatchClassifications::const_iterator diter = description.begin();
+       diter != description.end(); ++diter)
+    EXPECT_LT(diter->offset, page_title.size());
 }
 
 TEST_F(HistoryQuickProviderTest, Spans) {
diff --git a/chrome/browser/history/in_memory_url_index.cc b/chrome/browser/history/in_memory_url_index.cc
index 67546a2..9085853 100644
--- a/chrome/browser/history/in_memory_url_index.cc
+++ b/chrome/browser/history/in_memory_url_index.cc
@@ -49,20 +49,11 @@ typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry
 
 const size_t InMemoryURLIndex::kNoCachedResultForTerm = -1;
 
-// Scoring constants.
-const float kOrderMaxValue = 50.0;
-const float kStartMaxValue = 50.0;
-const size_t kMaxSignificantStart = 20;
-const float kCompleteMaxValue = 100.0;
-const float kLastVisitMaxValue = 200.0;
-const base::TimeDelta kMaxSignificantDay = base::TimeDelta::FromDays(30);
-const float kVisitCountMaxValue = 100.0;
-const int kMaxSignificantVisits = 20;
-const float kTypedCountMaxValue = 300.0;
-const int kMaxSignificantTyped = 20;
-const float kMaxRawScore = kOrderMaxValue + kStartMaxValue + kCompleteMaxValue +
-    kLastVisitMaxValue + kVisitCountMaxValue + kTypedCountMaxValue;
-const float kMaxNormalizedRawScore = 1000.0;
+// Score ranges used to get a 'base' score for each of the scoring factors
+// (such as recency of last visit, times visited, times the URL was typed,
+// and the quality of the string match). There is a matching value range for
+// each of these scores for each factor.
+const int kScoreRank[] = { 1425, 1200, 900, 400 };
 
 ScoredHistoryMatch::ScoredHistoryMatch()
     : raw_score(0),
@@ -111,6 +102,38 @@ int AccumulateMatchLength(int total, const TermMatch& match) {
   return total + match.length;
 }
 
+// Converts a raw value for some particular scoring factor into a score
+// component for that factor.  The conversion function is piecewise linear, with
+// input values provided in |value_ranks| and resulting output scores from
+// |kScoreRank| (mathematically, f(value_rank[i]) = kScoreRank[i]).  A score
+// cannot be higher than kScoreRank[0], and drops directly to 0 if lower than
+// kScoreRank[3].
+//
+// For example, take |value| == 70 and |value_ranks| == { 100, 50, 30, 10 }.
+// Because 70 falls between ranks 0 (100) and 1 (50), the score is given by the
+// linear function:
+//   score = m * value + b, where
+//   m = (kScoreRank[0] - kScoreRank[1]) / (value_ranks[0] - value_ranks[1])
+//   b = value_ranks[1]
+// Any value higher than 100 would be scored as if it were 100, and any value
+// lower than 10 scored 0.
+int ScoreForValue(int value, const int* value_ranks) {
+  int i = 0;
+  int rank_count = arraysize(kScoreRank);
+  while ((i < rank_count) && ((value_ranks[0] < value_ranks[1]) ?
+         (value > value_ranks[i]) : (value < value_ranks[i])))
+    ++i;
+  if (i >= rank_count)
+    return 0;
+  int score = kScoreRank[i];
+  if (i > 0) {
+    score += (value - value_ranks[i]) *
+        (kScoreRank[i - 1] - kScoreRank[i]) /
+        (value_ranks[i - 1] - value_ranks[i]);
+  }
+  return score;
+}
+
 InMemoryURLIndex::InMemoryURLIndex(const FilePath& history_dir)
     : history_dir_(history_dir),
       history_item_count_(0) {
@@ -342,6 +365,7 @@ ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms(
       // substring match, inserting those which pass in order by score.
       scored_items = std::for_each(history_id_set.begin(), history_id_set.end(),
           AddHistoryMatch(*this, lower_terms)).ScoredMatches();
+
       // Select and sort only the top kMaxMatches results.
       if (scored_items.size() > AutocompleteProvider::kMaxMatches) {
         std::partial_sort(scored_items.begin(),
@@ -648,6 +672,33 @@ TermMatches InMemoryURLIndex::SortAndDeoverlap(const TermMatches& matches) {
 }
 
 // static
+std::vector<size_t> InMemoryURLIndex::OffsetsFromTermMatches(
+    const TermMatches& matches) {
+  std::vector<size_t> offsets;
+  for (TermMatches::const_iterator i = matches.begin(); i != matches.end(); ++i)
+    offsets.push_back(i->offset);
+  return offsets;
+}
+
+// static
+TermMatches InMemoryURLIndex::ReplaceOffsetsInTermMatches(
+    const TermMatches& matches,
+    const std::vector<size_t>& offsets) {
+  DCHECK_EQ(matches.size(), offsets.size());
+  TermMatches new_matches;
+  std::vector<size_t>::const_iterator offset_iter = offsets.begin();
+  for (TermMatches::const_iterator term_iter = matches.begin();
+       term_iter != matches.end(); ++term_iter, ++offset_iter) {
+    if (*offset_iter != string16::npos) {
+      TermMatch new_match(*term_iter);
+      new_match.offset = *offset_iter;
+      new_matches.push_back(new_match);
+    }
+  }
+  return new_matches;
+}
+
+// static
 ScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL(
     const URLRow& row,
     const String16Vector& terms) {
@@ -688,52 +739,61 @@ ScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL(
   // Get partial scores based on term matching. Note that the score for
   // each of the URL and title are adjusted by the fraction of the
   // terms appearing in each.
-  int url_score = RawScoreForMatches(match.url_matches, url.size()) *
+  int url_score = ScoreComponentForMatches(match.url_matches, url.size()) *
       match.url_matches.size() / terms.size();
-  int title_score = RawScoreForMatches(match.title_matches, title.size()) *
-      match.title_matches.size() / terms.size();
+  int title_score =
+      ScoreComponentForMatches(match.title_matches, title.size()) *
+      static_cast<int>(match.title_matches.size()) /
+      static_cast<int>(terms.size());
   // Arbitrarily pick the best.
   // TODO(mrossetti): It might make sense that a term which appears in both the
   // URL and the Title should boost the score a bit.
   int term_score = std::max(url_score, title_score);
+  if (term_score == 0)
+    return match;
+
+  // Factor in recency of visit, visit count and typed count attributes of the
+  // URLRow.
+  const int kDaysAgoLevel[] = { 0, 10, 20, 30 };
+  int score = ScoreForValue((base::Time::Now() -
+      row.last_visit()).InDays(), kDaysAgoLevel);
+  const int kVisitCountLevel[] = { 30, 10, 5, 3 };
+  int visit_count_value = ScoreForValue(row.visit_count(), kVisitCountLevel);
+  const int kTypedCountLevel[] = { 10, 5, 3, 1 };
+  int typed_count_value = ScoreForValue(row.typed_count(), kTypedCountLevel);
+
+  // Determine how many of the factors comprising the final score are
+  // significant by summing the relative factors for each and subtracting how
+  // many will be 'discarded' even if they are low.
+  const int kVisitCountMultiplier = 2;
+  const int kTypedCountMultiplier = 3;
+  const int kSignificantFactors =
+      kVisitCountMultiplier +  // Visit count factor plus
+      kTypedCountMultiplier +  // typed count factor plus
+      2 -                      // one each for string match and last visit
+      2;                       // minus 2 insignificant factors.
+  // The following, in effect, discards up to |kSignificantFactors| low scoring
+  // elements which contribute little to the score but which can inordinately
+  // drag down an otherwise good score.
+  match.raw_score = std::min(kScoreRank[0], (term_score + score +
+      (visit_count_value * kVisitCountMultiplier) + (typed_count_value *
+      kTypedCountMultiplier)) / kSignificantFactors);
 
-  // Factor in attributes of the URLRow.
-  // Items which have been visited recently score higher.
-  int64 delta_time = (kMaxSignificantDay -
-      std::min((base::Time::Now() - row.last_visit()),
-               kMaxSignificantDay)).ToInternalValue();
-  float last_visit_value =
-      (static_cast<float>(delta_time) /
-       static_cast<float>(kMaxSignificantDay.ToInternalValue())) *
-      kLastVisitMaxValue;
-
-  float visit_count_value =
-      (static_cast<float>(std::min(row.visit_count(),
-       kMaxSignificantVisits))) / static_cast<float>(kMaxSignificantVisits) *
-      kVisitCountMaxValue;
-
-  float typed_count_value =
-      (static_cast<float>(std::min(row.typed_count(),
-       kMaxSignificantTyped))) / static_cast<float>(kMaxSignificantTyped) *
-      kTypedCountMaxValue;
-
-  float raw_score = term_score + last_visit_value + visit_count_value +
-                    typed_count_value;
-
-  // Normalize the score.
-  match.raw_score = static_cast<int>((raw_score / kMaxRawScore) *
-                                     kMaxNormalizedRawScore);
   return match;
 }
 
-int InMemoryURLIndex::RawScoreForMatches(const TermMatches& matches,
-                                         size_t max_length) {
+int InMemoryURLIndex::ScoreComponentForMatches(const TermMatches& matches,
+                                               size_t max_length) {
   // TODO(mrossetti): This is good enough for now but must be fine-tuned.
   if (matches.empty())
     return 0;
 
-  // Search terms appearing in order score higher.
-  float order_value = 10.0;
+  // Score component for whether the input terms (if more than one) were found
+  // in the same order in the match.  Start with kOrderMaxValue points divided
+  // equally among (number of terms - 1); then discount each of those terms that
+  // is out-of-order in the match.
+  const int kOrderMaxValue = 250;
+  int order_value = kOrderMaxValue;
   if (matches.size() > 1) {
     int max_possible_out_of_order = matches.size() - 1;
     int out_of_order = 0;
@@ -741,33 +801,42 @@ int InMemoryURLIndex::RawScoreForMatches(const TermMatches& matches,
       if (matches[i - 1].term_num > matches[i].term_num)
         ++out_of_order;
     }
-    order_value =
-        (static_cast<float>(max_possible_out_of_order - out_of_order) /
-         max_possible_out_of_order) * kOrderMaxValue;
+    order_value = (max_possible_out_of_order - out_of_order) * kOrderMaxValue /
+        max_possible_out_of_order;
   }
 
-  // Search terms which appear earlier score higher.
-  // No points if the first search term does not appear in the first
-  // kMaxSignificantStart chars.
-  float start_value =
-      (static_cast<float>(kMaxSignificantStart -
-       std::min(kMaxSignificantStart, matches[0].offset))) /
-      static_cast<float>(kMaxSignificantStart) * kStartMaxValue;
-
-  // Search terms which comprise a greater portion score higher.
+  // Score component for how early in the match string the first search term
+  // appears.  Start with kStartMaxValue points and discount by
+  // 1/kMaxSignificantStart points for each character later than the first at
+  // which the term begins. No points are earned if the start of the match
+  // occurs at or after kMaxSignificantStart.
+  const size_t kMaxSignificantStart = 20;
+  const int kStartMaxValue = 250;
+  int start_value = (kMaxSignificantStart -
+      std::min(kMaxSignificantStart, matches[0].offset)) * kStartMaxValue /
+      kMaxSignificantStart;
+
+  // Score component for how much of the matched string the input terms cover.
+  // kCompleteMaxValue points times the fraction of the URL/page title string
+  // that was matched.
   size_t term_length_total = std::accumulate(matches.begin(), matches.end(),
                                              0, AccumulateMatchLength);
-  float complete_value =
-      (static_cast<float>(term_length_total) / static_cast<float>(max_length)) *
-      kStartMaxValue;
-  return static_cast<int>(order_value + start_value + complete_value);
+  const int kCompleteMaxValue = 500;
+  int complete_value = term_length_total * kCompleteMaxValue / max_length;
+
+  int raw_score = order_value + start_value + complete_value;
+  const int kTermScoreLevel[] = { 1000, 650, 500, 200 };
+
+  // Scale the sum of the three components above into a single score component
+  // on the same scale as that used in ScoredMatchForURL().
+  return ScoreForValue(raw_score, kTermScoreLevel);
 }
 
 InMemoryURLIndex::AddHistoryMatch::AddHistoryMatch(
     const InMemoryURLIndex& index,
     const String16Vector& lower_terms)
-    : index_(index),
-      lower_terms_(lower_terms) {
+  : index_(index),
+    lower_terms_(lower_terms) {
 }
 
 InMemoryURLIndex::AddHistoryMatch::~AddHistoryMatch() {}
diff --git a/chrome/browser/history/in_memory_url_index.h b/chrome/browser/history/in_memory_url_index.h
index e50b07f..cf02970 100644
--- a/chrome/browser/history/in_memory_url_index.h
+++ b/chrome/browser/history/in_memory_url_index.h
@@ -165,6 +165,15 @@ class InMemoryURLIndex {
   static String16Vector WordVectorFromString16(const string16& uni_string,
                                                bool break_on_space);
 
+  // Extract and return the offsets from |matches|.
+  static std::vector<size_t> OffsetsFromTermMatches(const TermMatches& matches);
+
+  // Replace the offsets in |matches| with those given in |offsets|, deleting
+  // any which are npos, and return the updated list of matches.
+  static TermMatches ReplaceOffsetsInTermMatches(
+      const TermMatches& matches,
+      const std::vector<size_t>& offsets);
+
  private:
   friend class AddHistoryMatch;
   FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization);
@@ -329,11 +338,11 @@ class InMemoryURLIndex {
       const URLRow& row,
       const String16Vector& terms_vector);
 
-  // Calculates a partial raw score based on position, ordering and total
+  // Calculates a component score based on position, ordering and total
   // substring match size using metrics recorded in |matches|. |max_length|
   // is the length of the string against which the terms are being searched.
-  static int RawScoreForMatches(const TermMatches& matches,
-                                size_t max_length);
+  static int ScoreComponentForMatches(const TermMatches& matches,
+                                      size_t max_length);
 
   // Sorts and removes overlapping substring matches from |matches| and
   // returns the cleaned up matches.
diff --git a/chrome/browser/history/in_memory_url_index_unittest.cc b/chrome/browser/history/in_memory_url_index_unittest.cc
index dee196c..37e64fb 100644
--- a/chrome/browser/history/in_memory_url_index_unittest.cc
+++ b/chrome/browser/history/in_memory_url_index_unittest.cc
@@ -204,7 +204,7 @@ TEST_F(InMemoryURLIndexTest, Retrieval) {
   matches = url_index_->HistoryItemsForTerms(terms);
   ASSERT_EQ(2U, matches.size());
   // The results should be in descending score order.
-  EXPECT_GT(matches[0].raw_score, matches[1].raw_score);
+  EXPECT_GE(matches[0].raw_score, matches[1].raw_score);
 
   // Search which should result in nearly perfect result.
   terms.clear();
@@ -228,7 +228,7 @@ TEST_F(InMemoryURLIndexTest, Retrieval) {
   matches = url_index_->HistoryItemsForTerms(terms);
   ASSERT_EQ(1U, matches.size());
   // The results should have a poor score.
-  EXPECT_LT(matches[0].raw_score, 200);
+  EXPECT_LT(matches[0].raw_score, 500);
   EXPECT_EQ(33, matches[0].url_info.id());
   EXPECT_EQ("http://quiteuselesssearchresultxyz.com/",
             matches[0].url_info.url().spec());  // Note: URL gets lowercased.
@@ -263,7 +263,7 @@ TEST_F(InMemoryURLIndexTest, TitleSearch) {
   url_index_.reset(new InMemoryURLIndex());
   url_index_->Init(this, "en,ja,hi,zh");
   // Signal if someone has changed the test DB.
-  EXPECT_EQ(28U, url_index_->history_info_map_.size());
+  EXPECT_EQ(25U, url_index_->history_info_map_.size());
   InMemoryURLIndex::String16Vector terms;
 
   // Ensure title is being searched.
@@ -367,6 +367,31 @@ TEST_F(InMemoryURLIndexTest, StaticFunctions) {
     EXPECT_EQ(expected_offsets[i], matches_c[i].offset);
 }
 
+TEST_F(InMemoryURLIndexTest, OffsetsAndTermMatches) {
+  // Test OffsetsFromTermMatches
+  history::TermMatches matches_a;
+  matches_a.push_back(history::TermMatch(1, 1, 2));
+  matches_a.push_back(history::TermMatch(2, 4, 3));
+  matches_a.push_back(history::TermMatch(3, 9, 1));
+  matches_a.push_back(history::TermMatch(3, 10, 1));
+  matches_a.push_back(history::TermMatch(4, 14, 5));
+  std::vector<size_t> offsets =
+      InMemoryURLIndex::OffsetsFromTermMatches(matches_a);
+  const size_t expected_offsets_a[] = {1, 4, 9, 10, 14};
+  ASSERT_EQ(offsets.size(), arraysize(expected_offsets_a));
+  for (size_t i = 0; i < offsets.size(); ++i)
+    EXPECT_EQ(expected_offsets_a[i], offsets[i]);
+
+  // Test ReplaceOffsetsInTermMatches
+  offsets[2] = string16::npos;
+  history::TermMatches matches_b =
+      InMemoryURLIndex::ReplaceOffsetsInTermMatches(matches_a, offsets);
+  const size_t expected_offsets_b[] = {1, 4, 10, 14};
+  ASSERT_EQ(arraysize(expected_offsets_b), matches_b.size());
+  for (size_t i = 0; i < matches_b.size(); ++i)
+    EXPECT_EQ(expected_offsets_b[i], matches_b[i].offset);
+}
+
 TEST_F(InMemoryURLIndexTest, TypedCharacterCaching) {
   // Verify that match results for previously typed characters are retained
   // (in the term_char_word_set_cache_) and reused, if possible, in future
@@ -416,7 +441,7 @@ TEST_F(InMemoryURLIndexTest, TypedCharacterCaching) {
 }
 
 TEST_F(InMemoryURLIndexTest, Scoring) {
-  URLRow row_a(MakeURLRow("http://abcdef", "fedcba", 20, 0, 20));
+  URLRow row_a(MakeURLRow("http://abcdef", "fedcba", 3, 30, 1));
   // Test scores based on position.
   ScoredHistoryMatch scored_a(
       InMemoryURLIndex::ScoredMatchForURL(row_a, Make1Term("abc")));
@@ -434,20 +459,20 @@ TEST_F(InMemoryURLIndexTest, Scoring) {
       InMemoryURLIndex::ScoredMatchForURL(row_a, Make2Terms("def", "abc")));
   EXPECT_GT(scored_d.raw_score, scored_e.raw_score);
   // Test scores based on visit_count.
-  URLRow row_b(MakeURLRow("http://abcdef", "fedcba", 10, 0, 20));
+  URLRow row_b(MakeURLRow("http://abcdef", "fedcba", 10, 30, 1));
   ScoredHistoryMatch scored_f(
       InMemoryURLIndex::ScoredMatchForURL(row_b, Make1Term("abc")));
-  EXPECT_GT(scored_a.raw_score, scored_f.raw_score);
+  EXPECT_GT(scored_f.raw_score, scored_a.raw_score);
   // Test scores based on last_visit.
-  URLRow row_c(MakeURLRow("http://abcdef", "fedcba", 20, 2, 20));
+  URLRow row_c(MakeURLRow("http://abcdef", "fedcba", 3, 10, 1));
   ScoredHistoryMatch scored_g(
       InMemoryURLIndex::ScoredMatchForURL(row_c, Make1Term("abc")));
-  EXPECT_GT(scored_a.raw_score, scored_g.raw_score);
+  EXPECT_GT(scored_g.raw_score, scored_a.raw_score);
   // Test scores based on typed_count.
-  URLRow row_d(MakeURLRow("http://abcdef", "fedcba", 20, 0, 10));
+  URLRow row_d(MakeURLRow("http://abcdef", "fedcba", 3, 30, 10));
   ScoredHistoryMatch scored_h(
       InMemoryURLIndex::ScoredMatchForURL(row_d, Make1Term("abc")));
-  EXPECT_GT(scored_a.raw_score, scored_h.raw_score);
+  EXPECT_GT(scored_h.raw_score, scored_a.raw_score);
 }
 
 TEST_F(InMemoryURLIndexTest, AddNewRows) {
diff --git a/chrome/test/data/History/url_history_provider_test.db.txt b/chrome/test/data/History/url_history_provider_test.db.txt
index 26be362..5fd7821 100644
--- a/chrome/test/data/History/url_history_provider_test.db.txt
+++ b/chrome/test/data/History/url_history_provider_test.db.txt
@@ -26,36 +26,33 @@
   The ordering, URLs and titles must be kept in sync with the unit tests found
   in in_memory_url_index_unittest.cc.
 */
-INSERT INTO "urls" VALUES(1,'http://www.reuters.com/article/idUSN0839880620100708','UPDATE 1-US 30-yr mortgage rate drops to new record low | Reuters',3,1,2,0,29);
+INSERT INTO "urls" VALUES(1,'http://www.reuters.com/article/idUSN0839880620100708','UPDATE 1-US 30-yr mortgage rate drops to new record low | Reuters',3,1,2,0,29);  // Qualifies
 INSERT INTO "urls" VALUES(2,'http://www.golfweek.com/news/2010/jul/08/goydos-opens-john-deere-classic-59/','Goydos opens John Deere Classic with 59',3,1,4,0,27);
-INSERT INTO "urls" VALUES(3,'http://www.businessandmedia.org/articles/2010/20100708120415.aspx','LeBronomics: Could High Taxes Influence James'' Team Decision?',4,1,2,0,28);
-INSERT INTO "urls" VALUES(4,'http://www.realclearmarkets.com/articles/2010/07/08/diversity_in_the_financial_sector_98562.html','RealClearMarkets - Racial, Gender Quotas in the Financial Bill?',4,1,4,0,0);
-INSERT INTO "urls" VALUES(5,'http://drudgereport.com/','DRUDGE REPORT 2010',3,2,2,0,0);
-INSERT INTO "urls" VALUES(6,'http://totalfinder.binaryage.com/','TotalFinder brings tabs to your native Finder and more!',3,2,4,0,26);
-INSERT INTO "urls" VALUES(7,'http://www.clickgamer.com/products/pid_5269/screenshots/j2me/large/ingame5.png','ingame5.png 176×208 pixels',4,2,2,0,21);
-INSERT INTO "urls" VALUES(8,'http://getsharekit.com/','ShareKit : Drop-in Share Features for all iOS Apps',4,2,4,0,20);
+INSERT INTO "urls" VALUES(3,'http://www.businessandmedia.org/articles/2010/20100708120415.aspx','LeBronomics: Could High Taxes Influence James'' Team Decision?',4,1,2,0,28);  // Qualifies
+INSERT INTO "urls" VALUES(4,'http://www.realclearmarkets.com/articles/2010/07/08/diversity_in_the_financial_sector_98562.html','RealClearMarkets - Racial, Gender Quotas in the Financial Bill?',4,1,4,0,0);  // Qualifies
+INSERT INTO "urls" VALUES(5,'http://drudgereport.com/','DRUDGE REPORT 2010',3,2,2,0,0);  // Qualifies
+INSERT INTO "urls" VALUES(6,'http://totalfinder.binaryage.com/','TotalFinder brings tabs to your native Finder and more!',3,2,4,0,26);  // Qualifies
+INSERT INTO "urls" VALUES(8,'http://getsharekit.com/','ShareKit : Drop-in Share Features for all iOS Apps',4,2,4,0,20);  // Qualifies
 INSERT INTO "urls" VALUES(9,'http://en.wikipedia.org/wiki/Control-Z','Control-Z - Wikipedia, the free encyclopedia',0,0,6,0,0);
 INSERT INTO "urls" VALUES(10,'http://vmware.com/info?id=724','VMware Account Management Login',1,0,6,0,0);
 INSERT INTO "urls" VALUES(11,'http://www.tech-recipes.com/rx/2621/os_x_change_path_environment_variable/','OS X: Change your PATH environment variable | Mac system administration | Tech-Recipes',0,1,6,0,14);
-INSERT INTO "urls" VALUES(12,'http://view.atdmt.com/PPJ/iview/194841301/direct;wi.160;hi.600/01?click=','',6,6,0,1,0);
-INSERT INTO "urls" VALUES(13,'http://www.itmedia.co.jp/news/','ITmedia News／ITの今が見える、明日が分かる',6,6,0,0,25);
-INSERT INTO "urls" VALUES(14,'http://www.nikkei.co.jp/','NIKKEI NET（日経ネット）は日本経済新聞 電子版に生まれ変わりました',6,6,0,0,73);
-INSERT INTO "urls" VALUES(15,'http://www.cnn.com/','CNN.com International - Breaking, World, Business, Sports, Entertainment and Video News',6,6,0,0,89);
-INSERT INTO "urls" VALUES(16,'http://www.zdnet.com/','Technology News, Analysis, Comments and Product Reviews for IT Professionals | ZDNet',6,6,0,0,652);
-INSERT INTO "urls" VALUES(17,'http://www.crash.net/','Crash.Net | Formula 1 & MotoGP | Motorsport News',6,6,0,0,239);
-INSERT INTO "urls" VALUES(18,'http://www.theinquirer.net/','THE INQUIRER - Microprocessor, Server, Memory, PCS, Graphics, Networking, Storage',6,6,0,0,79);
-INSERT INTO "urls" VALUES(19,'http://www.theregister.co.uk/','The Register: Sci/Tech News for the World',6,6,0,0,74);
-INSERT INTO "urls" VALUES(20,'http://blogs.technet.com/markrussinovich/','Mark''s Blog - Site Home - TechNet Blogs',6,6,0,0,685);
-INSERT INTO "urls" VALUES(21,'http://www.icu-project.org/','ICU Home Page (ICU - International Components for Unicode)',6,6,0,0,445);
-INSERT INTO "urls" VALUES(22,'http://site.icu-project.org/','ICU Home Page (ICU - International Components for Unicode)',6,6,0,0,445);
-INSERT INTO "urls" VALUES(23,'http://icu-project.org/apiref/icu4c/','ICU 4.2: Main Page',6,6,0,0,212);
-INSERT INTO "urls" VALUES(24,'http://www.danilatos.com/event-test/ExperimentTest.html','Experimentation Harness',6,6,0,0,0);
-INSERT INTO "urls" VALUES(25,'http://www.codeguru.com/','CodeGuru : codeguru',6,6,0,0,110);
-INSERT INTO "urls" VALUES(26,'http://www.codeproject.com/','Your Development Resource - CodeProject',6,6,0,0,369);
-INSERT INTO "urls" VALUES(27,'http://www.tomshardware.com/us/#redir','Tom''s Hardware: Hardware News, Tests and Reviews',6,6,0,0,65);
-INSERT INTO "urls" VALUES(28,'http://www.ddj.com/windows/184416623','Dr. Dobb''s | Avoiding the Visual C++ Runtime Library | 2 1, 2003',6,6,0,0,0);
-INSERT INTO "urls" VALUES(29,'http://svcs.cnn.com/weather/getForecast?time=34&mode=json_html&zipCode=336736767676&locCode=EGLL&celcius=true&csiID=csi2','',6,6,0,1,0);
-INSERT INTO "urls" VALUES(30,'http://www.drudgery.com/Dogs%20Cats%20Gorillas%20Dolphine%20Sea%20Slugsand%20Mice','Life in the Slow Lane',3,2,2,0,0);
+INSERT INTO "urls" VALUES(12,'http://view.atdmt.com/PPJ/iview/194841301/direct;wi.160;hi.600/01?click=','',6,6,0,1,0);  // Qualifies
+INSERT INTO "urls" VALUES(15,'http://www.cnn.com/','CNN.com International - Breaking, World, Business, Sports, Entertainment and Video News',6,6,0,0,89);  // Qualifies
+INSERT INTO "urls" VALUES(16,'http://www.zdnet.com/','Technology News, Analysis, Comments and Product Reviews for IT Professionals | ZDNet',6,6,0,0,652);  // Qualifies
+INSERT INTO "urls" VALUES(17,'http://www.crash.net/','Crash.Net | Formula 1 & MotoGP | Motorsport News',6,6,0,0,239);  // Qualifies
+INSERT INTO "urls" VALUES(18,'http://www.theinquirer.net/','THE INQUIRER - Microprocessor, Server, Memory, PCS, Graphics, Networking, Storage',6,6,0,0,79);  // Qualifies
+INSERT INTO "urls" VALUES(19,'http://www.theregister.co.uk/','The Register: Sci/Tech News for the World',6,6,0,0,74);  // Qualifies
+INSERT INTO "urls" VALUES(20,'http://blogs.technet.com/markrussinovich/','Mark''s Blog - Site Home - TechNet Blogs',6,6,0,0,685);  // Qualifies
+INSERT INTO "urls" VALUES(21,'http://www.icu-project.org/','ICU Home Page (ICU - International Components for Unicode)',6,6,0,0,445);  // Qualifies
+INSERT INTO "urls" VALUES(22,'http://site.icu-project.org/','ICU Home Page (ICU - International Components for Unicode)',6,6,0,0,445);  // Qualifies
+INSERT INTO "urls" VALUES(23,'http://icu-project.org/apiref/icu4c/','ICU 4.2: Main Page',6,6,0,0,212);  // Qualifies
+INSERT INTO "urls" VALUES(24,'http://www.danilatos.com/event-test/ExperimentTest.html','Experimentation Harness',6,6,0,0,0);  // Qualifies
+INSERT INTO "urls" VALUES(25,'http://www.codeguru.com/','CodeGuru : codeguru',6,6,0,0,110);  // Qualifies
+INSERT INTO "urls" VALUES(26,'http://www.codeproject.com/','Your Development Resource - CodeProject',6,6,0,0,369);  // Qualifies
+INSERT INTO "urls" VALUES(27,'http://www.tomshardware.com/us/#redir','Tom''s Hardware: Hardware News, Tests and Reviews',6,6,0,0,65);  // Qualifies
+INSERT INTO "urls" VALUES(28,'http://www.ddj.com/windows/184416623','Dr. Dobb''s | Avoiding the Visual C++ Runtime Library | 2 1, 2003',6,6,0,0,0);  // Qualifies
+INSERT INTO "urls" VALUES(29,'http://svcs.cnn.com/weather/getForecast?time=34&mode=json_html&zipCode=336736767676&locCode=EGLL&celcius=true&csiID=csi2','',6,6,0,1,0);  // Qualifies
+INSERT INTO "urls" VALUES(30,'http://www.drudgery.com/Dogs%20and%20Mice','Life in the Slow Lane',8,2,2,0,0);  // Qualifies
 INSERT INTO "urls" VALUES(31,'http://www.redrudgerydo.com/','Music of the Wild Landscape',0,0,6,0,0);
-INSERT INTO "urls" VALUES(32,'https://NearlyPerfectResult.com/','Practically Perfect Search Result',99,99,0,0,0);
-INSERT INTO "urls" VALUES(33,'http://QuiteUselessSearchResultxyz.com/','Practically Useless Search Result',4,0,99,0,0);
+INSERT INTO "urls" VALUES(32,'https://NearlyPerfectResult.com/','Practically Perfect Search Result',99,99,0,0,0);  // Qualifies
+INSERT INTO "urls" VALUES(33,'http://QuiteUselessSearchResultxyz.com/','Practically Useless Search Result',4,0,99,0,0);  // Qualifies
author	mrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2011-04-20 04:24:55 +0000
committer	mrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2011-04-20 04:24:55 +0000
commit	c99037d1bad96b729e015a84ca19e5a144689ccd (patch)
tree	90fde2ca30a50cd28b60e8368e4a4ca94650e32f /chrome
parent	456c8047e4c8cea12174f649e209798e11d83781 (diff)
download	chromium_src-c99037d1bad96b729e015a84ca19e5a144689ccd.zip chromium_src-c99037d1bad96b729e015a84ca19e5a144689ccd.tar.gz chromium_src-c99037d1bad96b729e015a84ca19e5a144689ccd.tar.bz2