// Copyright (c) 2012 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "components/omnibox/browser/bookmark_provider.h" #include <algorithm> #include <functional> #include <vector> #include "base/macros.h" #include "base/strings/string_util.h" #include "base/strings/utf_string_conversions.h" #include "components/bookmarks/browser/bookmark_match.h" #include "components/bookmarks/browser/bookmark_model.h" #include "components/metrics/proto/omnibox_input_type.pb.h" #include "components/omnibox/browser/autocomplete_provider_client.h" #include "components/omnibox/browser/autocomplete_result.h" #include "components/omnibox/browser/history_provider.h" #include "components/omnibox/browser/url_prefix.h" #include "components/prefs/pref_service.h" #include "components/url_formatter/url_formatter.h" #include "url/url_constants.h" using bookmarks::BookmarkMatch; using bookmarks::BookmarkNode; typedef std::vector<BookmarkMatch> BookmarkMatches; namespace { // Removes leading spaces from |title| before displaying, otherwise it looks // funny. In the process, corrects |title_match_positions| so the correct // characters are highlighted. void CorrectTitleAndMatchPositions( base::string16* title, BookmarkMatch::MatchPositions* title_match_positions) { size_t leading_whitespace_chars = title->length(); base::TrimWhitespace(*title, base::TRIM_LEADING, title); leading_whitespace_chars-= title->length(); if (leading_whitespace_chars == 0) return; for (query_parser::Snippet::MatchPositions::iterator it = title_match_positions->begin(); it != title_match_positions->end(); ++it) { (*it) = query_parser::Snippet::MatchPosition( it->first - leading_whitespace_chars, it->second - leading_whitespace_chars); } } } // namespace // BookmarkProvider ------------------------------------------------------------ BookmarkProvider::BookmarkProvider(AutocompleteProviderClient* client) : AutocompleteProvider(AutocompleteProvider::TYPE_BOOKMARK), client_(client), bookmark_model_(NULL) { if (client_) { bookmark_model_ = client_->GetBookmarkModel(); languages_ = client_->GetAcceptLanguages(); } } void BookmarkProvider::Start(const AutocompleteInput& input, bool minimal_changes) { if (minimal_changes) return; matches_.clear(); if (input.from_omnibox_focus() || input.text().empty() || (input.type() == metrics::OmniboxInputType::FORCED_QUERY)) return; DoAutocomplete(input); } BookmarkProvider::~BookmarkProvider() {} void BookmarkProvider::DoAutocomplete(const AutocompleteInput& input) { // We may not have a bookmark model for some unit tests. if (!bookmark_model_) return; BookmarkMatches matches; // Retrieve enough bookmarks so that we have a reasonable probability of // suggesting the one that the user desires. const size_t kMaxBookmarkMatches = 50; // GetBookmarksMatching returns bookmarks matching the user's // search terms using the following rules: // - The search text is broken up into search terms. Each term is searched // for separately. // - Term matches are always performed against the start of a word. 'def' // will match against 'define' but not against 'indefinite'. // - Terms must be at least three characters in length in order to perform // partial word matches. Any term of lesser length will only be used as an // exact match. 'def' will match against 'define' but 'de' will not match. // - A search containing multiple terms will return results with those words // occuring in any order. // - Terms enclosed in quotes comprises a phrase that must match exactly. // - Multiple terms enclosed in quotes will require those exact words in that // exact order to match. // // Please refer to the code for BookmarkIndex::GetBookmarksMatching for // complete details of how searches are performed against the user's // bookmarks. bookmark_model_->GetBookmarksMatching(input.text(), kMaxBookmarkMatches, &matches); if (matches.empty()) return; // There were no matches. const base::string16 fixed_up_input(FixupUserInput(input).second); for (BookmarkMatches::const_iterator i = matches.begin(); i != matches.end(); ++i) { // Create and score the AutocompleteMatch. If its score is 0 then the // match is discarded. AutocompleteMatch match(BookmarkMatchToACMatch(input, fixed_up_input, *i)); if (match.relevance > 0) matches_.push_back(match); } // Sort and clip the resulting matches. size_t num_matches = std::min(matches_.size(), AutocompleteProvider::kMaxMatches); std::partial_sort(matches_.begin(), matches_.begin() + num_matches, matches_.end(), AutocompleteMatch::MoreRelevant); matches_.resize(num_matches); } namespace { // for_each helper functor that calculates a match factor for each query term // when calculating the final score. // // Calculate a 'factor' from 0 to the bookmark's title length for a match // based on 1) how many characters match and 2) where the match is positioned. class ScoringFunctor { public: // |title_length| is the length of the bookmark title against which this // match will be scored. explicit ScoringFunctor(size_t title_length) : title_length_(static_cast<double>(title_length)), scoring_factor_(0.0) { } void operator()(const query_parser::Snippet::MatchPosition& match) { double term_length = static_cast<double>(match.second - match.first); scoring_factor_ += term_length * (title_length_ - match.first) / title_length_; } double ScoringFactor() { return scoring_factor_; } private: double title_length_; double scoring_factor_; }; } // namespace AutocompleteMatch BookmarkProvider::BookmarkMatchToACMatch( const AutocompleteInput& input, const base::string16& fixed_up_input_text, const BookmarkMatch& bookmark_match) { // The AutocompleteMatch we construct is non-deletable because the only // way to support this would be to delete the underlying bookmark, which is // unlikely to be what the user intends. AutocompleteMatch match(this, 0, false, AutocompleteMatchType::BOOKMARK_TITLE); base::string16 title(bookmark_match.node->GetTitle()); BookmarkMatch::MatchPositions new_title_match_positions = bookmark_match.title_match_positions; CorrectTitleAndMatchPositions(&title, &new_title_match_positions); const GURL& url(bookmark_match.node->url()); const base::string16& url_utf16 = base::UTF8ToUTF16(url.spec()); size_t inline_autocomplete_offset = URLPrefix::GetInlineAutocompleteOffset( input.text(), fixed_up_input_text, false, url_utf16); match.destination_url = url; const size_t match_start = bookmark_match.url_match_positions.empty() ? 0 : bookmark_match.url_match_positions[0].first; const bool trim_http = !AutocompleteInput::HasHTTPScheme(input.text()) && ((match_start == base::string16::npos) || (match_start != 0)); std::vector<size_t> offsets = BookmarkMatch::OffsetsFromMatchPositions( bookmark_match.url_match_positions); // In addition to knowing how |offsets| is transformed, we need to know how // |inline_autocomplete_offset| is transformed. We add it to the end of // |offsets|, compute how everything is transformed, then remove it from the // end. offsets.push_back(inline_autocomplete_offset); match.contents = url_formatter::FormatUrlWithOffsets( url, languages_, url_formatter::kFormatUrlOmitAll & ~(trim_http ? 0 : url_formatter::kFormatUrlOmitHTTP), net::UnescapeRule::SPACES, nullptr, nullptr, &offsets); inline_autocomplete_offset = offsets.back(); offsets.pop_back(); BookmarkMatch::MatchPositions new_url_match_positions = BookmarkMatch::ReplaceOffsetsInMatchPositions( bookmark_match.url_match_positions, offsets); match.contents_class = ClassificationsFromMatch(new_url_match_positions, match.contents.size(), true); match.fill_into_edit = AutocompleteInput::FormattedStringWithEquivalentMeaning( url, match.contents, client_->GetSchemeClassifier()); if (inline_autocomplete_offset != base::string16::npos) { // |inline_autocomplete_offset| may be beyond the end of the // |fill_into_edit| if the user has typed an URL with a scheme and the // last character typed is a slash. That slash is removed by the // FormatURLWithOffsets call above. if (inline_autocomplete_offset < match.fill_into_edit.length()) { match.inline_autocompletion = match.fill_into_edit.substr(inline_autocomplete_offset); } match.allowed_to_be_default_match = match.inline_autocompletion.empty() || !HistoryProvider::PreventInlineAutocomplete(input); } match.description = title; match.description_class = ClassificationsFromMatch(bookmark_match.title_match_positions, match.description.size(), false); // Summary on how a relevance score is determined for the match: // // For each match within the bookmark's title or URL (or both), calculate a // 'factor', sum up those factors, then use the sum to figure out a value // between the base score and the maximum score. // // The factor for each match is the product of: // // 1) how many characters in the bookmark's title/URL are part of this match. // This is capped at the length of the bookmark's title // to prevent terms that match in both the title and the URL from // scoring too strongly. // // 2) where the match occurs within the bookmark's title or URL, // giving more points for matches that appear earlier in the string: // ((string_length - position of match start) / string_length). // // Example: Given a bookmark title of 'abcde fghijklm', with a title length // of 14, and two different search terms, 'abcde' and 'fghij', with // start positions of 0 and 6, respectively, 'abcde' will score higher // (with a a partial factor of (14-0)/14 = 1.000 ) than 'fghij' (with // a partial factor of (14-6)/14 = 0.571 ). (In this example neither // term matches in the URL.) // // Once all match factors have been calculated they are summed. If there // are no URL matches, the resulting sum will never be greater than the // length of the bookmark title because of the way the bookmark model matches // and removes overlaps. (In particular, the bookmark model only // matches terms to the beginning of words and it removes all overlapping // matches, keeping only the longest. Together these mean that each // character is included in at most one match.) If there are matches in the // URL, the sum can be greater. // // This sum is then normalized by the length of the bookmark title + 10 // and capped at 1.0. The +10 is to expand the scoring range so fewer // bookmarks will hit the 1.0 cap and hence lose all ability to distinguish // between these high-quality bookmarks. // // The normalized value is multiplied against the scoring range available, // which is the difference between the minimum possible score and the maximum // possible score. This product is added to the minimum possible score to // give the preliminary score. // // If the preliminary score is less than the maximum possible score, 1199, // it can be boosted up to that maximum possible score if the URL referenced // by the bookmark is also referenced by any of the user's other bookmarks. // A count of how many times the bookmark's URL is referenced is determined // and, for each additional reference beyond the one for the bookmark being // scored up to a maximum of three, the score is boosted by a fixed amount // given by |kURLCountBoost|, below. // // Pretend empty titles are identical to the URL. if (title.empty()) title = base::ASCIIToUTF16(url.spec()); ScoringFunctor title_position_functor = for_each(bookmark_match.title_match_positions.begin(), bookmark_match.title_match_positions.end(), ScoringFunctor(title.size())); ScoringFunctor url_position_functor = for_each(bookmark_match.url_match_positions.begin(), bookmark_match.url_match_positions.end(), ScoringFunctor(bookmark_match.node->url().spec().length())); const double title_match_strength = title_position_functor.ScoringFactor(); const double summed_factors = title_match_strength + url_position_functor.ScoringFactor(); const double normalized_sum = std::min(summed_factors / (title.size() + 10), 1.0); // Bookmarks with javascript scheme ("bookmarklets") that do not have title // matches get a lower base and lower maximum score because returning them // for matches in their (often very long) URL looks stupid and is often not // intended by the user. const bool bookmarklet_without_title_match = url.SchemeIs(url::kJavaScriptScheme) && (title_match_strength == 0.0); const int kBaseBookmarkScore = bookmarklet_without_title_match ? 400 : 900; const int kMaxBookmarkScore = bookmarklet_without_title_match ? 799 : 1199; const double kBookmarkScoreRange = static_cast<double>(kMaxBookmarkScore - kBaseBookmarkScore); match.relevance = static_cast<int>(normalized_sum * kBookmarkScoreRange) + kBaseBookmarkScore; // Don't waste any time searching for additional referenced URLs if we // already have a perfect title match. if (match.relevance >= kMaxBookmarkScore) return match; // Boost the score if the bookmark's URL is referenced by other bookmarks. const int kURLCountBoost[4] = { 0, 75, 125, 150 }; std::vector<const BookmarkNode*> nodes; bookmark_model_->GetNodesByURL(url, &nodes); DCHECK_GE(std::min(arraysize(kURLCountBoost), nodes.size()), 1U); match.relevance += kURLCountBoost[std::min(arraysize(kURLCountBoost), nodes.size()) - 1]; match.relevance = std::min(kMaxBookmarkScore, match.relevance); return match; } // static ACMatchClassifications BookmarkProvider::ClassificationsFromMatch( const query_parser::Snippet::MatchPositions& positions, size_t text_length, bool is_url) { ACMatchClassification::Style url_style = is_url ? ACMatchClassification::URL : ACMatchClassification::NONE; ACMatchClassifications classifications; if (positions.empty()) { if (text_length > 0) classifications.push_back(ACMatchClassification(0, url_style)); return classifications; } for (query_parser::Snippet::MatchPositions::const_iterator i = positions.begin(); i != positions.end(); ++i) { AutocompleteMatch::ACMatchClassifications new_class; AutocompleteMatch::ClassifyLocationInString(i->first, i->second - i->first, text_length, url_style, &new_class); classifications = AutocompleteMatch::MergeClassifications( classifications, new_class); } return classifications; }