diff options
author | pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-08-16 17:45:22 +0000 |
---|---|---|
committer | pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-08-16 17:45:22 +0000 |
commit | 2c0a4317e8190a04f20dc8377567ca429efe664d (patch) | |
tree | 3ce3fbbdf241be3082784ed7afe9082eb66a5cdc /chrome/browser/autocomplete/history_url_provider.cc | |
parent | d600e2d290492bcd30663c9f7b92aa35a16bc900 (diff) | |
download | chromium_src-2c0a4317e8190a04f20dc8377567ca429efe664d.zip chromium_src-2c0a4317e8190a04f20dc8377567ca429efe664d.tar.gz chromium_src-2c0a4317e8190a04f20dc8377567ca429efe664d.tar.bz2 |
Cleanup:
* Remove using directives
* Make more functions file-scope instead of private + static
* Make .cc and .h order match
BUG=none
TEST=none
Review URL: http://codereview.chromium.org/7661005
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@96973 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/autocomplete/history_url_provider.cc')
-rw-r--r-- | chrome/browser/autocomplete/history_url_provider.cc | 709 |
1 files changed, 363 insertions, 346 deletions
diff --git a/chrome/browser/autocomplete/history_url_provider.cc b/chrome/browser/autocomplete/history_url_provider.cc index 9fcaaef..9a7d69d 100644 --- a/chrome/browser/autocomplete/history_url_provider.cc +++ b/chrome/browser/autocomplete/history_url_provider.cc @@ -26,18 +26,141 @@ #include "googleurl/src/url_util.h" #include "net/base/net_util.h" -using base::Time; -using base::TimeDelta; -using base::TimeTicks; -using history::Prefix; -using history::Prefixes; -using history::HistoryMatch; -using history::HistoryMatches; - -namespace history { - -// Returns true if |url| is just a host (e.g. "http://www.google.com/") and -// not some other subpage (e.g. "http://www.google.com/foo.html"). +namespace { + +// Ensures that |matches| contains an entry for |info|, which may mean adding a +// new such entry (using |input_location| and |match_in_scheme|). +// +// If |promote| is true, this also ensures the entry is the first element in +// |matches|, moving or adding it to the front as appropriate. When |promote| +// is false, existing matches are left in place, and newly added matches are +// placed at the back. +void EnsureMatchPresent(const history::URLRow& info, + size_t input_location, + bool match_in_scheme, + history::HistoryMatches* matches, + bool promote) { + // |matches| may already have an entry for this. + for (history::HistoryMatches::iterator i(matches->begin()); + i != matches->end(); ++i) { + if (i->url_info.url() == info.url()) { + // Rotate it to the front if the caller wishes. + if (promote) + std::rotate(matches->begin(), i, i + 1); + return; + } + } + + // No entry, so create one. + history::HistoryMatch match(info, input_location, match_in_scheme, true); + if (promote) + matches->push_front(match); + else + matches->push_back(match); +} + +// Given the user's |input| and a |match| created from it, reduce the match's +// URL to just a host. If this host still matches the user input, return it. +// Returns the empty string on failure. +GURL ConvertToHostOnly(const history::HistoryMatch& match, + const string16& input) { + // See if we should try to do host-only suggestions for this URL. Nonstandard + // schemes means there's no authority section, so suggesting the host name + // is useless. File URLs are standard, but host suggestion is not useful for + // them either. + const GURL& url = match.url_info.url(); + if (!url.is_valid() || !url.IsStandard() || url.SchemeIsFile()) + return GURL(); + + // Transform to a host-only match. Bail if the host no longer matches the + // user input (e.g. because the user typed more than just a host). + GURL host = url.GetWithEmptyPath(); + if ((host.spec().length() < (match.input_location + input.length()))) + return GURL(); // User typing is longer than this host suggestion. + + const string16 spec = UTF8ToUTF16(host.spec()); + if (spec.compare(match.input_location, input.length(), input)) + return GURL(); // User typing is no longer a prefix. + + return host; +} + +// See if a shorter version of the best match should be created, and if so place +// it at the front of |matches|. This can suggest history URLs that are +// prefixes of the best match (if they've been visited enough, compared to the +// best match), or create host-only suggestions even when they haven't been +// visited before: if the user visited http://example.com/asdf once, we'll +// suggest http://example.com/ even if they've never been to it. +void PromoteOrCreateShorterSuggestion( + history::URLDatabase* db, + const HistoryURLProviderParams& params, + bool have_what_you_typed_match, + const AutocompleteMatch& what_you_typed_match, + history::HistoryMatches* matches) { + if (matches->empty()) + return; // No matches, nothing to do. + + // Determine the base URL from which to search, and whether that URL could + // itself be added as a match. We can add the base iff it's not "effectively + // the same" as any "what you typed" match. + const history::HistoryMatch& match = matches->front(); + GURL search_base = ConvertToHostOnly(match, params.input.text()); + bool can_add_search_base_to_matches = !have_what_you_typed_match; + if (search_base.is_empty()) { + // Search from what the user typed when we couldn't reduce the best match + // to a host. Careful: use a substring of |match| here, rather than the + // first match in |params|, because they might have different prefixes. If + // the user typed "google.com", |what_you_typed_match| will hold + // "http://google.com/", but |match| might begin with + // "http://www.google.com/". + // TODO: this should be cleaned up, and is probably incorrect for IDN. + std::string new_match = match.url_info.url().possibly_invalid_spec(). + substr(0, match.input_location + params.input.text().length()); + search_base = GURL(new_match); + // TODO(mrossetti): There is a degenerate case where the following may + // cause a failure: http://www/~someword/fubar.html. Diagnose. + // See: http://crbug.com/50101 + if (search_base.is_empty()) + return; // Can't construct a valid URL from which to start a search. + } else if (!can_add_search_base_to_matches) { + can_add_search_base_to_matches = + (search_base != what_you_typed_match.destination_url); + } + if (search_base == match.url_info.url()) + return; // Couldn't shorten |match|, so no range of URLs to search over. + + // Search the DB for short URLs between our base and |match|. + history::URLRow info(search_base); + bool promote = true; + // A short URL is only worth suggesting if it's been visited at least a third + // as often as the longer URL. + const int min_visit_count = ((match.url_info.visit_count() - 1) / 3) + 1; + // For stability between the in-memory and on-disk autocomplete passes, when + // the long URL has been typed before, only suggest shorter URLs that have + // also been typed. Otherwise, the on-disk pass could suggest a shorter URL + // (which hasn't been typed) that the in-memory pass doesn't know about, + // thereby making the top match, and thus the behavior of inline + // autocomplete, unstable. + const int min_typed_count = match.url_info.typed_count() ? 1 : 0; + if (!db->FindShortestURLFromBase(search_base.possibly_invalid_spec(), + match.url_info.url().possibly_invalid_spec(), min_visit_count, + min_typed_count, can_add_search_base_to_matches, &info)) { + if (!can_add_search_base_to_matches) + return; // Couldn't find anything and can't add the search base, bail. + + // Try to get info on the search base itself. Promote it to the top if the + // original best match isn't good enough to autocomplete. + db->GetRowForURL(search_base, &info); + promote = match.url_info.typed_count() <= 1; + } + + // Promote or add the desired URL to the list of matches. + EnsureMatchPresent(info, match.input_location, match.match_in_scheme, + matches, promote); +} + +// Returns true if |url| is just a host (e.g. "http://www.google.com/") and not +// some other subpage (e.g. "http://www.google.com/foo.html"). bool IsHostOnly(const GURL& url) { DCHECK(url.is_valid()); return (!url.has_path() || (url.path() == "/")) && !url.has_query() && @@ -45,7 +168,8 @@ bool IsHostOnly(const GURL& url) { } // Acts like the > operator for URLInfo classes. -bool CompareHistoryMatch(const HistoryMatch& a, const HistoryMatch& b) { +bool CompareHistoryMatch(const history::HistoryMatch& a, + const history::HistoryMatch& b) { // A URL that has been typed at all is better than one that has never been // typed. (Note "!"s on each side) if (!a.url_info.typed_count() != !b.url_info.typed_count()) @@ -63,8 +187,8 @@ bool CompareHistoryMatch(const HistoryMatch& a, const HistoryMatch& b) { // For URLs that have each been typed once, a host (alone) is better than a // page inside. if (a.url_info.typed_count() == 1) { - const bool a_is_host_only = history::IsHostOnly(a.url_info.url()); - if (a_is_host_only != history::IsHostOnly(b.url_info.url())) + const bool a_is_host_only = IsHostOnly(a.url_info.url()); + if (a_is_host_only != IsHostOnly(b.url_info.url())) return a_is_host_only; } @@ -76,29 +200,46 @@ bool CompareHistoryMatch(const HistoryMatch& a, const HistoryMatch& b) { return a.url_info.last_visit() > b.url_info.last_visit(); } -// Given the user's |input| and a |match| created from it, reduce the -// match's URL to just a host. If this host still matches the user input, -// return it. Returns the empty string on failure. -GURL ConvertToHostOnly(const HistoryMatch& match, const string16& input) { - // See if we should try to do host-only suggestions for this URL. Nonstandard - // schemes means there's no authority section, so suggesting the host name - // is useless. File URLs are standard, but host suggestion is not useful for - // them either. - const GURL& url = match.url_info.url(); - if (!url.is_valid() || !url.IsStandard() || url.SchemeIsFile()) - return GURL(); +// Determines the confidence for a |match| when compared to all the |matches|. +// Returns a number in the range [0, 1]. +float CalculateConfidence(const history::HistoryMatch& match, + const history::HistoryMatches& matches) { + // Calculate a score based on typed count. + const float typed_numerator = match.url_info.typed_count(); + float typed_denominator = 0.0f; + for (history::HistoryMatches::const_iterator it = matches.begin(); + it != matches.end(); ++it) { + typed_denominator += it->url_info.typed_count(); + } + const float typed_score = (typed_denominator > 0.0f) ? + (typed_numerator / typed_denominator) : 0.0f; - // Transform to a host-only match. Bail if the host no longer matches the - // user input (e.g. because the user typed more than just a host). - GURL host = url.GetWithEmptyPath(); - if ((host.spec().length() < (match.input_location + input.length()))) - return GURL(); // User typing is longer than this host suggestion. + // Calculate a score based on visit count + const float visit_numerator = match.url_info.visit_count(); + float visit_denominator = 0.0f; + for (history::HistoryMatches::const_iterator it = matches.begin(); + it != matches.end(); ++it) { + visit_denominator += it->url_info.visit_count(); + } + const float visit_score = (visit_denominator > 0.0f) ? + (visit_numerator / visit_denominator) : 0.0f; - const string16 spec = UTF8ToUTF16(host.spec()); - if (spec.compare(match.input_location, input.length(), input)) - return GURL(); // User typing is no longer a prefix. + // Calculate a score based on innermost matching. + const float innermost_score = (match.innermost_match ? 1.0f : 0.0f); - return host; + // TODO(dominich): Add a boost for bookmarked pages? + // Prefer typed count to visit count as: + // - It's a better indicator of what the user wants to open given that they + // are typing in the address bar (users tend to open certain URLs by typing + // and others by e.g. bookmarks, so visit_count is a good indicator of + // overall interest but a bad one for specifically omnibox interest). + // - Since the DB query is sorted by typed_count, the results may be + // effectively a random selection as far as visit_counts are concerned + // (meaning many high-visit_count-URLs may be present in one query and + // absent in a similar one), leading to wild swings in confidence for the + // same result across distinct queries. + // Add a boost for innermost matches (matches after scheme or 'www.'). + return (0.5f * typed_score) + (0.3f * visit_score) + (0.2f * innermost_score); } } // namespace history @@ -116,7 +257,8 @@ HistoryURLProviderParams::HistoryURLProviderParams( dont_suggest_exact_input(false) { } -HistoryURLProviderParams::~HistoryURLProviderParams() {} +HistoryURLProviderParams::~HistoryURLProviderParams() { +} HistoryURLProvider::HistoryURLProvider(ACProviderListener* listener, Profile* profile) @@ -158,12 +300,12 @@ void HistoryURLProvider::ExecuteWithDB(history::HistoryBackend* backend, if (!db) { params->failed = true; } else if (!params->cancel_flag.IsSet()) { - TimeTicks beginning_time = TimeTicks::Now(); + base::TimeTicks beginning_time = base::TimeTicks::Now(); DoAutocomplete(backend, db, params); UMA_HISTOGRAM_TIMES("Autocomplete.HistoryAsyncQueryTime", - TimeTicks::Now() - beginning_time); + base::TimeTicks::Now() - beginning_time); } // Return the results (if any) to the main thread. @@ -198,10 +340,10 @@ void HistoryURLProvider::DoAutocomplete(history::HistoryBackend* backend, // Get the matching URLs from the DB typedef std::vector<history::URLRow> URLRowVector; URLRowVector url_matches; - HistoryMatches history_matches; + history::HistoryMatches history_matches; - for (Prefixes::const_iterator i(prefixes_.begin()); i != prefixes_.end(); - ++i) { + for (history::Prefixes::const_iterator i(prefixes_.begin()); + i != prefixes_.end(); ++i) { if (params->cancel_flag.IsSet()) return; // Canceled in the middle of a query, give up. // We only need kMaxMatches results in the end, but before we get there we @@ -214,9 +356,9 @@ void HistoryURLProvider::DoAutocomplete(history::HistoryBackend* backend, kMaxMatches * 2, (backend == NULL), &url_matches); for (URLRowVector::const_iterator j(url_matches.begin()); j != url_matches.end(); ++j) { - const Prefix* best_prefix = BestPrefix(j->url(), string16()); + const history::Prefix* best_prefix = BestPrefix(j->url(), string16()); DCHECK(best_prefix != NULL); - history_matches.push_back(HistoryMatch(*j, i->prefix.length(), + history_matches.push_back(history::HistoryMatch(*j, i->prefix.length(), !i->num_components, i->num_components >= best_prefix->num_components)); } @@ -265,7 +407,7 @@ void HistoryURLProvider::DoAutocomplete(history::HistoryBackend* backend, // Convert the history matches to autocomplete matches. for (size_t i = first_match; i < history_matches.size(); ++i) { - const HistoryMatch& match = history_matches[i]; + const history::HistoryMatch& match = history_matches[i]; DCHECK(!have_what_you_typed_match || (match.url_info.url() != GURL(params->matches.front().destination_url))); @@ -304,6 +446,153 @@ void HistoryURLProvider::QueryComplete( listener_->OnProviderUpdate(true); } +HistoryURLProvider::~HistoryURLProvider() { + // Note: This object can get leaked on shutdown if there are pending + // requests on the database (which hold a reference to us). Normally, these + // messages get flushed for each thread. We do a round trip from main, to + // history, back to main while holding a reference. If the main thread + // completes before the history thread, the message to delegate back to the + // main thread will not run and the reference will leak. Therefore, don't do + // anything on destruction. +} + +// static +history::Prefixes HistoryURLProvider::GetPrefixes() { + // We'll complete text following these prefixes. + // NOTE: There's no requirement that these be in any particular order. + history::Prefixes prefixes; + prefixes.push_back(history::Prefix(ASCIIToUTF16("https://www."), 2)); + prefixes.push_back(history::Prefix(ASCIIToUTF16("http://www."), 2)); + prefixes.push_back(history::Prefix(ASCIIToUTF16("ftp://ftp."), 2)); + prefixes.push_back(history::Prefix(ASCIIToUTF16("ftp://www."), 2)); + prefixes.push_back(history::Prefix(ASCIIToUTF16("https://"), 1)); + prefixes.push_back(history::Prefix(ASCIIToUTF16("http://"), 1)); + prefixes.push_back(history::Prefix(ASCIIToUTF16("ftp://"), 1)); + // Empty string catches within-scheme matches as well. + prefixes.push_back(history::Prefix(string16(), 0)); + return prefixes; +} + +// static +int HistoryURLProvider::CalculateRelevance(AutocompleteInput::Type input_type, + MatchType match_type, + size_t match_number) { + switch (match_type) { + case INLINE_AUTOCOMPLETE: + return 1400; + + case WHAT_YOU_TYPED: + return 1200; + + default: + return 900 + static_cast<int>(match_number); + } +} + +void HistoryURLProvider::RunAutocompletePasses( + const AutocompleteInput& input, + bool fixup_input_and_run_pass_1) { + matches_.clear(); + + if ((input.type() == AutocompleteInput::INVALID) || + (input.type() == AutocompleteInput::FORCED_QUERY)) + return; + + // Create a match for exactly what the user typed. This will only be used as + // a fallback in case we can't get the history service or URL DB; otherwise, + // we'll run this again in DoAutocomplete() and use that result instead. + const bool trim_http = !HasHTTPScheme(input.text()); + // Don't do this for queries -- while we can sometimes mark up a match for + // this, it's not what the user wants, and just adds noise. + if ((input.type() != AutocompleteInput::QUERY) && + input.canonicalized_url().is_valid()) + matches_.push_back(SuggestExactInput(input, trim_http)); + + // We'll need the history service to run both passes, so try to obtain it. + if (!profile_) + return; + HistoryService* const history_service = + profile_->GetHistoryService(Profile::EXPLICIT_ACCESS); + if (!history_service) + return; + + // Create the data structure for the autocomplete passes. We'll save this off + // onto the |params_| member for later deletion below if we need to run pass + // 2. + std::string languages(languages_); + if (languages.empty()) { + languages = + profile_->GetPrefs()->GetString(prefs::kAcceptLanguages); + } + scoped_ptr<HistoryURLProviderParams> params( + new HistoryURLProviderParams(input, trim_http, languages)); + + params->prevent_inline_autocomplete = + PreventInlineAutocomplete(input); + + if (fixup_input_and_run_pass_1) { + // Do some fixup on the user input before matching against it, so we provide + // good results for local file paths, input with spaces, etc. + // NOTE: This purposefully doesn't take input.desired_tld() into account; if + // it did, then holding "ctrl" would change all the results from the + // HistoryURLProvider provider, not just the What You Typed Result. + const string16 fixed_text(FixupUserInput(input)); + if (fixed_text.empty()) { + // Conceivably fixup could result in an empty string (although I don't + // have cases where this happens offhand). We can't do anything with + // empty input, so just bail; otherwise we'd crash later. + return; + } + params->input.set_text(fixed_text); + + // Pass 1: Get the in-memory URL database, and use it to find and promote + // the inline autocomplete match, if any. + history::URLDatabase* url_db = history_service->InMemoryDatabase(); + // url_db can be NULL if it hasn't finished initializing (or failed to + // initialize). In this case all we can do is fall back on the second + // pass. + // + // TODO(pkasting): We should just block here until this loads. Any time + // someone unloads the history backend, we'll get inconsistent inline + // autocomplete behavior here. + if (url_db) { + DoAutocomplete(NULL, url_db, params.get()); + // params->matches now has the matches we should expose to the provider. + // Pass 2 expects a "clean slate" set of matches. + matches_.clear(); + matches_.swap(params->matches); + UpdateStarredStateOfMatches(); + } + } + + // Pass 2: Ask the history service to call us back on the history thread, + // where we can read the full on-disk DB. + if (input.matches_requested() == AutocompleteInput::ALL_MATCHES) { + done_ = false; + params_ = params.release(); // This object will be destroyed in + // QueryComplete() once we're done with it. + history_service->ScheduleAutocomplete(this, params_); + } +} + +const history::Prefix* HistoryURLProvider::BestPrefix( + const GURL& url, + const string16& prefix_suffix) const { + const history::Prefix* best_prefix = NULL; + const string16 text(UTF8ToUTF16(url.spec())); + for (history::Prefixes::const_iterator i(prefixes_.begin()); + i != prefixes_.end(); ++i) { + if ((best_prefix == NULL) || + (i->num_components > best_prefix->num_components)) { + string16 prefix_with_suffix(i->prefix + prefix_suffix); + if ((text.length() >= prefix_with_suffix.length()) && + !text.compare(0, prefix_with_suffix.length(), prefix_with_suffix)) + best_prefix = &(*i); + } + } + return best_prefix; +} + AutocompleteMatch HistoryURLProvider::SuggestExactInput( const AutocompleteInput& input, bool trim_http) { @@ -337,7 +626,8 @@ AutocompleteMatch HistoryURLProvider::SuggestExactInput( // This relies on match.destination_url being the non-prefix-trimmed version // of match.contents. match.contents = display_string; - const Prefix* best_prefix = BestPrefix(match.destination_url, input.text()); + const history::Prefix* best_prefix = + BestPrefix(match.destination_url, input.text()); // Because of the vagaries of GURL, it's possible for match.destination_url // to not contain the user's input at all. In this case don't mark anything // as a match. @@ -355,10 +645,11 @@ AutocompleteMatch HistoryURLProvider::SuggestExactInput( return match; } -bool HistoryURLProvider::FixupExactSuggestion(history::URLDatabase* db, - const AutocompleteInput& input, - AutocompleteMatch* match, - HistoryMatches* matches) const { +bool HistoryURLProvider::FixupExactSuggestion( + history::URLDatabase* db, + const AutocompleteInput& input, + AutocompleteMatch* match, + history::HistoryMatches* matches) const { DCHECK(match != NULL); DCHECK(matches != NULL); @@ -419,8 +710,8 @@ bool HistoryURLProvider::FixupExactSuggestion(history::URLDatabase* db, bool HistoryURLProvider::PromoteMatchForInlineAutocomplete( HistoryURLProviderParams* params, - const HistoryMatch& match, - const HistoryMatches& matches) { + const history::HistoryMatch& match, + const history::HistoryMatches& matches) { // Promote the first match if it's been typed at least n times, where n == 1 // for "simple" (host-only) URLs and n == 2 for others. We set a higher bar // for these long URLs because it's less likely that users will want to visit @@ -429,7 +720,7 @@ bool HistoryURLProvider::PromoteMatchForInlineAutocomplete( // hand, we wouldn't want to immediately start autocompleting it. if (!match.url_info.typed_count() || ((match.url_info.typed_count() == 1) && - !history::IsHostOnly(match.url_info.url()))) + !IsHostOnly(match.url_info.url()))) return false; // In the case where the user has typed "foo.com" and visited (but not typed) @@ -444,284 +735,9 @@ bool HistoryURLProvider::PromoteMatchForInlineAutocomplete( return true; } -HistoryURLProvider::~HistoryURLProvider() {} - -// static -history::Prefixes HistoryURLProvider::GetPrefixes() { - // We'll complete text following these prefixes. - // NOTE: There's no requirement that these be in any particular order. - Prefixes prefixes; - prefixes.push_back(Prefix(ASCIIToUTF16("https://www."), 2)); - prefixes.push_back(Prefix(ASCIIToUTF16("http://www."), 2)); - prefixes.push_back(Prefix(ASCIIToUTF16("ftp://ftp."), 2)); - prefixes.push_back(Prefix(ASCIIToUTF16("ftp://www."), 2)); - prefixes.push_back(Prefix(ASCIIToUTF16("https://"), 1)); - prefixes.push_back(Prefix(ASCIIToUTF16("http://"), 1)); - prefixes.push_back(Prefix(ASCIIToUTF16("ftp://"), 1)); - // Empty string catches within-scheme matches as well. - prefixes.push_back(Prefix(string16(), 0)); - return prefixes; -} - -// static -int HistoryURLProvider::CalculateRelevance(AutocompleteInput::Type input_type, - MatchType match_type, - size_t match_number) { - switch (match_type) { - case INLINE_AUTOCOMPLETE: - return 1400; - - case WHAT_YOU_TYPED: - return 1200; - - default: - return 900 + static_cast<int>(match_number); - } -} - -// static -float HistoryURLProvider::CalculateConfidence( - const history::HistoryMatch& match, - const history::HistoryMatches& matches) { - // Calculate a score based on typed count. - const float typed_numerator = match.url_info.typed_count(); - float typed_denominator = 0.0f; - for (history::HistoryMatches::const_iterator it = matches.begin(); - it != matches.end(); ++it) { - typed_denominator += it->url_info.typed_count(); - } - const float typed_score = (typed_denominator > 0.0f) ? - (typed_numerator / typed_denominator) : 0.0f; - - // Calculate a score based on visit count - const float visit_numerator = match.url_info.visit_count(); - float visit_denominator = 0.0f; - for (history::HistoryMatches::const_iterator it = matches.begin(); - it != matches.end(); ++it) { - visit_denominator += it->url_info.visit_count(); - } - const float visit_score = (visit_denominator > 0.0f) ? - (visit_numerator / visit_denominator) : 0.0f; - - // Calculate a score based on innermost matching. - const float innermost_score = (match.innermost_match ? 1.0f : 0.0f); - - // TODO(dominich): Add a boost for bookmarked pages? - // Prefer typed count to visit count as: - // - It's a better indicator of what the user wants to open given that they - // are typing in the address bar (users tend to open certain URLs by typing - // and others by e.g. bookmarks, so visit_count is a good indicator of - // overall interest but a bad one for specifically omnibox interest). - // - Since the DB query is sorted by typed_count, the results may be - // effectively a random selection as far as visit_counts are concerned - // (meaning many high-visit_count-URLs may be present in one query and - // absent in a similar one), leading to wild swings in confidence for the - // same result across distinct queries. - // Add a boost for innermost matches (matches after scheme or 'www.'). - return (0.5f * typed_score) + (0.3f * visit_score) + (0.2f * innermost_score); -} - -// static -void HistoryURLProvider::PromoteOrCreateShorterSuggestion( - history::URLDatabase* db, - const HistoryURLProviderParams& params, - bool have_what_you_typed_match, - const AutocompleteMatch& what_you_typed_match, - HistoryMatches* matches) { - if (matches->empty()) - return; // No matches, nothing to do. - - // Determine the base URL from which to search, and whether that URL could - // itself be added as a match. We can add the base iff it's not "effectively - // the same" as any "what you typed" match. - const HistoryMatch& match = matches->front(); - GURL search_base = history::ConvertToHostOnly(match, params.input.text()); - bool can_add_search_base_to_matches = !have_what_you_typed_match; - if (search_base.is_empty()) { - // Search from what the user typed when we couldn't reduce the best match - // to a host. Careful: use a substring of |match| here, rather than the - // first match in |params|, because they might have different prefixes. If - // the user typed "google.com", |what_you_typed_match| will hold - // "http://google.com/", but |match| might begin with - // "http://www.google.com/". - // TODO: this should be cleaned up, and is probably incorrect for IDN. - std::string new_match = match.url_info.url().possibly_invalid_spec(). - substr(0, match.input_location + params.input.text().length()); - search_base = GURL(new_match); - // TODO(mrossetti): There is a degenerate case where the following may - // cause a failure: http://www/~someword/fubar.html. Diagnose. - // See: http://crbug.com/50101 - if (search_base.is_empty()) - return; // Can't construct a valid URL from which to start a search. - } else if (!can_add_search_base_to_matches) { - can_add_search_base_to_matches = - (search_base != what_you_typed_match.destination_url); - } - if (search_base == match.url_info.url()) - return; // Couldn't shorten |match|, so no range of URLs to search over. - - // Search the DB for short URLs between our base and |match|. - history::URLRow info(search_base); - bool promote = true; - // A short URL is only worth suggesting if it's been visited at least a third - // as often as the longer URL. - const int min_visit_count = ((match.url_info.visit_count() - 1) / 3) + 1; - // For stability between the in-memory and on-disk autocomplete passes, when - // the long URL has been typed before, only suggest shorter URLs that have - // also been typed. Otherwise, the on-disk pass could suggest a shorter URL - // (which hasn't been typed) that the in-memory pass doesn't know about, - // thereby making the top match, and thus the behavior of inline - // autocomplete, unstable. - const int min_typed_count = match.url_info.typed_count() ? 1 : 0; - if (!db->FindShortestURLFromBase(search_base.possibly_invalid_spec(), - match.url_info.url().possibly_invalid_spec(), min_visit_count, - min_typed_count, can_add_search_base_to_matches, &info)) { - if (!can_add_search_base_to_matches) - return; // Couldn't find anything and can't add the search base, bail. - - // Try to get info on the search base itself. Promote it to the top if the - // original best match isn't good enough to autocomplete. - db->GetRowForURL(search_base, &info); - promote = match.url_info.typed_count() <= 1; - } - - // Promote or add the desired URL to the list of matches. - EnsureMatchPresent(info, match.input_location, match.match_in_scheme, - matches, promote); -} - -// static -void HistoryURLProvider::EnsureMatchPresent(const history::URLRow& info, - size_t input_location, - bool match_in_scheme, - HistoryMatches* matches, - bool promote) { - // |matches| may already have an entry for this. - for (HistoryMatches::iterator i(matches->begin()); i != matches->end(); - ++i) { - if (i->url_info.url() == info.url()) { - // Rotate it to the front if the caller wishes. - if (promote) - std::rotate(matches->begin(), i, i + 1); - return; - } - } - - // No entry, so create one. - HistoryMatch match(info, input_location, match_in_scheme, true); - if (promote) - matches->push_front(match); - else - matches->push_back(match); -} - -void HistoryURLProvider::RunAutocompletePasses( - const AutocompleteInput& input, - bool fixup_input_and_run_pass_1) { - matches_.clear(); - - if ((input.type() == AutocompleteInput::INVALID) || - (input.type() == AutocompleteInput::FORCED_QUERY)) - return; - - // Create a match for exactly what the user typed. This will only be used as - // a fallback in case we can't get the history service or URL DB; otherwise, - // we'll run this again in DoAutocomplete() and use that result instead. - const bool trim_http = !HasHTTPScheme(input.text()); - // Don't do this for queries -- while we can sometimes mark up a match for - // this, it's not what the user wants, and just adds noise. - if ((input.type() != AutocompleteInput::QUERY) && - input.canonicalized_url().is_valid()) - matches_.push_back(SuggestExactInput(input, trim_http)); - - // We'll need the history service to run both passes, so try to obtain it. - if (!profile_) - return; - HistoryService* const history_service = - profile_->GetHistoryService(Profile::EXPLICIT_ACCESS); - if (!history_service) - return; - - // Create the data structure for the autocomplete passes. We'll save this off - // onto the |params_| member for later deletion below if we need to run pass - // 2. - std::string languages(languages_); - if (languages.empty()) { - languages = - profile_->GetPrefs()->GetString(prefs::kAcceptLanguages); - } - scoped_ptr<HistoryURLProviderParams> params( - new HistoryURLProviderParams(input, trim_http, languages)); - - params->prevent_inline_autocomplete = - PreventInlineAutocomplete(input); - - if (fixup_input_and_run_pass_1) { - // Do some fixup on the user input before matching against it, so we provide - // good results for local file paths, input with spaces, etc. - // NOTE: This purposefully doesn't take input.desired_tld() into account; if - // it did, then holding "ctrl" would change all the results from the - // HistoryURLProvider provider, not just the What You Typed Result. - const string16 fixed_text(FixupUserInput(input)); - if (fixed_text.empty()) { - // Conceivably fixup could result in an empty string (although I don't - // have cases where this happens offhand). We can't do anything with - // empty input, so just bail; otherwise we'd crash later. - return; - } - params->input.set_text(fixed_text); - - // Pass 1: Get the in-memory URL database, and use it to find and promote - // the inline autocomplete match, if any. - history::URLDatabase* url_db = history_service->InMemoryDatabase(); - // url_db can be NULL if it hasn't finished initializing (or failed to - // initialize). In this case all we can do is fall back on the second - // pass. - // - // TODO(pkasting): We should just block here until this loads. Any time - // someone unloads the history backend, we'll get inconsistent inline - // autocomplete behavior here. - if (url_db) { - DoAutocomplete(NULL, url_db, params.get()); - // params->matches now has the matches we should expose to the provider. - // Pass 2 expects a "clean slate" set of matches. - matches_.clear(); - matches_.swap(params->matches); - UpdateStarredStateOfMatches(); - } - } - - // Pass 2: Ask the history service to call us back on the history thread, - // where we can read the full on-disk DB. - if (input.matches_requested() == AutocompleteInput::ALL_MATCHES) { - done_ = false; - params_ = params.release(); // This object will be destroyed in - // QueryComplete() once we're done with it. - history_service->ScheduleAutocomplete(this, params_); - } -} - -const history::Prefix* HistoryURLProvider::BestPrefix( - const GURL& url, - const string16& prefix_suffix) const { - const Prefix* best_prefix = NULL; - const string16 text(UTF8ToUTF16(url.spec())); - for (Prefixes::const_iterator i(prefixes_.begin()); i != prefixes_.end(); - ++i) { - if ((best_prefix == NULL) || - (i->num_components > best_prefix->num_components)) { - string16 prefix_with_suffix(i->prefix + prefix_suffix); - if ((text.length() >= prefix_with_suffix.length()) && - !text.compare(0, prefix_with_suffix.length(), prefix_with_suffix)) - best_prefix = &(*i); - } - } - return best_prefix; -} - -void HistoryURLProvider::SortMatches(HistoryMatches* matches) const { +void HistoryURLProvider::SortMatches(history::HistoryMatches* matches) const { // Sort by quality, best first. - std::sort(matches->begin(), matches->end(), &history::CompareHistoryMatch); + std::sort(matches->begin(), matches->end(), &CompareHistoryMatch); // Remove duplicate matches (caused by the search string appearing in one of // the prefixes as well as after it). Consider the following scenario: @@ -743,8 +759,8 @@ void HistoryURLProvider::SortMatches(HistoryMatches* matches) const { // we use an index instead of an iterator in the outer loop, and don't // precalculate the ending position. for (size_t i = 0; i < matches->size(); ++i) { - HistoryMatches::iterator j(matches->begin() + i + 1); - while (j != matches->end()) { + for (history::HistoryMatches::iterator j(matches->begin() + i + 1); + j != matches->end(); ) { if ((*matches)[i].url_info.url() == j->url_info.url()) j = matches->erase(j); else @@ -753,9 +769,11 @@ void HistoryURLProvider::SortMatches(HistoryMatches* matches) const { } } -void HistoryURLProvider::CullPoorMatches(HistoryMatches* matches) const { +void HistoryURLProvider::CullPoorMatches( + history::HistoryMatches* matches) const { const base::Time& threshold(history::AutocompleteAgeThreshold()); - for (HistoryMatches::iterator i(matches->begin()); i != matches->end();) { + for (history::HistoryMatches::iterator i(matches->begin()); + i != matches->end();) { if (RowQualifiesAsSignificant(i->url_info, threshold)) ++i; else @@ -764,7 +782,7 @@ void HistoryURLProvider::CullPoorMatches(HistoryMatches* matches) const { } void HistoryURLProvider::CullRedirects(history::HistoryBackend* backend, - HistoryMatches* matches, + history::HistoryMatches* matches, size_t max_results) const { for (size_t source = 0; (source < matches->size()) && (source < max_results); ) { @@ -794,38 +812,37 @@ void HistoryURLProvider::CullRedirects(history::HistoryBackend* backend, } size_t HistoryURLProvider::RemoveSubsequentMatchesOf( - HistoryMatches* matches, + history::HistoryMatches* matches, size_t source_index, const std::vector<GURL>& remove) const { size_t next_index = source_index + 1; // return value = item after source // Find the first occurrence of any URL in the redirect chain. We want to // keep this one since it is rated the highest. - HistoryMatches::iterator first(std::find_first_of( + history::HistoryMatches::iterator first(std::find_first_of( matches->begin(), matches->end(), remove.begin(), remove.end())); - DCHECK(first != matches->end()) << - "We should have always found at least the original URL."; + DCHECK(first != matches->end()) << "We should have always found at least the " + "original URL."; // Find any following occurrences of any URL in the redirect chain, these // should be deleted. - HistoryMatches::iterator next(first); - next++; // Start searching immediately after the one we found already. - while (next != matches->end() && - (next = std::find_first_of(next, matches->end(), remove.begin(), - remove.end())) != matches->end()) { + for (history::HistoryMatches::iterator next(std::find_first_of(first + 1, + matches->end(), remove.begin(), remove.end())); + next != matches->end(); next = std::find_first_of(next, matches->end(), + remove.begin(), remove.end())) { // Remove this item. When we remove an item before the source index, we // need to shift it to the right and remember that so we can return it. next = matches->erase(next); if (static_cast<size_t>(next - matches->begin()) < next_index) - next_index--; + --next_index; } return next_index; } AutocompleteMatch HistoryURLProvider::HistoryMatchToACMatch( HistoryURLProviderParams* params, - const HistoryMatch& history_match, - const HistoryMatches& history_matches, + const history::HistoryMatch& history_match, + const history::HistoryMatches& history_matches, MatchType match_type, size_t match_number) { const history::URLRow& info = history_match.url_info; |