summaryrefslogtreecommitdiffstats
path: root/chrome/browser/autocomplete/history_url_provider.cc
diff options
context:
space:
mode:
authorpkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-08-16 17:45:22 +0000
committerpkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-08-16 17:45:22 +0000
commit2c0a4317e8190a04f20dc8377567ca429efe664d (patch)
tree3ce3fbbdf241be3082784ed7afe9082eb66a5cdc /chrome/browser/autocomplete/history_url_provider.cc
parentd600e2d290492bcd30663c9f7b92aa35a16bc900 (diff)
downloadchromium_src-2c0a4317e8190a04f20dc8377567ca429efe664d.zip
chromium_src-2c0a4317e8190a04f20dc8377567ca429efe664d.tar.gz
chromium_src-2c0a4317e8190a04f20dc8377567ca429efe664d.tar.bz2
Cleanup:
* Remove using directives
* Make more functions file-scope instead of private + static
* Make .cc and .h order match

BUG=none
TEST=none

Review URL: http://codereview.chromium.org/7661005

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@96973 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/autocomplete/history_url_provider.cc')
-rw-r--r--chrome/browser/autocomplete/history_url_provider.cc709
1 files changed, 363 insertions, 346 deletions
diff --git a/chrome/browser/autocomplete/history_url_provider.cc b/chrome/browser/autocomplete/history_url_provider.cc
index 9fcaaef..9a7d69d 100644
--- a/chrome/browser/autocomplete/history_url_provider.cc
+++ b/chrome/browser/autocomplete/history_url_provider.cc
@@ -26,18 +26,141 @@
#include "googleurl/src/url_util.h"
#include "net/base/net_util.h"
-using base::Time;
-using base::TimeDelta;
-using base::TimeTicks;
-using history::Prefix;
-using history::Prefixes;
-using history::HistoryMatch;
-using history::HistoryMatches;
-
-namespace history {
-
-// Returns true if |url| is just a host (e.g. "http://www.google.com/") and
-// not some other subpage (e.g. "http://www.google.com/foo.html").
+namespace {
+
+// Ensures that |matches| contains an entry for |info|, which may mean adding a
+// new such entry (using |input_location| and |match_in_scheme|).
+//
+// If |promote| is true, this also ensures the entry is the first element in
+// |matches|, moving or adding it to the front as appropriate. When |promote|
+// is false, existing matches are left in place, and newly added matches are
+// placed at the back.
+void EnsureMatchPresent(const history::URLRow& info,
+ size_t input_location,
+ bool match_in_scheme,
+ history::HistoryMatches* matches,
+ bool promote) {
+ // |matches| may already have an entry for this.
+ for (history::HistoryMatches::iterator i(matches->begin());
+ i != matches->end(); ++i) {
+ if (i->url_info.url() == info.url()) {
+ // Rotate it to the front if the caller wishes.
+ if (promote)
+ std::rotate(matches->begin(), i, i + 1);
+ return;
+ }
+ }
+
+ // No entry, so create one.
+ history::HistoryMatch match(info, input_location, match_in_scheme, true);
+ if (promote)
+ matches->push_front(match);
+ else
+ matches->push_back(match);
+}
+
+// Given the user's |input| and a |match| created from it, reduce the match's
+// URL to just a host. If this host still matches the user input, return it.
+// Returns an empty GURL on failure.
+GURL ConvertToHostOnly(const history::HistoryMatch& match,
+ const string16& input) {
+ // See if we should try to do host-only suggestions for this URL. Nonstandard
+ // schemes means there's no authority section, so suggesting the host name
+ // is useless. File URLs are standard, but host suggestion is not useful for
+ // them either.
+ const GURL& url = match.url_info.url();
+ if (!url.is_valid() || !url.IsStandard() || url.SchemeIsFile())
+ return GURL();
+
+ // Transform to a host-only match. Bail if the host no longer matches the
+ // user input (e.g. because the user typed more than just a host).
+ GURL host = url.GetWithEmptyPath();
+ if ((host.spec().length() < (match.input_location + input.length())))
+ return GURL(); // User typing is longer than this host suggestion.
+
+ const string16 spec = UTF8ToUTF16(host.spec());
+ if (spec.compare(match.input_location, input.length(), input))
+ return GURL(); // User typing is no longer a prefix.
+
+ return host;
+}
+
+// See if a shorter version of the best match should be created, and if so place
+// it at the front of |matches|. This can suggest history URLs that are
+// prefixes of the best match (if they've been visited enough, compared to the
+// best match), or create host-only suggestions even when they haven't been
+// visited before: if the user visited http://example.com/asdf once, we'll
+// suggest http://example.com/ even if they've never been to it.
+void PromoteOrCreateShorterSuggestion(
+ history::URLDatabase* db,
+ const HistoryURLProviderParams& params,
+ bool have_what_you_typed_match,
+ const AutocompleteMatch& what_you_typed_match,
+ history::HistoryMatches* matches) {
+ if (matches->empty())
+ return; // No matches, nothing to do.
+
+ // Determine the base URL from which to search, and whether that URL could
+ // itself be added as a match. We can add the base iff it's not "effectively
+ // the same" as any "what you typed" match.
+ const history::HistoryMatch& match = matches->front();
+ GURL search_base = ConvertToHostOnly(match, params.input.text());
+ bool can_add_search_base_to_matches = !have_what_you_typed_match;
+ if (search_base.is_empty()) {
+ // Search from what the user typed when we couldn't reduce the best match
+ // to a host. Careful: use a substring of |match| here, rather than the
+ // first match in |params|, because they might have different prefixes. If
+ // the user typed "google.com", |what_you_typed_match| will hold
+ // "http://google.com/", but |match| might begin with
+ // "http://www.google.com/".
+ // TODO: this should be cleaned up, and is probably incorrect for IDN.
+ std::string new_match = match.url_info.url().possibly_invalid_spec().
+ substr(0, match.input_location + params.input.text().length());
+ search_base = GURL(new_match);
+ // TODO(mrossetti): There is a degenerate case where the following may
+ // cause a failure: http://www/~someword/fubar.html. Diagnose.
+ // See: http://crbug.com/50101
+ if (search_base.is_empty())
+ return; // Can't construct a valid URL from which to start a search.
+ } else if (!can_add_search_base_to_matches) {
+ can_add_search_base_to_matches =
+ (search_base != what_you_typed_match.destination_url);
+ }
+ if (search_base == match.url_info.url())
+ return; // Couldn't shorten |match|, so no range of URLs to search over.
+
+ // Search the DB for short URLs between our base and |match|.
+ history::URLRow info(search_base);
+ bool promote = true;
+ // A short URL is only worth suggesting if it's been visited at least a third
+ // as often as the longer URL.
+ const int min_visit_count = ((match.url_info.visit_count() - 1) / 3) + 1;
+ // For stability between the in-memory and on-disk autocomplete passes, when
+ // the long URL has been typed before, only suggest shorter URLs that have
+ // also been typed. Otherwise, the on-disk pass could suggest a shorter URL
+ // (which hasn't been typed) that the in-memory pass doesn't know about,
+ // thereby making the top match, and thus the behavior of inline
+ // autocomplete, unstable.
+ const int min_typed_count = match.url_info.typed_count() ? 1 : 0;
+ if (!db->FindShortestURLFromBase(search_base.possibly_invalid_spec(),
+ match.url_info.url().possibly_invalid_spec(), min_visit_count,
+ min_typed_count, can_add_search_base_to_matches, &info)) {
+ if (!can_add_search_base_to_matches)
+ return; // Couldn't find anything and can't add the search base, bail.
+
+ // Try to get info on the search base itself. Promote it to the top if the
+ // original best match isn't good enough to autocomplete.
+ db->GetRowForURL(search_base, &info);
+ promote = match.url_info.typed_count() <= 1;
+ }
+
+ // Promote or add the desired URL to the list of matches.
+ EnsureMatchPresent(info, match.input_location, match.match_in_scheme,
+ matches, promote);
+}
+
+// Returns true if |url| is just a host (e.g. "http://www.google.com/") and not
+// some other subpage (e.g. "http://www.google.com/foo.html").
bool IsHostOnly(const GURL& url) {
DCHECK(url.is_valid());
return (!url.has_path() || (url.path() == "/")) && !url.has_query() &&
@@ -45,7 +168,8 @@ bool IsHostOnly(const GURL& url) {
}
// Acts like the > operator for URLInfo classes.
-bool CompareHistoryMatch(const HistoryMatch& a, const HistoryMatch& b) {
+bool CompareHistoryMatch(const history::HistoryMatch& a,
+ const history::HistoryMatch& b) {
// A URL that has been typed at all is better than one that has never been
// typed. (Note "!"s on each side)
if (!a.url_info.typed_count() != !b.url_info.typed_count())
@@ -63,8 +187,8 @@ bool CompareHistoryMatch(const HistoryMatch& a, const HistoryMatch& b) {
// For URLs that have each been typed once, a host (alone) is better than a
// page inside.
if (a.url_info.typed_count() == 1) {
- const bool a_is_host_only = history::IsHostOnly(a.url_info.url());
- if (a_is_host_only != history::IsHostOnly(b.url_info.url()))
+ const bool a_is_host_only = IsHostOnly(a.url_info.url());
+ if (a_is_host_only != IsHostOnly(b.url_info.url()))
return a_is_host_only;
}
@@ -76,29 +200,46 @@ bool CompareHistoryMatch(const HistoryMatch& a, const HistoryMatch& b) {
return a.url_info.last_visit() > b.url_info.last_visit();
}
-// Given the user's |input| and a |match| created from it, reduce the
-// match's URL to just a host. If this host still matches the user input,
-// return it. Returns the empty string on failure.
-GURL ConvertToHostOnly(const HistoryMatch& match, const string16& input) {
- // See if we should try to do host-only suggestions for this URL. Nonstandard
- // schemes means there's no authority section, so suggesting the host name
- // is useless. File URLs are standard, but host suggestion is not useful for
- // them either.
- const GURL& url = match.url_info.url();
- if (!url.is_valid() || !url.IsStandard() || url.SchemeIsFile())
- return GURL();
+// Determines the confidence for a |match| when compared to all the |matches|.
+// Returns a number in the range [0, 1].
+float CalculateConfidence(const history::HistoryMatch& match,
+ const history::HistoryMatches& matches) {
+ // Calculate a score based on typed count.
+ const float typed_numerator = match.url_info.typed_count();
+ float typed_denominator = 0.0f;
+ for (history::HistoryMatches::const_iterator it = matches.begin();
+ it != matches.end(); ++it) {
+ typed_denominator += it->url_info.typed_count();
+ }
+ const float typed_score = (typed_denominator > 0.0f) ?
+ (typed_numerator / typed_denominator) : 0.0f;
- // Transform to a host-only match. Bail if the host no longer matches the
- // user input (e.g. because the user typed more than just a host).
- GURL host = url.GetWithEmptyPath();
- if ((host.spec().length() < (match.input_location + input.length())))
- return GURL(); // User typing is longer than this host suggestion.
+ // Calculate a score based on visit count.
+ const float visit_numerator = match.url_info.visit_count();
+ float visit_denominator = 0.0f;
+ for (history::HistoryMatches::const_iterator it = matches.begin();
+ it != matches.end(); ++it) {
+ visit_denominator += it->url_info.visit_count();
+ }
+ const float visit_score = (visit_denominator > 0.0f) ?
+ (visit_numerator / visit_denominator) : 0.0f;
- const string16 spec = UTF8ToUTF16(host.spec());
- if (spec.compare(match.input_location, input.length(), input))
- return GURL(); // User typing is no longer a prefix.
+ // Calculate a score based on innermost matching.
+ const float innermost_score = (match.innermost_match ? 1.0f : 0.0f);
- return host;
+ // TODO(dominich): Add a boost for bookmarked pages?
+ // Prefer typed count to visit count as:
+ // - It's a better indicator of what the user wants to open given that they
+ // are typing in the address bar (users tend to open certain URLs by typing
+ // and others by e.g. bookmarks, so visit_count is a good indicator of
+ // overall interest but a bad one for specifically omnibox interest).
+ // - Since the DB query is sorted by typed_count, the results may be
+ // effectively a random selection as far as visit_counts are concerned
+ // (meaning many high-visit_count-URLs may be present in one query and
+ // absent in a similar one), leading to wild swings in confidence for the
+ // same result across distinct queries.
+ // Add a boost for innermost matches (matches after scheme or 'www.').
+ return (0.5f * typed_score) + (0.3f * visit_score) + (0.2f * innermost_score);
}
} // namespace history
@@ -116,7 +257,8 @@ HistoryURLProviderParams::HistoryURLProviderParams(
dont_suggest_exact_input(false) {
}
-HistoryURLProviderParams::~HistoryURLProviderParams() {}
+HistoryURLProviderParams::~HistoryURLProviderParams() {
+}
HistoryURLProvider::HistoryURLProvider(ACProviderListener* listener,
Profile* profile)
@@ -158,12 +300,12 @@ void HistoryURLProvider::ExecuteWithDB(history::HistoryBackend* backend,
if (!db) {
params->failed = true;
} else if (!params->cancel_flag.IsSet()) {
- TimeTicks beginning_time = TimeTicks::Now();
+ base::TimeTicks beginning_time = base::TimeTicks::Now();
DoAutocomplete(backend, db, params);
UMA_HISTOGRAM_TIMES("Autocomplete.HistoryAsyncQueryTime",
- TimeTicks::Now() - beginning_time);
+ base::TimeTicks::Now() - beginning_time);
}
// Return the results (if any) to the main thread.
@@ -198,10 +340,10 @@ void HistoryURLProvider::DoAutocomplete(history::HistoryBackend* backend,
// Get the matching URLs from the DB
typedef std::vector<history::URLRow> URLRowVector;
URLRowVector url_matches;
- HistoryMatches history_matches;
+ history::HistoryMatches history_matches;
- for (Prefixes::const_iterator i(prefixes_.begin()); i != prefixes_.end();
- ++i) {
+ for (history::Prefixes::const_iterator i(prefixes_.begin());
+ i != prefixes_.end(); ++i) {
if (params->cancel_flag.IsSet())
return; // Canceled in the middle of a query, give up.
// We only need kMaxMatches results in the end, but before we get there we
@@ -214,9 +356,9 @@ void HistoryURLProvider::DoAutocomplete(history::HistoryBackend* backend,
kMaxMatches * 2, (backend == NULL), &url_matches);
for (URLRowVector::const_iterator j(url_matches.begin());
j != url_matches.end(); ++j) {
- const Prefix* best_prefix = BestPrefix(j->url(), string16());
+ const history::Prefix* best_prefix = BestPrefix(j->url(), string16());
DCHECK(best_prefix != NULL);
- history_matches.push_back(HistoryMatch(*j, i->prefix.length(),
+ history_matches.push_back(history::HistoryMatch(*j, i->prefix.length(),
!i->num_components,
i->num_components >= best_prefix->num_components));
}
@@ -265,7 +407,7 @@ void HistoryURLProvider::DoAutocomplete(history::HistoryBackend* backend,
// Convert the history matches to autocomplete matches.
for (size_t i = first_match; i < history_matches.size(); ++i) {
- const HistoryMatch& match = history_matches[i];
+ const history::HistoryMatch& match = history_matches[i];
DCHECK(!have_what_you_typed_match ||
(match.url_info.url() !=
GURL(params->matches.front().destination_url)));
@@ -304,6 +446,153 @@ void HistoryURLProvider::QueryComplete(
listener_->OnProviderUpdate(true);
}
+HistoryURLProvider::~HistoryURLProvider() {
+ // Note: This object can get leaked on shutdown if there are pending
+ // requests on the database (which hold a reference to us). Normally, these
+ // messages get flushed for each thread. We do a round trip from main, to
+ // history, back to main while holding a reference. If the main thread
+ // completes before the history thread, the message to delegate back to the
+ // main thread will not run and the reference will leak. Therefore, don't do
+ // anything on destruction.
+}
+
+// static
+history::Prefixes HistoryURLProvider::GetPrefixes() {
+ // We'll complete text following these prefixes.
+ // NOTE: There's no requirement that these be in any particular order.
+ history::Prefixes prefixes;
+ prefixes.push_back(history::Prefix(ASCIIToUTF16("https://www."), 2));
+ prefixes.push_back(history::Prefix(ASCIIToUTF16("http://www."), 2));
+ prefixes.push_back(history::Prefix(ASCIIToUTF16("ftp://ftp."), 2));
+ prefixes.push_back(history::Prefix(ASCIIToUTF16("ftp://www."), 2));
+ prefixes.push_back(history::Prefix(ASCIIToUTF16("https://"), 1));
+ prefixes.push_back(history::Prefix(ASCIIToUTF16("http://"), 1));
+ prefixes.push_back(history::Prefix(ASCIIToUTF16("ftp://"), 1));
+ // Empty string catches within-scheme matches as well.
+ prefixes.push_back(history::Prefix(string16(), 0));
+ return prefixes;
+}
+
+// static
+int HistoryURLProvider::CalculateRelevance(AutocompleteInput::Type input_type,
+ MatchType match_type,
+ size_t match_number) {
+ switch (match_type) {
+ case INLINE_AUTOCOMPLETE:
+ return 1400;
+
+ case WHAT_YOU_TYPED:
+ return 1200;
+
+ default:
+ return 900 + static_cast<int>(match_number);
+ }
+}
+
+void HistoryURLProvider::RunAutocompletePasses(
+ const AutocompleteInput& input,
+ bool fixup_input_and_run_pass_1) {
+ matches_.clear();
+
+ if ((input.type() == AutocompleteInput::INVALID) ||
+ (input.type() == AutocompleteInput::FORCED_QUERY))
+ return;
+
+ // Create a match for exactly what the user typed. This will only be used as
+ // a fallback in case we can't get the history service or URL DB; otherwise,
+ // we'll run this again in DoAutocomplete() and use that result instead.
+ const bool trim_http = !HasHTTPScheme(input.text());
+ // Don't do this for queries -- while we can sometimes mark up a match for
+ // this, it's not what the user wants, and just adds noise.
+ if ((input.type() != AutocompleteInput::QUERY) &&
+ input.canonicalized_url().is_valid())
+ matches_.push_back(SuggestExactInput(input, trim_http));
+
+ // We'll need the history service to run both passes, so try to obtain it.
+ if (!profile_)
+ return;
+ HistoryService* const history_service =
+ profile_->GetHistoryService(Profile::EXPLICIT_ACCESS);
+ if (!history_service)
+ return;
+
+ // Create the data structure for the autocomplete passes. We'll save this off
+ // onto the |params_| member for later deletion below if we need to run pass
+ // 2.
+ std::string languages(languages_);
+ if (languages.empty()) {
+ languages =
+ profile_->GetPrefs()->GetString(prefs::kAcceptLanguages);
+ }
+ scoped_ptr<HistoryURLProviderParams> params(
+ new HistoryURLProviderParams(input, trim_http, languages));
+
+ params->prevent_inline_autocomplete =
+ PreventInlineAutocomplete(input);
+
+ if (fixup_input_and_run_pass_1) {
+ // Do some fixup on the user input before matching against it, so we provide
+ // good results for local file paths, input with spaces, etc.
+ // NOTE: This purposefully doesn't take input.desired_tld() into account; if
+ // it did, then holding "ctrl" would change all the results from the
+ // HistoryURLProvider provider, not just the What You Typed Result.
+ const string16 fixed_text(FixupUserInput(input));
+ if (fixed_text.empty()) {
+ // Conceivably fixup could result in an empty string (although I don't
+ // have cases where this happens offhand). We can't do anything with
+ // empty input, so just bail; otherwise we'd crash later.
+ return;
+ }
+ params->input.set_text(fixed_text);
+
+ // Pass 1: Get the in-memory URL database, and use it to find and promote
+ // the inline autocomplete match, if any.
+ history::URLDatabase* url_db = history_service->InMemoryDatabase();
+ // url_db can be NULL if it hasn't finished initializing (or failed to
+ // initialize). In this case all we can do is fall back on the second
+ // pass.
+ //
+ // TODO(pkasting): We should just block here until this loads. Any time
+ // someone unloads the history backend, we'll get inconsistent inline
+ // autocomplete behavior here.
+ if (url_db) {
+ DoAutocomplete(NULL, url_db, params.get());
+ // params->matches now has the matches we should expose to the provider.
+ // Pass 2 expects a "clean slate" set of matches.
+ matches_.clear();
+ matches_.swap(params->matches);
+ UpdateStarredStateOfMatches();
+ }
+ }
+
+ // Pass 2: Ask the history service to call us back on the history thread,
+ // where we can read the full on-disk DB.
+ if (input.matches_requested() == AutocompleteInput::ALL_MATCHES) {
+ done_ = false;
+ params_ = params.release(); // This object will be destroyed in
+ // QueryComplete() once we're done with it.
+ history_service->ScheduleAutocomplete(this, params_);
+ }
+}
+
+const history::Prefix* HistoryURLProvider::BestPrefix(
+ const GURL& url,
+ const string16& prefix_suffix) const {
+ const history::Prefix* best_prefix = NULL;
+ const string16 text(UTF8ToUTF16(url.spec()));
+ for (history::Prefixes::const_iterator i(prefixes_.begin());
+ i != prefixes_.end(); ++i) {
+ if ((best_prefix == NULL) ||
+ (i->num_components > best_prefix->num_components)) {
+ string16 prefix_with_suffix(i->prefix + prefix_suffix);
+ if ((text.length() >= prefix_with_suffix.length()) &&
+ !text.compare(0, prefix_with_suffix.length(), prefix_with_suffix))
+ best_prefix = &(*i);
+ }
+ }
+ return best_prefix;
+}
+
AutocompleteMatch HistoryURLProvider::SuggestExactInput(
const AutocompleteInput& input,
bool trim_http) {
@@ -337,7 +626,8 @@ AutocompleteMatch HistoryURLProvider::SuggestExactInput(
// This relies on match.destination_url being the non-prefix-trimmed version
// of match.contents.
match.contents = display_string;
- const Prefix* best_prefix = BestPrefix(match.destination_url, input.text());
+ const history::Prefix* best_prefix =
+ BestPrefix(match.destination_url, input.text());
// Because of the vagaries of GURL, it's possible for match.destination_url
// to not contain the user's input at all. In this case don't mark anything
// as a match.
@@ -355,10 +645,11 @@ AutocompleteMatch HistoryURLProvider::SuggestExactInput(
return match;
}
-bool HistoryURLProvider::FixupExactSuggestion(history::URLDatabase* db,
- const AutocompleteInput& input,
- AutocompleteMatch* match,
- HistoryMatches* matches) const {
+bool HistoryURLProvider::FixupExactSuggestion(
+ history::URLDatabase* db,
+ const AutocompleteInput& input,
+ AutocompleteMatch* match,
+ history::HistoryMatches* matches) const {
DCHECK(match != NULL);
DCHECK(matches != NULL);
@@ -419,8 +710,8 @@ bool HistoryURLProvider::FixupExactSuggestion(history::URLDatabase* db,
bool HistoryURLProvider::PromoteMatchForInlineAutocomplete(
HistoryURLProviderParams* params,
- const HistoryMatch& match,
- const HistoryMatches& matches) {
+ const history::HistoryMatch& match,
+ const history::HistoryMatches& matches) {
// Promote the first match if it's been typed at least n times, where n == 1
// for "simple" (host-only) URLs and n == 2 for others. We set a higher bar
// for these long URLs because it's less likely that users will want to visit
@@ -429,7 +720,7 @@ bool HistoryURLProvider::PromoteMatchForInlineAutocomplete(
// hand, we wouldn't want to immediately start autocompleting it.
if (!match.url_info.typed_count() ||
((match.url_info.typed_count() == 1) &&
- !history::IsHostOnly(match.url_info.url())))
+ !IsHostOnly(match.url_info.url())))
return false;
// In the case where the user has typed "foo.com" and visited (but not typed)
@@ -444,284 +735,9 @@ bool HistoryURLProvider::PromoteMatchForInlineAutocomplete(
return true;
}
-HistoryURLProvider::~HistoryURLProvider() {}
-
-// static
-history::Prefixes HistoryURLProvider::GetPrefixes() {
- // We'll complete text following these prefixes.
- // NOTE: There's no requirement that these be in any particular order.
- Prefixes prefixes;
- prefixes.push_back(Prefix(ASCIIToUTF16("https://www."), 2));
- prefixes.push_back(Prefix(ASCIIToUTF16("http://www."), 2));
- prefixes.push_back(Prefix(ASCIIToUTF16("ftp://ftp."), 2));
- prefixes.push_back(Prefix(ASCIIToUTF16("ftp://www."), 2));
- prefixes.push_back(Prefix(ASCIIToUTF16("https://"), 1));
- prefixes.push_back(Prefix(ASCIIToUTF16("http://"), 1));
- prefixes.push_back(Prefix(ASCIIToUTF16("ftp://"), 1));
- // Empty string catches within-scheme matches as well.
- prefixes.push_back(Prefix(string16(), 0));
- return prefixes;
-}
-
-// static
-int HistoryURLProvider::CalculateRelevance(AutocompleteInput::Type input_type,
- MatchType match_type,
- size_t match_number) {
- switch (match_type) {
- case INLINE_AUTOCOMPLETE:
- return 1400;
-
- case WHAT_YOU_TYPED:
- return 1200;
-
- default:
- return 900 + static_cast<int>(match_number);
- }
-}
-
-// static
-float HistoryURLProvider::CalculateConfidence(
- const history::HistoryMatch& match,
- const history::HistoryMatches& matches) {
- // Calculate a score based on typed count.
- const float typed_numerator = match.url_info.typed_count();
- float typed_denominator = 0.0f;
- for (history::HistoryMatches::const_iterator it = matches.begin();
- it != matches.end(); ++it) {
- typed_denominator += it->url_info.typed_count();
- }
- const float typed_score = (typed_denominator > 0.0f) ?
- (typed_numerator / typed_denominator) : 0.0f;
-
- // Calculate a score based on visit count
- const float visit_numerator = match.url_info.visit_count();
- float visit_denominator = 0.0f;
- for (history::HistoryMatches::const_iterator it = matches.begin();
- it != matches.end(); ++it) {
- visit_denominator += it->url_info.visit_count();
- }
- const float visit_score = (visit_denominator > 0.0f) ?
- (visit_numerator / visit_denominator) : 0.0f;
-
- // Calculate a score based on innermost matching.
- const float innermost_score = (match.innermost_match ? 1.0f : 0.0f);
-
- // TODO(dominich): Add a boost for bookmarked pages?
- // Prefer typed count to visit count as:
- // - It's a better indicator of what the user wants to open given that they
- // are typing in the address bar (users tend to open certain URLs by typing
- // and others by e.g. bookmarks, so visit_count is a good indicator of
- // overall interest but a bad one for specifically omnibox interest).
- // - Since the DB query is sorted by typed_count, the results may be
- // effectively a random selection as far as visit_counts are concerned
- // (meaning many high-visit_count-URLs may be present in one query and
- // absent in a similar one), leading to wild swings in confidence for the
- // same result across distinct queries.
- // Add a boost for innermost matches (matches after scheme or 'www.').
- return (0.5f * typed_score) + (0.3f * visit_score) + (0.2f * innermost_score);
-}
-
-// static
-void HistoryURLProvider::PromoteOrCreateShorterSuggestion(
- history::URLDatabase* db,
- const HistoryURLProviderParams& params,
- bool have_what_you_typed_match,
- const AutocompleteMatch& what_you_typed_match,
- HistoryMatches* matches) {
- if (matches->empty())
- return; // No matches, nothing to do.
-
- // Determine the base URL from which to search, and whether that URL could
- // itself be added as a match. We can add the base iff it's not "effectively
- // the same" as any "what you typed" match.
- const HistoryMatch& match = matches->front();
- GURL search_base = history::ConvertToHostOnly(match, params.input.text());
- bool can_add_search_base_to_matches = !have_what_you_typed_match;
- if (search_base.is_empty()) {
- // Search from what the user typed when we couldn't reduce the best match
- // to a host. Careful: use a substring of |match| here, rather than the
- // first match in |params|, because they might have different prefixes. If
- // the user typed "google.com", |what_you_typed_match| will hold
- // "http://google.com/", but |match| might begin with
- // "http://www.google.com/".
- // TODO: this should be cleaned up, and is probably incorrect for IDN.
- std::string new_match = match.url_info.url().possibly_invalid_spec().
- substr(0, match.input_location + params.input.text().length());
- search_base = GURL(new_match);
- // TODO(mrossetti): There is a degenerate case where the following may
- // cause a failure: http://www/~someword/fubar.html. Diagnose.
- // See: http://crbug.com/50101
- if (search_base.is_empty())
- return; // Can't construct a valid URL from which to start a search.
- } else if (!can_add_search_base_to_matches) {
- can_add_search_base_to_matches =
- (search_base != what_you_typed_match.destination_url);
- }
- if (search_base == match.url_info.url())
- return; // Couldn't shorten |match|, so no range of URLs to search over.
-
- // Search the DB for short URLs between our base and |match|.
- history::URLRow info(search_base);
- bool promote = true;
- // A short URL is only worth suggesting if it's been visited at least a third
- // as often as the longer URL.
- const int min_visit_count = ((match.url_info.visit_count() - 1) / 3) + 1;
- // For stability between the in-memory and on-disk autocomplete passes, when
- // the long URL has been typed before, only suggest shorter URLs that have
- // also been typed. Otherwise, the on-disk pass could suggest a shorter URL
- // (which hasn't been typed) that the in-memory pass doesn't know about,
- // thereby making the top match, and thus the behavior of inline
- // autocomplete, unstable.
- const int min_typed_count = match.url_info.typed_count() ? 1 : 0;
- if (!db->FindShortestURLFromBase(search_base.possibly_invalid_spec(),
- match.url_info.url().possibly_invalid_spec(), min_visit_count,
- min_typed_count, can_add_search_base_to_matches, &info)) {
- if (!can_add_search_base_to_matches)
- return; // Couldn't find anything and can't add the search base, bail.
-
- // Try to get info on the search base itself. Promote it to the top if the
- // original best match isn't good enough to autocomplete.
- db->GetRowForURL(search_base, &info);
- promote = match.url_info.typed_count() <= 1;
- }
-
- // Promote or add the desired URL to the list of matches.
- EnsureMatchPresent(info, match.input_location, match.match_in_scheme,
- matches, promote);
-}
-
-// static
-void HistoryURLProvider::EnsureMatchPresent(const history::URLRow& info,
- size_t input_location,
- bool match_in_scheme,
- HistoryMatches* matches,
- bool promote) {
- // |matches| may already have an entry for this.
- for (HistoryMatches::iterator i(matches->begin()); i != matches->end();
- ++i) {
- if (i->url_info.url() == info.url()) {
- // Rotate it to the front if the caller wishes.
- if (promote)
- std::rotate(matches->begin(), i, i + 1);
- return;
- }
- }
-
- // No entry, so create one.
- HistoryMatch match(info, input_location, match_in_scheme, true);
- if (promote)
- matches->push_front(match);
- else
- matches->push_back(match);
-}
-
-void HistoryURLProvider::RunAutocompletePasses(
- const AutocompleteInput& input,
- bool fixup_input_and_run_pass_1) {
- matches_.clear();
-
- if ((input.type() == AutocompleteInput::INVALID) ||
- (input.type() == AutocompleteInput::FORCED_QUERY))
- return;
-
- // Create a match for exactly what the user typed. This will only be used as
- // a fallback in case we can't get the history service or URL DB; otherwise,
- // we'll run this again in DoAutocomplete() and use that result instead.
- const bool trim_http = !HasHTTPScheme(input.text());
- // Don't do this for queries -- while we can sometimes mark up a match for
- // this, it's not what the user wants, and just adds noise.
- if ((input.type() != AutocompleteInput::QUERY) &&
- input.canonicalized_url().is_valid())
- matches_.push_back(SuggestExactInput(input, trim_http));
-
- // We'll need the history service to run both passes, so try to obtain it.
- if (!profile_)
- return;
- HistoryService* const history_service =
- profile_->GetHistoryService(Profile::EXPLICIT_ACCESS);
- if (!history_service)
- return;
-
- // Create the data structure for the autocomplete passes. We'll save this off
- // onto the |params_| member for later deletion below if we need to run pass
- // 2.
- std::string languages(languages_);
- if (languages.empty()) {
- languages =
- profile_->GetPrefs()->GetString(prefs::kAcceptLanguages);
- }
- scoped_ptr<HistoryURLProviderParams> params(
- new HistoryURLProviderParams(input, trim_http, languages));
-
- params->prevent_inline_autocomplete =
- PreventInlineAutocomplete(input);
-
- if (fixup_input_and_run_pass_1) {
- // Do some fixup on the user input before matching against it, so we provide
- // good results for local file paths, input with spaces, etc.
- // NOTE: This purposefully doesn't take input.desired_tld() into account; if
- // it did, then holding "ctrl" would change all the results from the
- // HistoryURLProvider provider, not just the What You Typed Result.
- const string16 fixed_text(FixupUserInput(input));
- if (fixed_text.empty()) {
- // Conceivably fixup could result in an empty string (although I don't
- // have cases where this happens offhand). We can't do anything with
- // empty input, so just bail; otherwise we'd crash later.
- return;
- }
- params->input.set_text(fixed_text);
-
- // Pass 1: Get the in-memory URL database, and use it to find and promote
- // the inline autocomplete match, if any.
- history::URLDatabase* url_db = history_service->InMemoryDatabase();
- // url_db can be NULL if it hasn't finished initializing (or failed to
- // initialize). In this case all we can do is fall back on the second
- // pass.
- //
- // TODO(pkasting): We should just block here until this loads. Any time
- // someone unloads the history backend, we'll get inconsistent inline
- // autocomplete behavior here.
- if (url_db) {
- DoAutocomplete(NULL, url_db, params.get());
- // params->matches now has the matches we should expose to the provider.
- // Pass 2 expects a "clean slate" set of matches.
- matches_.clear();
- matches_.swap(params->matches);
- UpdateStarredStateOfMatches();
- }
- }
-
- // Pass 2: Ask the history service to call us back on the history thread,
- // where we can read the full on-disk DB.
- if (input.matches_requested() == AutocompleteInput::ALL_MATCHES) {
- done_ = false;
- params_ = params.release(); // This object will be destroyed in
- // QueryComplete() once we're done with it.
- history_service->ScheduleAutocomplete(this, params_);
- }
-}
-
-const history::Prefix* HistoryURLProvider::BestPrefix(
- const GURL& url,
- const string16& prefix_suffix) const {
- const Prefix* best_prefix = NULL;
- const string16 text(UTF8ToUTF16(url.spec()));
- for (Prefixes::const_iterator i(prefixes_.begin()); i != prefixes_.end();
- ++i) {
- if ((best_prefix == NULL) ||
- (i->num_components > best_prefix->num_components)) {
- string16 prefix_with_suffix(i->prefix + prefix_suffix);
- if ((text.length() >= prefix_with_suffix.length()) &&
- !text.compare(0, prefix_with_suffix.length(), prefix_with_suffix))
- best_prefix = &(*i);
- }
- }
- return best_prefix;
-}
-
-void HistoryURLProvider::SortMatches(HistoryMatches* matches) const {
+void HistoryURLProvider::SortMatches(history::HistoryMatches* matches) const {
// Sort by quality, best first.
- std::sort(matches->begin(), matches->end(), &history::CompareHistoryMatch);
+ std::sort(matches->begin(), matches->end(), &CompareHistoryMatch);
// Remove duplicate matches (caused by the search string appearing in one of
// the prefixes as well as after it). Consider the following scenario:
@@ -743,8 +759,8 @@ void HistoryURLProvider::SortMatches(HistoryMatches* matches) const {
// we use an index instead of an iterator in the outer loop, and don't
// precalculate the ending position.
for (size_t i = 0; i < matches->size(); ++i) {
- HistoryMatches::iterator j(matches->begin() + i + 1);
- while (j != matches->end()) {
+ for (history::HistoryMatches::iterator j(matches->begin() + i + 1);
+ j != matches->end(); ) {
if ((*matches)[i].url_info.url() == j->url_info.url())
j = matches->erase(j);
else
@@ -753,9 +769,11 @@ void HistoryURLProvider::SortMatches(HistoryMatches* matches) const {
}
}
-void HistoryURLProvider::CullPoorMatches(HistoryMatches* matches) const {
+void HistoryURLProvider::CullPoorMatches(
+ history::HistoryMatches* matches) const {
const base::Time& threshold(history::AutocompleteAgeThreshold());
- for (HistoryMatches::iterator i(matches->begin()); i != matches->end();) {
+ for (history::HistoryMatches::iterator i(matches->begin());
+ i != matches->end();) {
if (RowQualifiesAsSignificant(i->url_info, threshold))
++i;
else
@@ -764,7 +782,7 @@ void HistoryURLProvider::CullPoorMatches(HistoryMatches* matches) const {
}
void HistoryURLProvider::CullRedirects(history::HistoryBackend* backend,
- HistoryMatches* matches,
+ history::HistoryMatches* matches,
size_t max_results) const {
for (size_t source = 0;
(source < matches->size()) && (source < max_results); ) {
@@ -794,38 +812,37 @@ void HistoryURLProvider::CullRedirects(history::HistoryBackend* backend,
}
size_t HistoryURLProvider::RemoveSubsequentMatchesOf(
- HistoryMatches* matches,
+ history::HistoryMatches* matches,
size_t source_index,
const std::vector<GURL>& remove) const {
size_t next_index = source_index + 1; // return value = item after source
// Find the first occurrence of any URL in the redirect chain. We want to
// keep this one since it is rated the highest.
- HistoryMatches::iterator first(std::find_first_of(
+ history::HistoryMatches::iterator first(std::find_first_of(
matches->begin(), matches->end(), remove.begin(), remove.end()));
- DCHECK(first != matches->end()) <<
- "We should have always found at least the original URL.";
+ DCHECK(first != matches->end()) << "We should have always found at least the "
+ "original URL.";
// Find any following occurrences of any URL in the redirect chain, these
// should be deleted.
- HistoryMatches::iterator next(first);
- next++; // Start searching immediately after the one we found already.
- while (next != matches->end() &&
- (next = std::find_first_of(next, matches->end(), remove.begin(),
- remove.end())) != matches->end()) {
+ for (history::HistoryMatches::iterator next(std::find_first_of(first + 1,
+ matches->end(), remove.begin(), remove.end()));
+ next != matches->end(); next = std::find_first_of(next, matches->end(),
+ remove.begin(), remove.end())) {
// Remove this item. When we remove an item before the source index, we
// need to shift it to the right and remember that so we can return it.
next = matches->erase(next);
if (static_cast<size_t>(next - matches->begin()) < next_index)
- next_index--;
+ --next_index;
}
return next_index;
}
AutocompleteMatch HistoryURLProvider::HistoryMatchToACMatch(
HistoryURLProviderParams* params,
- const HistoryMatch& history_match,
- const HistoryMatches& history_matches,
+ const history::HistoryMatch& history_match,
+ const history::HistoryMatches& history_matches,
MatchType match_type,
size_t match_number) {
const history::URLRow& info = history_match.url_info;