summaryrefslogtreecommitdiffstats
path: root/chrome/browser/search_engines/template_url.h
blob: e81d6716b05a2bfd3be170104e133429dc79daee (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CHROME_BROWSER_SEARCH_ENGINES_TEMPLATE_URL_H_
#define CHROME_BROWSER_SEARCH_ENGINES_TEMPLATE_URL_H_

#include <string>
#include <vector>

#include "base/gtest_prod_util.h"
#include "base/time.h"
#include "chrome/browser/search_engines/template_url_id.h"
#include "googleurl/src/gurl.h"
#include "googleurl/src/url_parse.h"

class Profile;
class SearchTermsData;
class TemplateURL;


// TemplateURLRef -------------------------------------------------------------

// A TemplateURLRef represents a single URL within the larger TemplateURL class
// (which represents an entire "search engine", see below).  If
// SupportsReplacement() is true, this URL has placeholders in it, for which
// callers can substitute values to get a "real" URL using ReplaceSearchTerms().
//
// TemplateURLRefs always have a non-NULL |owner_| TemplateURL, which they
// access in order to get at important data like the underlying URL string or
// the associated Profile.
class TemplateURLRef {
 public:
  // Magic numbers to pass to ReplaceSearchTerms() for the |accepted_suggestion|
  // parameter.  Most callers aren't using Suggest capabilities and should just
  // pass NO_SUGGESTIONS_AVAILABLE.
  // NOTE: Because positive values are meaningful, make sure these are negative!
  enum AcceptedSuggestion {
    NO_SUGGESTION_CHOSEN = -1,
    NO_SUGGESTIONS_AVAILABLE = -2,
  };

  // Which kind of URL within our owner we are.  This allows us to get at the
  // correct string field. Use |INDEXED| to indicate that the numerical
  // |index_in_owner_| should be used instead.
  enum Type {
    SEARCH,
    SUGGEST,
    INSTANT,
    INDEXED
  };

  // This struct encapsulates arguments passed to
  // TemplateURLRef::ReplaceSearchTerms methods.  By default, only search_terms
  // is required and is passed in the constructor.
  struct SearchTermsArgs {
    explicit SearchTermsArgs(const string16& search_terms);

    // The search terms (query).
    const string16 search_terms;
    // The original (input) query.
    string16 original_query;
    // The optional assisted query stats, aka AQS, used for logging purposes.
    // This string contains impressions of all autocomplete matches shown
    // at the query submission time.  For privacy reasons, we require the
    // search provider to support HTTPS protocol in order to receive the AQS
    // param.
    // For more details, see http://goto.google.com/binary-clients-logging .
    std::string assisted_query_stats;

    // TODO: Remove along with "aq" CGI param.
    int accepted_suggestion;

    // The 0-based position of the cursor within the query string at the time
    // the request was issued.  Set to string16::npos if not used.
    size_t cursor_position;
  };

  TemplateURLRef(TemplateURL* owner, Type type);
  TemplateURLRef(TemplateURL* owner, size_t index_in_owner);
  ~TemplateURLRef();

  // Returns the raw URL. None of the parameters will have been replaced.
  std::string GetURL() const;

  // Returns true if this URL supports replacement.
  bool SupportsReplacement() const;

  // Like SupportsReplacement but usable on threads other than the UI thread.
  bool SupportsReplacementUsingTermsData(
      const SearchTermsData& search_terms_data) const;

  // Returns a string that is the result of replacing the search terms in
  // the url with the specified arguments.  We use our owner's input encoding.
  //
  // If this TemplateURLRef does not support replacement (SupportsReplacement
  // returns false), an empty string is returned.
  std::string ReplaceSearchTerms(
      const SearchTermsArgs& search_terms_args) const;

  // Just like ReplaceSearchTerms except that it takes SearchTermsData to supply
  // the data for some search terms. Most of the time ReplaceSearchTerms should
  // be called.
  std::string ReplaceSearchTermsUsingTermsData(
      const SearchTermsArgs& search_terms_args,
      const SearchTermsData& search_terms_data) const;

  // Returns true if the TemplateURLRef is valid. An invalid TemplateURLRef is
  // one that contains unknown terms, or invalid characters.
  bool IsValid() const;

  // Like IsValid but usable on threads other than the UI thread.
  bool IsValidUsingTermsData(const SearchTermsData& search_terms_data) const;

  // Returns a string representation of this TemplateURLRef suitable for
  // display. The display format is the same as the format used by Firefox.
  string16 DisplayURL() const;

  // Converts a string as returned by DisplayURL back into a string as
  // understood by TemplateURLRef.
  static std::string DisplayURLToURLRef(const string16& display_url);

  // If this TemplateURLRef is valid and contains one search term, this returns
  // the scheme/host/path of the URL, otherwise this returns an empty string.
  const std::string& GetScheme() const;
  const std::string& GetHost() const;
  const std::string& GetPath() const;

  // If this TemplateURLRef is valid and contains one search term, this returns
  // the key of the search term, otherwise this returns an empty string.
  const std::string& GetSearchTermKey() const;

  // Converts the specified term in our owner's encoding to a string16.
  string16 SearchTermToString16(const std::string& term) const;

  // Returns true if this TemplateURLRef has a replacement term of
  // {google:baseURL} or {google:baseSuggestURL}.
  bool HasGoogleBaseURLs() const;

  // Use the pattern referred to by this TemplateURLRef to match the provided
  // |url| and extract |search_terms| from it. Returns true if the pattern
  // matches, even if |search_terms| is empty. Returns false and an empty
  // |search_terms| if the pattern does not match.
  bool ExtractSearchTermsFromURL(const GURL& url,
                                 string16* search_terms) const;

 private:
  friend class TemplateURL;
  FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, SetPrepopulatedAndParse);
  FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseParameterKnown);
  FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseParameterUnknown);
  FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLEmpty);
  FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLNoTemplateEnd);
  FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLNoKnownParameters);
  FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLTwoParameters);
  FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLNestedParameter);

  // Enumeration of the known types.
  enum ReplacementType {
    ENCODING,
    GOOGLE_ACCEPTED_SUGGESTION,
    GOOGLE_ASSISTED_QUERY_STATS,
    GOOGLE_BASE_URL,
    GOOGLE_BASE_SUGGEST_URL,
    GOOGLE_CURSOR_POSITION,
    GOOGLE_INSTANT_ENABLED,
    GOOGLE_INSTANT_EXTENDED_ENABLED,
    GOOGLE_ORIGINAL_QUERY_FOR_SUGGESTION,
    GOOGLE_RLZ,
    GOOGLE_SEARCH_CLIENT,
    GOOGLE_SEARCH_FIELDTRIAL_GROUP,
    GOOGLE_UNESCAPED_SEARCH_TERMS,
    LANGUAGE,
    SEARCH_TERMS,
  };

  // Used to identify an element of the raw url that can be replaced.
  struct Replacement {
    Replacement(ReplacementType type, size_t index)
        : type(type), index(index) {}
    ReplacementType type;
    size_t index;
  };

  // The list of elements to replace.
  typedef std::vector<struct Replacement> Replacements;

  // TemplateURLRef internally caches values to make replacement quick. This
  // method invalidates any cached values.
  void InvalidateCachedValues() const;

  // Parses the parameter in url at the specified offset. start/end specify the
  // range of the parameter in the url, including the braces. If the parameter
  // is valid, url is updated to reflect the appropriate parameter. If
  // the parameter is one of the known parameters an element is added to
  // replacements indicating the type and range of the element. The original
  // parameter is erased from the url.
  //
  // If the parameter is not a known parameter, false is returned. If this is a
  // prepopulated URL, the parameter is erased, otherwise it is left alone.
  bool ParseParameter(size_t start,
                      size_t end,
                      std::string* url,
                      Replacements* replacements) const;

  // Parses the specified url, replacing parameters as necessary. If
  // successful, valid is set to true, and the parsed url is returned. For all
  // known parameters that are encountered an entry is added to replacements.
  // If there is an error parsing the url, valid is set to false, and an empty
  // string is returned.
  std::string ParseURL(const std::string& url,
                       Replacements* replacements,
                       bool* valid) const;

  // If the url has not yet been parsed, ParseURL is invoked.
  // NOTE: While this is const, it modifies parsed_, valid_, parsed_url_ and
  // search_offset_.
  void ParseIfNecessary() const;

  // Like ParseIfNecessary but usable on threads other than the UI thread.
  void ParseIfNecessaryUsingTermsData(
      const SearchTermsData& search_terms_data) const;

  // Extracts the query key and host from the url.
  void ParseHostAndSearchTermKey(
      const SearchTermsData& search_terms_data) const;

  // The TemplateURL that contains us.  This should outlive us.
  TemplateURL* const owner_;

  // What kind of URL we are.
  const Type type_;

  // If |type_| is |INDEXED|, this |index_in_owner_| is used instead to refer to
  // a url within our owner.
  const size_t index_in_owner_;

  // Whether the URL has been parsed.
  mutable bool parsed_;

  // Whether the url was successfully parsed.
  mutable bool valid_;

  // The parsed URL. All terms have been stripped out of this with
  // replacements_ giving the index of the terms to replace.
  mutable std::string parsed_url_;

  // Do we support replacement?
  mutable bool supports_replacements_;

  // The replaceable parts of url (parsed_url_). These are ordered by index
  // into the string, and may be empty.
  mutable Replacements replacements_;

  // Scheme, host, path, key and location of the search term. These are only set
  // if the url contains one search term.
  mutable std::string scheme_;
  mutable std::string host_;
  mutable std::string path_;
  mutable std::string search_term_key_;
  mutable url_parse::Parsed::ComponentType search_term_key_location_;

  // Whether the contained URL is a pre-populated URL.
  bool prepopulated_;

  DISALLOW_COPY_AND_ASSIGN(TemplateURLRef);
};


// TemplateURLData ------------------------------------------------------------

// The data for the TemplateURL.  Separating this into its own class allows most
// users to do SSA-style usage of TemplateURL: construct a TemplateURLData with
// whatever fields are desired, then create an immutable TemplateURL from it.
struct TemplateURLData {
  TemplateURLData();
  ~TemplateURLData();

  // A short description of the template. This is the name we show to the user
  // in various places that use TemplateURLs. For example, the location bar
  // shows this when the user selects a substituting match.
  string16 short_name;

  // The shortcut for this TemplateURL.  |keyword| must be non-empty.
  void SetKeyword(const string16& keyword);
  const string16& keyword() const { return keyword_; }

  // The raw URL for the TemplateURL, which may not be valid as-is (e.g. because
  // it requires substitutions first).  This must be non-empty.
  void SetURL(const std::string& url);
  const std::string& url() const { return url_; }

  // Optional additional raw URLs.
  std::string suggestions_url;
  std::string instant_url;

  // Optional favicon for the TemplateURL.
  GURL favicon_url;

  // URL to the OSD file this came from. May be empty.
  GURL originating_url;

  // Whether this TemplateURL is shown in the default list of search providers.
  // This is just a property and does not indicate whether the TemplateURL has a
  // TemplateURLRef that supports replacement. Use
  // TemplateURL::ShowInDefaultList() to test both.
  bool show_in_default_list;

  // Whether it's safe for auto-modification code (the autogenerator and the
  // code that imports data from other browsers) to replace the TemplateURL.
  // This should be set to false for any TemplateURL the user edits, or any
  // TemplateURL that the user clearly manually edited in the past, like a
  // bookmark keyword from another browser.
  bool safe_for_autoreplace;

  // The list of supported encodings for the search terms. This may be empty,
  // which indicates the terms should be encoded with UTF-8.
  std::vector<std::string> input_encodings;

  // Unique identifier of this TemplateURL. The unique ID is set by the
  // TemplateURLService when the TemplateURL is added to it.
  TemplateURLID id;

  // Date this TemplateURL was created.
  //
  // NOTE: this may be 0, which indicates the TemplateURL was created before we
  // started tracking creation time.
  base::Time date_created;

  // The last time this TemplateURL was modified by a user, since creation.
  //
  // NOTE: Like date_created above, this may be 0.
  base::Time last_modified;

  // True if this TemplateURL was automatically created by the administrator via
  // group policy.
  bool created_by_policy;

  // Number of times this TemplateURL has been explicitly used to load a URL.
  // We don't increment this for uses as the "default search engine" since
  // that's not really "explicit" usage and incrementing would result in pinning
  // the user's default search engine(s) to the top of the list of searches on
  // the New Tab page, de-emphasizing the omnibox as "where you go to search".
  int usage_count;

  // If this TemplateURL comes from prepopulated data the prepopulate_id is > 0.
  int prepopulate_id;

  // The primary unique identifier for Sync. This set on all TemplateURLs
  // regardless of whether they have been associated with Sync.
  std::string sync_guid;

  // A list of URL patterns that can be used, in addition to |url_|, to extract
  // search terms from a URL.
  std::vector<std::string> alternate_urls;

  // A parameter that, if present in the query or ref parameters of a search_url
  // or instant_url, causes Chrome to replace the URL with the search term.
  std::string search_terms_replacement_key;

 private:
  // Private so we can enforce using the setters and thus enforce that these
  // fields are never empty.
  string16 keyword_;
  std::string url_;
};


// TemplateURL ----------------------------------------------------------------

// A TemplateURL represents a single "search engine", defined primarily as a
// subset of the Open Search Description Document
// (http://www.opensearch.org/Specifications/OpenSearch) plus some extensions.
// One TemplateURL contains several TemplateURLRefs, which correspond to various
// different capabilities (e.g. doing searches or getting suggestions), as well
// as a TemplateURLData containing other details like the name, keyword, etc.
//
// TemplateURLs are intended to be read-only for most users; the only public
// non-const method is the Profile getter, which returns a non-const Profile*.
// The TemplateURLService, which handles storing and manipulating TemplateURLs,
// is made a friend so that it can be the exception to this pattern.
class TemplateURL {
 public:
  // |profile| may be NULL.  This will affect the results of e.g. calling
  // ReplaceSearchTerms() on the member TemplateURLRefs.
  TemplateURL(Profile* profile, const TemplateURLData& data);
  ~TemplateURL();

  // Generates a favicon URL from the specified url.
  static GURL GenerateFaviconURL(const GURL& url);

  Profile* profile() { return profile_; }
  const TemplateURLData& data() const { return data_; }

  const string16& short_name() const { return data_.short_name; }
  // An accessor for the short_name, but adjusted so it can be appropriately
  // displayed even if it is LTR and the UI is RTL.
  string16 AdjustedShortNameForLocaleDirection() const;

  const string16& keyword() const { return data_.keyword(); }

  const std::string& url() const { return data_.url(); }
  const std::string& suggestions_url() const { return data_.suggestions_url; }
  const std::string& instant_url() const { return data_.instant_url; }
  const std::vector<std::string>& alternate_urls() const {
    return data_.alternate_urls;
  }
  const GURL& favicon_url() const { return data_.favicon_url; }

  const GURL& originating_url() const { return data_.originating_url; }

  bool show_in_default_list() const { return data_.show_in_default_list; }
  // Returns true if show_in_default_list() is true and this TemplateURL has a
  // TemplateURLRef that supports replacement.
  bool ShowInDefaultList() const;

  bool safe_for_autoreplace() const { return data_.safe_for_autoreplace; }

  const std::vector<std::string>& input_encodings() const {
    return data_.input_encodings;
  }

  TemplateURLID id() const { return data_.id; }

  base::Time date_created() const { return data_.date_created; }
  base::Time last_modified() const { return data_.last_modified; }

  bool created_by_policy() const { return data_.created_by_policy; }

  int usage_count() const { return data_.usage_count; }

  int prepopulate_id() const { return data_.prepopulate_id; }

  const std::string& sync_guid() const { return data_.sync_guid; }

  // TODO(beaudoin): Rename this when renaming HasSearchTermsReplacementKey().
  const std::string& search_terms_replacement_key() const {
    return data_.search_terms_replacement_key;
  }

  const TemplateURLRef& url_ref() const { return url_ref_; }
  const TemplateURLRef& suggestions_url_ref() const {
    return suggestions_url_ref_;
  }
  const TemplateURLRef& instant_url_ref() const { return instant_url_ref_; }

  // Returns true if |url| supports replacement.
  bool SupportsReplacement() const;

  // Like SupportsReplacement but usable on threads other than the UI thread.
  bool SupportsReplacementUsingTermsData(
      const SearchTermsData& search_terms_data) const;

  // Returns true if this TemplateURL uses Google base URLs and has a keyword
  // of "google.TLD".  We use this to decide whether we can automatically
  // update the keyword to reflect the current Google base URL TLD.
  bool IsGoogleSearchURLWithReplaceableKeyword() const;

  // Returns true if the keywords match or if
  // IsGoogleSearchURLWithReplaceableKeyword() is true for both TemplateURLs.
  bool HasSameKeywordAs(const TemplateURL& other) const;

  std::string GetExtensionId() const;
  bool IsExtensionKeyword() const;

  // Returns the total number of URLs comprised in this template, including
  // search and alternate URLs.
  size_t URLCount() const;

  // Gets the search URL at the given index. The alternate URLs, if any, are
  // numbered starting at 0, and the primary search URL follows. This is used
  // to decode the search term given a search URL (see
  // ExtractSearchTermsFromURL()).
  const std::string& GetURL(size_t index) const;

  // Use the alternate URLs and the search URL to match the provided |url|
  // and extract |search_terms| from it. Returns false and an empty
  // |search_terms| if no search terms can be matched. The order in which the
  // alternate URLs are listed dictates their priority, the URL at index 0 is
  // treated as the highest priority and the primary search URL is treated as
  // the lowest priority (see GetURL()).  For example, if a TemplateURL has
  // alternate URL "http://foo/#q={searchTerms}" and search URL
  // "http://foo/?q={searchTerms}", and the URL to be decoded is
  // "http://foo/?q=a#q=b", the alternate URL will match first and the decoded
  // search term will be "b".
  bool ExtractSearchTermsFromURL(const GURL& url, string16* search_terms);

  // Returns true if the specified |url| contains the search terms replacement
  // key in either the query or the ref. This method does not verify anything
  // else about the URL. In particular, it does not check that the domain
  // matches that of this TemplateURL.
  // TODO(beaudoin): Rename this to reflect that it really checks for an
  // InstantExtended capable URL.
  bool HasSearchTermsReplacementKey(const GURL& url) const;

  // Returns true if the specified |url| matches the search, alternates, or
  // instant url in scheme, domain, and path.  In addition, the search term
  // replacement key must be present in the |url|.
  // This predicate is used for site isolation purposes, so has security
  // implications.  Seek security review if changing it.
  bool IsInstantURL(const GURL& url);

 private:
  friend class TemplateURLService;

  void CopyFrom(const TemplateURL& other);

  void SetURL(const std::string& url);
  void SetPrepopulateId(int id);

  // Resets the keyword if IsGoogleSearchURLWithReplaceableKeyword() or |force|.
  // The |force| parameter is useful when the existing keyword is known to be
  // a placeholder.  The resulting keyword is generated using
  // TemplateURLService::GenerateSearchURL() and
  // TemplateURLService::GenerateKeyword().
  void ResetKeywordIfNecessary(bool force);

  Profile* profile_;
  TemplateURLData data_;
  TemplateURLRef url_ref_;
  TemplateURLRef suggestions_url_ref_;
  TemplateURLRef instant_url_ref_;

  // TODO(sky): Add date last parsed OSD file.

  DISALLOW_COPY_AND_ASSIGN(TemplateURL);
};

#endif  // CHROME_BROWSER_SEARCH_ENGINES_TEMPLATE_URL_H_