summaryrefslogtreecommitdiffstats
path: root/components/url_formatter
diff options
context:
space:
mode:
authorjochen <jochen@chromium.org>2015-08-04 00:05:17 -0700
committerCommit bot <commit-bot@chromium.org>2015-08-04 07:05:48 +0000
commit90437dd218ba09f01612ebfc47eda167d67fb368 (patch)
tree77d2fe1ff3c1830af96a8e03c32419c5b5634522 /components/url_formatter
parent31ad3e6a80c367c4f8ea0a921e8f2cfe555a5fd6 (diff)
downloadchromium_src-90437dd218ba09f01612ebfc47eda167d67fb368.zip
chromium_src-90437dd218ba09f01612ebfc47eda167d67fb368.tar.gz
chromium_src-90437dd218ba09f01612ebfc47eda167d67fb368.tar.bz2
Revert of Move net::FormatUrl and friends outside of //net and into //components (patchset #16 id:290001 of https://codereview.chromium.org/1171333003/ )
Reason for revert: breaks gn_check on Android: https://build.chromium.org/p/chromium.linux/builders/Android%20GN/builds/28796/steps/gn_check/logs/stdio Original issue's description: > Move net::FormatUrl and friends outside of //net and into //components > > net::FormatUrl and related are specifically concerned with display > policies of URLs, which is not something that //net needs to be aware > of, as that's a UX question. > > This folds in net::FormatURL along with the existing //components/url_fixer > and //components/secure_display into a common component, > //components/url_formatter, that handles reformatting URLs for user-friendly > or data storage (url_formatter), for use in security prompts (elide_url), > or for reformatting URLs from user input (url_fixer) > > (Disabling presubmit since this is intentionally not fixing a legacy API, just moving it for future cleanups) > > BUG=486979 > NOPRESUBMIT=true > > Committed: https://crrev.com/1659865c3eb47166c82378bb840801135b057a09 > Cr-Commit-Position: refs/heads/master@{#341605} TBR=droger@chromium.org,jam@chromium.org,mkwst@chromium.org,pkasting@chromium.org,sky@chromium.org,stuartmorgan@chromium.org,felt@chromium.org,rsleevi@chromium.org NOPRESUBMIT=true NOTREECHECKS=true NOTRY=true BUG=486979 Review URL: https://codereview.chromium.org/1260033005 Cr-Commit-Position: refs/heads/master@{#341691}
Diffstat (limited to 'components/url_formatter')
-rw-r--r--components/url_formatter/BUILD.gn51
-rw-r--r--components/url_formatter/DEPS11
-rw-r--r--components/url_formatter/OWNERS9
-rw-r--r--components/url_formatter/elide_url.cc353
-rw-r--r--components/url_formatter/elide_url.h72
-rw-r--r--components/url_formatter/elide_url_unittest.cc324
-rw-r--r--components/url_formatter/url_fixer.cc673
-rw-r--r--components/url_formatter/url_fixer.h87
-rw-r--r--components/url_formatter/url_fixer_unittest.cc537
-rw-r--r--components/url_formatter/url_formatter.cc807
-rw-r--r--components/url_formatter/url_formatter.gyp39
-rw-r--r--components/url_formatter/url_formatter.h155
-rw-r--r--components/url_formatter/url_formatter_unittest.cc978
13 files changed, 0 insertions, 4096 deletions
diff --git a/components/url_formatter/BUILD.gn b/components/url_formatter/BUILD.gn
deleted file mode 100644
index 6a35fb3..0000000
--- a/components/url_formatter/BUILD.gn
+++ /dev/null
@@ -1,51 +0,0 @@
-# Copyright 2015 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-static_library("url_formatter") {
- sources = [
- "elide_url.cc",
- "elide_url.h",
- "url_fixer.cc",
- "url_fixer.h",
- "url_formatter.cc",
- "url_formatter.h",
- ]
-
- # TODO(jschuh): crbug.com/167187 fix size_t to int truncations.
- configs += [ "//build/config/compiler:no_size_t_to_int_warning" ]
-
- deps = [
- "//base",
- "//third_party/icu",
- "//net",
- "//ui/gfx",
- "//url",
- ]
-
- if (is_android) {
- deps -= [ "//ui/gfx" ]
- }
-}
-
-source_set("unit_tests") {
- testonly = true
- sources = [
- "elide_url_unittest.cc",
- "url_fixer_unittest.cc",
- "url_formatter_unittest.cc",
- ]
-
- deps = [
- "//base",
- "//net",
- "//testing/gtest",
- "//ui/gfx",
- "//url",
- ":url_formatter",
- ]
-
- if (is_android) {
- deps -= [ "//ui/gfx" ]
- }
-}
diff --git a/components/url_formatter/DEPS b/components/url_formatter/DEPS
deleted file mode 100644
index 3c1754f..0000000
--- a/components/url_formatter/DEPS
+++ /dev/null
@@ -1,11 +0,0 @@
-include_rules = [
- # This is a shared component (Mandoline, iOS, content), and as such, MUST NOT
- # depend on content or other components that do.
- "-components/html_viewer",
- "-content",
- "-ios",
- "-mandoline",
-
- "+net",
- "+ui/gfx",
-]
diff --git a/components/url_formatter/OWNERS b/components/url_formatter/OWNERS
deleted file mode 100644
index 49e5b76..0000000
--- a/components/url_formatter/OWNERS
+++ /dev/null
@@ -1,9 +0,0 @@
-pkasting@chromium.org
-
-# Backup reviewer
-brettw@chromium.org
-
-# Changes to FormatUrlForSecurityDisplay require a security review to avoid
-# introducing security bugs.
-per-file elide_url.*=palmer@chromium.org
-per-file elide_url.*=felt@chromium.org
diff --git a/components/url_formatter/elide_url.cc b/components/url_formatter/elide_url.cc
deleted file mode 100644
index 8d7a91f..0000000
--- a/components/url_formatter/elide_url.cc
+++ /dev/null
@@ -1,353 +0,0 @@
-// Copyright 2014 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "components/url_formatter/elide_url.h"
-
-#include "base/logging.h"
-#include "base/strings/string_split.h"
-#include "base/strings/utf_string_conversions.h"
-#include "components/url_formatter/url_formatter.h"
-#include "net/base/escape.h"
-#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
-#include "ui/gfx/text_elider.h"
-#include "ui/gfx/text_utils.h"
-#include "url/gurl.h"
-#include "url/url_constants.h"
-
-using base::UTF8ToUTF16;
-using gfx::ElideText;
-using gfx::GetStringWidthF;
-using gfx::kEllipsisUTF16;
-using gfx::kForwardSlash;
-
-namespace {
-
-#if !defined(OS_ANDROID)
-const base::char16 kDot = '.';
-
-// Build a path from the first |num_components| elements in |path_elements|.
-// Prepends |path_prefix|, appends |filename|, inserts ellipsis if appropriate.
-base::string16 BuildPathFromComponents(
- const base::string16& path_prefix,
- const std::vector<base::string16>& path_elements,
- const base::string16& filename,
- size_t num_components) {
- // Add the initial elements of the path.
- base::string16 path = path_prefix;
-
- // Build path from first |num_components| elements.
- for (size_t j = 0; j < num_components; ++j)
- path += path_elements[j] + kForwardSlash;
-
- // Add |filename|, ellipsis if necessary.
- if (num_components != (path_elements.size() - 1))
- path += base::string16(kEllipsisUTF16) + kForwardSlash;
- path += filename;
-
- return path;
-}
-
-// Takes a prefix (Domain, or Domain+subdomain) and a collection of path
-// components and elides if possible. Returns a string containing the longest
-// possible elided path, or an empty string if elision is not possible.
-base::string16 ElideComponentizedPath(
- const base::string16& url_path_prefix,
- const std::vector<base::string16>& url_path_elements,
- const base::string16& url_filename,
- const base::string16& url_query,
- const gfx::FontList& font_list,
- float available_pixel_width) {
- const size_t url_path_number_of_elements = url_path_elements.size();
-
- CHECK(url_path_number_of_elements);
- for (size_t i = url_path_number_of_elements - 1; i > 0; --i) {
- base::string16 elided_path = BuildPathFromComponents(
- url_path_prefix, url_path_elements, url_filename, i);
- if (available_pixel_width >= GetStringWidthF(elided_path, font_list))
- return ElideText(elided_path + url_query, font_list,
- available_pixel_width, gfx::ELIDE_TAIL);
- }
-
- return base::string16();
-}
-
-// Splits the hostname in the |url| into sub-strings for the full hostname,
-// the domain (TLD+1), and the subdomain (everything leading the domain).
-void SplitHost(const GURL& url,
- base::string16* url_host,
- base::string16* url_domain,
- base::string16* url_subdomain) {
- // Get Host.
- *url_host = UTF8ToUTF16(url.host());
-
- // Get domain and registry information from the URL.
- *url_domain =
- UTF8ToUTF16(net::registry_controlled_domains::GetDomainAndRegistry(
- url, net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES));
- if (url_domain->empty())
- *url_domain = *url_host;
-
- // Add port if required.
- if (!url.port().empty()) {
- *url_host += UTF8ToUTF16(":" + url.port());
- *url_domain += UTF8ToUTF16(":" + url.port());
- }
-
- // Get sub domain.
- const size_t domain_start_index = url_host->find(*url_domain);
- base::string16 kWwwPrefix = UTF8ToUTF16("www.");
- if (domain_start_index != base::string16::npos)
- *url_subdomain = url_host->substr(0, domain_start_index);
- if ((*url_subdomain == kWwwPrefix || url_subdomain->empty() ||
- url.SchemeIsFile())) {
- url_subdomain->clear();
- }
-}
-
-#endif // !defined(OS_ANDROID)
-} // namespace
-
-namespace url_formatter {
-
-#if !defined(OS_ANDROID)
-
-// TODO(pkasting): http://crbug.com/77883 This whole function gets
-// kerning/ligatures/etc. issues potentially wrong by assuming that the width of
-// a rendered string is always the sum of the widths of its substrings. Also I
-// suspect it could be made simpler.
-base::string16 ElideUrl(const GURL& url,
- const gfx::FontList& font_list,
- float available_pixel_width,
- const std::string& languages) {
- // Get a formatted string and corresponding parsing of the url.
- url::Parsed parsed;
- const base::string16 url_string = url_formatter::FormatUrl(
- url, languages, url_formatter::kFormatUrlOmitAll,
- net::UnescapeRule::SPACES, &parsed, nullptr, nullptr);
- if (available_pixel_width <= 0)
- return url_string;
-
- // If non-standard, return plain eliding.
- if (!url.IsStandard())
- return ElideText(url_string, font_list, available_pixel_width,
- gfx::ELIDE_TAIL);
-
- // Now start eliding url_string to fit within available pixel width.
- // First pass - check to see whether entire url_string fits.
- const float pixel_width_url_string = GetStringWidthF(url_string, font_list);
- if (available_pixel_width >= pixel_width_url_string)
- return url_string;
-
- // Get the path substring, including query and reference.
- const size_t path_start_index = parsed.path.begin;
- const size_t path_len = parsed.path.len;
- base::string16 url_path_query_etc = url_string.substr(path_start_index);
- base::string16 url_path = url_string.substr(path_start_index, path_len);
-
- // Return general elided text if url minus the query fits.
- const base::string16 url_minus_query =
- url_string.substr(0, path_start_index + path_len);
- if (available_pixel_width >= GetStringWidthF(url_minus_query, font_list))
- return ElideText(url_string, font_list, available_pixel_width,
- gfx::ELIDE_TAIL);
-
- base::string16 url_host;
- base::string16 url_domain;
- base::string16 url_subdomain;
- SplitHost(url, &url_host, &url_domain, &url_subdomain);
-
- // If this is a file type, the path is now defined as everything after ":".
- // For example, for "C:/aa/bb/cc", the path is "/aa/bb/cc". Interestingly, the
- // domain is now "C:" - this is a nice hack for eliding to work pleasantly.
- if (url.SchemeIsFile()) {
- // Split the path string using ":"
- const base::string16 kColon(1, ':');
- std::vector<base::string16> file_path_split = base::SplitString(
- url_path, kColon, base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
- if (file_path_split.size() > 1) { // File is of type "file:///C:/.."
- url_host.clear();
- url_domain.clear();
- url_subdomain.clear();
-
- url_host = url_domain = file_path_split.at(0).substr(1) + kColon;
- url_path_query_etc = url_path = file_path_split.at(1);
- }
- }
-
- // Second Pass - remove scheme - the rest fits.
- const float pixel_width_url_host = GetStringWidthF(url_host, font_list);
- const float pixel_width_url_path =
- GetStringWidthF(url_path_query_etc, font_list);
- if (available_pixel_width >= pixel_width_url_host + pixel_width_url_path)
- return url_host + url_path_query_etc;
-
- // Third Pass: Subdomain, domain and entire path fits.
- const float pixel_width_url_domain = GetStringWidthF(url_domain, font_list);
- const float pixel_width_url_subdomain =
- GetStringWidthF(url_subdomain, font_list);
- if (available_pixel_width >=
- pixel_width_url_subdomain + pixel_width_url_domain + pixel_width_url_path)
- return url_subdomain + url_domain + url_path_query_etc;
-
- // Query element.
- base::string16 url_query;
- const float kPixelWidthDotsTrailer =
- GetStringWidthF(base::string16(kEllipsisUTF16), font_list);
- if (parsed.query.is_nonempty()) {
- url_query = UTF8ToUTF16("?") + url_string.substr(parsed.query.begin);
- if (available_pixel_width >=
- (pixel_width_url_subdomain + pixel_width_url_domain +
- pixel_width_url_path - GetStringWidthF(url_query, font_list))) {
- return ElideText(url_subdomain + url_domain + url_path_query_etc,
- font_list, available_pixel_width, gfx::ELIDE_TAIL);
- }
- }
-
- // Parse url_path using '/'.
- std::vector<base::string16> url_path_elements =
- base::SplitString(url_path, base::string16(1, kForwardSlash),
- base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
-
- // Get filename - note that for a path ending with /
- // such as www.google.com/intl/ads/, the file name is ads/.
- base::string16 url_filename(
- url_path_elements.empty() ? base::string16() : url_path_elements.back());
- size_t url_path_number_of_elements = url_path_elements.size();
- if (url_filename.empty() && (url_path_number_of_elements > 1)) {
- // Path ends with a '/'.
- --url_path_number_of_elements;
- url_filename =
- url_path_elements[url_path_number_of_elements - 1] + kForwardSlash;
- }
-
- const size_t kMaxNumberOfUrlPathElementsAllowed = 1024;
- if (url_path_number_of_elements <= 1 ||
- url_path_number_of_elements > kMaxNumberOfUrlPathElementsAllowed) {
- // No path to elide, or too long of a path (could overflow in loop below)
- // Just elide this as a text string.
- return ElideText(url_subdomain + url_domain + url_path_query_etc, font_list,
- available_pixel_width, gfx::ELIDE_TAIL);
- }
-
- // Start eliding the path and replacing elements by ".../".
- const base::string16 kEllipsisAndSlash =
- base::string16(kEllipsisUTF16) + kForwardSlash;
- const float pixel_width_ellipsis_slash =
- GetStringWidthF(kEllipsisAndSlash, font_list);
-
- // Check with both subdomain and domain.
- base::string16 elided_path = ElideComponentizedPath(
- url_subdomain + url_domain, url_path_elements, url_filename, url_query,
- font_list, available_pixel_width);
- if (!elided_path.empty())
- return elided_path;
-
- // Check with only domain.
- // If a subdomain is present, add an ellipsis before domain.
- // This is added only if the subdomain pixel width is larger than
- // the pixel width of kEllipsis. Otherwise, subdomain remains,
- // which means that this case has been resolved earlier.
- base::string16 url_elided_domain = url_subdomain + url_domain;
- if (pixel_width_url_subdomain > kPixelWidthDotsTrailer) {
- if (!url_subdomain.empty())
- url_elided_domain = kEllipsisAndSlash[0] + url_domain;
- else
- url_elided_domain = url_domain;
-
- elided_path = ElideComponentizedPath(url_elided_domain, url_path_elements,
- url_filename, url_query, font_list,
- available_pixel_width);
-
- if (!elided_path.empty())
- return elided_path;
- }
-
- // Return elided domain/.../filename anyway.
- base::string16 final_elided_url_string(url_elided_domain);
- const float url_elided_domain_width =
- GetStringWidthF(url_elided_domain, font_list);
-
- // A hack to prevent trailing ".../...".
- if ((available_pixel_width - url_elided_domain_width) >
- pixel_width_ellipsis_slash + kPixelWidthDotsTrailer +
- GetStringWidthF(base::ASCIIToUTF16("UV"), font_list)) {
- final_elided_url_string += BuildPathFromComponents(
- base::string16(), url_path_elements, url_filename, 1);
- } else {
- final_elided_url_string += url_path;
- }
-
- return ElideText(final_elided_url_string, font_list, available_pixel_width,
- gfx::ELIDE_TAIL);
-}
-
-base::string16 ElideHost(const GURL& url,
- const gfx::FontList& font_list,
- float available_pixel_width) {
- base::string16 url_host;
- base::string16 url_domain;
- base::string16 url_subdomain;
- SplitHost(url, &url_host, &url_domain, &url_subdomain);
-
- const float pixel_width_url_host = GetStringWidthF(url_host, font_list);
- if (available_pixel_width >= pixel_width_url_host)
- return url_host;
-
- if (url_subdomain.empty())
- return url_domain;
-
- const float pixel_width_url_domain = GetStringWidthF(url_domain, font_list);
- float subdomain_width = available_pixel_width - pixel_width_url_domain;
- if (subdomain_width <= 0)
- return base::string16(kEllipsisUTF16) + kDot + url_domain;
-
- const base::string16 elided_subdomain =
- ElideText(url_subdomain, font_list, subdomain_width, gfx::ELIDE_HEAD);
- return elided_subdomain + url_domain;
-}
-
-#endif // !defined(OS_ANDROID)
-
-base::string16 FormatUrlForSecurityDisplay(const GURL& url,
- const std::string& languages) {
- if (!url.is_valid() || url.is_empty() || !url.IsStandard())
- return url_formatter::FormatUrl(url, languages);
-
- const base::string16 colon(base::ASCIIToUTF16(":"));
- const base::string16 scheme_separator(
- base::ASCIIToUTF16(url::kStandardSchemeSeparator));
-
- if (url.SchemeIsFile()) {
- return base::ASCIIToUTF16(url::kFileScheme) + scheme_separator +
- base::UTF8ToUTF16(url.path());
- }
-
- if (url.SchemeIsFileSystem()) {
- const GURL* inner_url = url.inner_url();
- if (inner_url->SchemeIsFile()) {
- return base::ASCIIToUTF16(url::kFileSystemScheme) + colon +
- FormatUrlForSecurityDisplay(*inner_url, languages) +
- base::UTF8ToUTF16(url.path());
- }
- return base::ASCIIToUTF16(url::kFileSystemScheme) + colon +
- FormatUrlForSecurityDisplay(*inner_url, languages);
- }
-
- const GURL origin = url.GetOrigin();
- const std::string& scheme = origin.scheme();
- const std::string& host = origin.host();
-
- base::string16 result = base::UTF8ToUTF16(scheme);
- result += scheme_separator;
- result += base::UTF8ToUTF16(host);
-
- const int port = origin.IntPort();
- const int default_port = url::DefaultPortForScheme(
- scheme.c_str(), static_cast<int>(scheme.length()));
- if (port != url::PORT_UNSPECIFIED && port != default_port)
- result += colon + base::UTF8ToUTF16(origin.port());
-
- return result;
-}
-} // namespace url_formatter
diff --git a/components/url_formatter/elide_url.h b/components/url_formatter/elide_url.h
deleted file mode 100644
index 528b20e..0000000
--- a/components/url_formatter/elide_url.h
+++ /dev/null
@@ -1,72 +0,0 @@
-// Copyright 2014 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-//
-// This file defines utility functions for eliding URLs.
-
-#ifndef COMPONENTS_URL_FORMATTER_ELIDE_URL_H_
-#define COMPONENTS_URL_FORMATTER_ELIDE_URL_H_
-
-#include <string>
-
-#include "base/strings/string16.h"
-
-class GURL;
-
-namespace gfx {
-class FontList;
-}
-
-namespace url_formatter {
-
-// ElideUrl and ElideHost require
-// gfx::GetStringWidthF, which is not implemented on Android.
-#if !defined(OS_ANDROID)
-// This function takes a GURL object and elides it. It returns a string
-// which is composed of parts from subdomain, domain, path, filename and query.
-// A "..." is added automatically at the end if the elided string is bigger
-// than the |available_pixel_width|. For |available_pixel_width| == 0, a
-// formatted, but un-elided, string is returned. |languages| is a comma
-// separated list of ISO 639 language codes and is used to determine what
-// characters are understood by a user. It should come from
-// |prefs::kAcceptLanguages|.
-//
-// Note: in RTL locales, if the URL returned by this function is going to be
-// displayed in the UI, then it is likely that the string needs to be marked
-// as an LTR string (using base::i18n::WrapStringWithLTRFormatting()) so that it
-// is displayed properly in an RTL context. Please refer to
-// http://crbug.com/6487 for more information.
-base::string16 ElideUrl(const GURL& url,
- const gfx::FontList& font_list,
- float available_pixel_width,
- const std::string& languages);
-
-// This function takes a GURL object and elides the host to fit within
-// the given width. The function will never elide past the TLD+1 point,
-// but after that, will leading-elide the domain name to fit the width.
-// Example: http://sub.domain.com ---> "...domain.com", or "...b.domain.com"
-// depending on the width.
-base::string16 ElideHost(const GURL& host_url,
- const gfx::FontList& font_list,
- float available_pixel_width);
-#endif // !defined(OS_ANDROID)
-
-// This is a convenience function for formatting a URL in a concise and
-// human-friendly way, to help users make security-related decisions (or in
-// other circumstances when people need to distinguish sites, origins, or
-// otherwise-simplified URLs from each other).
-//
-// Internationalized domain names (IDN) may be presented in Unicode if
-// |languages| accepts the Unicode representation (see
-// |url_formatter::FormatUrl| for more details on the algorithm).
-//
-// - Omits the path for standard schemes, excepting file and filesystem.
-// - Omits the port if it is the default for the scheme.
-//
-// Do not use this for URLs which will be parsed or sent to other applications.
-base::string16 FormatUrlForSecurityDisplay(const GURL& origin,
- const std::string& languages);
-
-} // namespace url_formatter
-
-#endif // COMPONENTS_URL_FORMATTER_ELIDE_URL_H_
diff --git a/components/url_formatter/elide_url_unittest.cc b/components/url_formatter/elide_url_unittest.cc
deleted file mode 100644
index f043478..0000000
--- a/components/url_formatter/elide_url_unittest.cc
+++ /dev/null
@@ -1,324 +0,0 @@
-// Copyright 2014 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "components/url_formatter/elide_url.h"
-
-#include "base/ios/ios_util.h"
-#include "base/strings/utf_string_conversions.h"
-#include "testing/gtest/include/gtest/gtest.h"
-#include "ui/gfx/font_list.h"
-#include "ui/gfx/text_elider.h"
-#include "ui/gfx/text_utils.h"
-#include "url/gurl.h"
-
-using base::UTF8ToUTF16;
-using gfx::GetStringWidthF;
-using gfx::kEllipsis;
-
-namespace {
-
-struct Testcase {
- const std::string input;
- const std::string output;
-};
-
-#if !defined(OS_ANDROID)
-void RunUrlTest(Testcase* testcases, size_t num_testcases) {
- static const gfx::FontList font_list;
- for (size_t i = 0; i < num_testcases; ++i) {
- const GURL url(testcases[i].input);
- // Should we test with non-empty language list?
- // That's kinda redundant with net_util_unittests.
- const float available_width =
- GetStringWidthF(UTF8ToUTF16(testcases[i].output), font_list);
- EXPECT_EQ(UTF8ToUTF16(testcases[i].output),
- url_formatter::ElideUrl(url, font_list, available_width,
- std::string()));
- }
-}
-
-// Test eliding of commonplace URLs.
-TEST(TextEliderTest, TestGeneralEliding) {
- const std::string kEllipsisStr(kEllipsis);
- Testcase testcases[] = {
- {"http://www.google.com/intl/en/ads/", "www.google.com/intl/en/ads/"},
- {"http://www.google.com/intl/en/ads/", "www.google.com/intl/en/ads/"},
- {"http://www.google.com/intl/en/ads/",
- "google.com/intl/" + kEllipsisStr + "/ads/"},
- {"http://www.google.com/intl/en/ads/",
- "google.com/" + kEllipsisStr + "/ads/"},
- {"http://www.google.com/intl/en/ads/", "google.com/" + kEllipsisStr},
- {"http://www.google.com/intl/en/ads/", "goog" + kEllipsisStr},
- {"https://subdomain.foo.com/bar/filename.html",
- "subdomain.foo.com/bar/filename.html"},
- {"https://subdomain.foo.com/bar/filename.html",
- "subdomain.foo.com/" + kEllipsisStr + "/filename.html"},
- {"http://subdomain.foo.com/bar/filename.html",
- kEllipsisStr + "foo.com/" + kEllipsisStr + "/filename.html"},
- {"http://www.google.com/intl/en/ads/?aLongQueryWhichIsNotRequired",
- "www.google.com/intl/en/ads/?aLongQ" + kEllipsisStr},
- };
-
- RunUrlTest(testcases, arraysize(testcases));
-}
-
-// When there is very little space available, the elision code will shorten
-// both path AND file name to an ellipsis - ".../...". To avoid this result,
-// there is a hack in place that simply treats them as one string in this
-// case.
-TEST(TextEliderTest, TestTrailingEllipsisSlashEllipsisHack) {
- const std::string kEllipsisStr(kEllipsis);
-
- // Very little space, would cause double ellipsis.
- gfx::FontList font_list;
- GURL url("http://battersbox.com/directory/foo/peter_paul_and_mary.html");
- float available_width = GetStringWidthF(
- UTF8ToUTF16("battersbox.com/" + kEllipsisStr + "/" + kEllipsisStr),
- font_list);
-
- // Create the expected string, after elision. Depending on font size, the
- // directory might become /dir... or /di... or /d... - it never should be
- // shorter than that. (If it is, the font considers d... to be longer
- // than .../... - that should never happen).
- ASSERT_GT(GetStringWidthF(UTF8ToUTF16(kEllipsisStr + "/" + kEllipsisStr),
- font_list),
- GetStringWidthF(UTF8ToUTF16("d" + kEllipsisStr), font_list));
- GURL long_url("http://battersbox.com/directorynameisreallylongtoforcetrunc");
- base::string16 expected = url_formatter::ElideUrl(
- long_url, font_list, available_width, std::string());
- // Ensure that the expected result still contains part of the directory name.
- ASSERT_GT(expected.length(), std::string("battersbox.com/d").length());
- EXPECT_EQ(expected, url_formatter::ElideUrl(url, font_list, available_width,
- std::string()));
-
- // More space available - elide directories, partially elide filename.
- Testcase testcases[] = {
- {"http://battersbox.com/directory/foo/peter_paul_and_mary.html",
- "battersbox.com/" + kEllipsisStr + "/peter" + kEllipsisStr},
- };
- RunUrlTest(testcases, arraysize(testcases));
-}
-
-// Test eliding of empty strings, URLs with ports, passwords, queries, etc.
-TEST(TextEliderTest, TestMoreEliding) {
- const std::string kEllipsisStr(kEllipsis);
- Testcase testcases[] = {
- {"http://www.google.com/foo?bar", "www.google.com/foo?bar"},
- {"http://xyz.google.com/foo?bar", "xyz.google.com/foo?" + kEllipsisStr},
- {"http://xyz.google.com/foo?bar", "xyz.google.com/foo" + kEllipsisStr},
- {"http://xyz.google.com/foo?bar", "xyz.google.com/fo" + kEllipsisStr},
- {"http://a.b.com/pathname/c?d", "a.b.com/" + kEllipsisStr + "/c?d"},
- {"", ""},
- {"http://foo.bar..example.com...hello/test/filename.html",
- "foo.bar..example.com...hello/" + kEllipsisStr + "/filename.html"},
- {"http://foo.bar../", "foo.bar.."},
- {"http://xn--1lq90i.cn/foo", "\xe5\x8c\x97\xe4\xba\xac.cn/foo"},
- {"http://me:mypass@secrethost.com:99/foo?bar#baz",
- "secrethost.com:99/foo?bar#baz"},
- {"http://me:mypass@ss%xxfdsf.com/foo", "ss%25xxfdsf.com/foo"},
- {"mailto:elgoato@elgoato.com", "mailto:elgoato@elgoato.com"},
- {"javascript:click(0)", "javascript:click(0)"},
- {"https://chess.eecs.berkeley.edu:4430/login/arbitfilename",
- "chess.eecs.berkeley.edu:4430/login/arbitfilename"},
- {"https://chess.eecs.berkeley.edu:4430/login/arbitfilename",
- kEllipsisStr + "berkeley.edu:4430/" + kEllipsisStr + "/arbitfilename"},
-
- // Unescaping.
- {"http://www/%E4%BD%A0%E5%A5%BD?q=%E4%BD%A0%E5%A5%BD#\xe4\xbd\xa0",
- "www/\xe4\xbd\xa0\xe5\xa5\xbd?q=\xe4\xbd\xa0\xe5\xa5\xbd#\xe4\xbd\xa0"},
-
- // Invalid unescaping for path. The ref will always be valid UTF-8. We
- // don't
- // bother to do too many edge cases, since these are handled by the
- // escaper
- // unittest.
- {"http://www/%E4%A0%E5%A5%BD?q=%E4%BD%A0%E5%A5%BD#\xe4\xbd\xa0",
- "www/%E4%A0%E5%A5%BD?q=\xe4\xbd\xa0\xe5\xa5\xbd#\xe4\xbd\xa0"},
- };
-
- RunUrlTest(testcases, arraysize(testcases));
-}
-
-// Test eliding of file: URLs.
-TEST(TextEliderTest, TestFileURLEliding) {
- const std::string kEllipsisStr(kEllipsis);
- Testcase testcases[] = {
- {"file:///C:/path1/path2/path3/filename",
- "file:///C:/path1/path2/path3/filename"},
- {"file:///C:/path1/path2/path3/filename", "C:/path1/path2/path3/filename"},
-// GURL parses "file:///C:path" differently on windows than it does on posix.
-#if defined(OS_WIN)
- {"file:///C:path1/path2/path3/filename",
- "C:/path1/path2/" + kEllipsisStr + "/filename"},
- {"file:///C:path1/path2/path3/filename",
- "C:/path1/" + kEllipsisStr + "/filename"},
- {"file:///C:path1/path2/path3/filename",
- "C:/" + kEllipsisStr + "/filename"},
-#endif // defined(OS_WIN)
- {"file://filer/foo/bar/file", "filer/foo/bar/file"},
- {"file://filer/foo/bar/file", "filer/foo/" + kEllipsisStr + "/file"},
- {"file://filer/foo/bar/file", "filer/" + kEllipsisStr + "/file"},
- {"file://filer/foo/", "file://filer/foo/"},
- {"file://filer/foo/", "filer/foo/"},
- {"file://filer/foo/", "filer" + kEllipsisStr},
- // Eliding file URLs with nothing after the ':' shouldn't crash.
- {"file:///aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:", "aaa" + kEllipsisStr},
- {"file:///aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:/", "aaa" + kEllipsisStr},
- };
-
- RunUrlTest(testcases, arraysize(testcases));
-}
-
-TEST(TextEliderTest, TestHostEliding) {
-#if defined(OS_IOS)
- // TODO(eugenebut): Disable test on iOS9 crbug.com/513703
- if (base::ios::IsRunningOnIOS9OrLater()) {
- LOG(WARNING) << "Test disabled on iOS9.";
- return;
- }
-#endif
- const std::string kEllipsisStr(kEllipsis);
- Testcase testcases[] = {
- {"http://google.com", "google.com"},
- {"http://subdomain.google.com", kEllipsisStr + ".google.com"},
- {"http://reallyreallyreallylongdomainname.com",
- "reallyreallyreallylongdomainname.com"},
- {"http://a.b.c.d.e.f.com", kEllipsisStr + "f.com"},
- {"http://foo", "foo"},
- {"http://foo.bar", "foo.bar"},
- {"http://subdomain.foo.bar", kEllipsisStr + "in.foo.bar"},
-// iOS width calculations are off by a letter from other platforms for
-// some strings, probably for strings with too
-// many kerned letters in the default font set.
-#if !defined(OS_IOS)
- {"http://subdomain.reallylongdomainname.com",
- kEllipsisStr + "ain.reallylongdomainname.com"},
- {"http://a.b.c.d.e.f.com", kEllipsisStr + ".e.f.com"},
-#endif // !defined(OS_IOS)
- };
-
- for (size_t i = 0; i < arraysize(testcases); ++i) {
- const float available_width =
- GetStringWidthF(UTF8ToUTF16(testcases[i].output), gfx::FontList());
- EXPECT_EQ(UTF8ToUTF16(testcases[i].output),
- url_formatter::ElideHost(GURL(testcases[i].input),
- gfx::FontList(), available_width));
- }
-
- // Trying to elide to a really short length will still keep the full TLD+1
- EXPECT_EQ(
- base::ASCIIToUTF16("google.com"),
- url_formatter::ElideHost(GURL("http://google.com"), gfx::FontList(), 2));
- EXPECT_EQ(base::UTF8ToUTF16(kEllipsisStr + ".google.com"),
- url_formatter::ElideHost(GURL("http://subdomain.google.com"),
- gfx::FontList(), 2));
- EXPECT_EQ(
- base::ASCIIToUTF16("foo.bar"),
- url_formatter::ElideHost(GURL("http://foo.bar"), gfx::FontList(), 2));
-}
-
-#endif // !defined(OS_ANDROID)
-
-TEST(TextEliderTest, FormatUrlForSecurityDisplay) {
- struct OriginTestData {
- const char* const description;
- const char* const input;
- const wchar_t* const output;
- };
-
- const OriginTestData tests[] = {
- {"Empty URL", "", L""},
- {"HTTP URL", "http://www.google.com/", L"http://www.google.com"},
- {"HTTPS URL", "https://www.google.com/", L"https://www.google.com"},
- {"Standard HTTP port", "http://www.google.com:80/",
- L"http://www.google.com"},
- {"Standard HTTPS port", "https://www.google.com:443/",
- L"https://www.google.com"},
- {"Standard HTTP port, IDN Chinese",
- "http://\xe4\xb8\xad\xe5\x9b\xbd.icom.museum:80",
- L"http://xn--fiqs8s.icom.museum"},
- {"HTTP URL, IDN Hebrew (RTL)",
- "http://"
- "\xd7\x90\xd7\x99\xd7\xa7\xd7\x95\xd7\xb4\xd7\x9d."
- "\xd7\x99\xd7\xa9\xd7\xa8\xd7\x90\xd7\x9c.museum/",
- L"http://xn--4dbklr2c8d.xn--4dbrk0ce.museum"},
- {"HTTP URL with query string, IDN Arabic (RTL)",
- "http://\xd9\x85\xd8\xb5\xd8\xb1.icom.museum/foo.html?yes=no",
- L"http://xn--wgbh1c.icom.museum"},
- {"Non-standard HTTP port", "http://www.google.com:9000/",
- L"http://www.google.com:9000"},
- {"Non-standard HTTPS port", "https://www.google.com:9000/",
- L"https://www.google.com:9000"},
- {"File URI", "file:///usr/example/file.html",
- L"file:///usr/example/file.html"},
- {"File URI with hostname", "file://localhost/usr/example/file.html",
- L"file:///usr/example/file.html"},
- {"UNC File URI 1", "file:///CONTOSO/accounting/money.xls",
- L"file:///CONTOSO/accounting/money.xls"},
- {"UNC File URI 2",
- "file:///C:/Program%20Files/Music/Web%20Sys/main.html?REQUEST=RADIO",
- L"file:///C:/Program%20Files/Music/Web%20Sys/main.html"},
- {"HTTP URL with path", "http://www.google.com/test.html",
- L"http://www.google.com"},
- {"HTTPS URL with path", "https://www.google.com/test.html",
- L"https://www.google.com"},
- {"Unusual secure scheme (wss)", "wss://www.google.com/",
- L"wss://www.google.com"},
- {"Unusual non-secure scheme (gopher)", "gopher://www.google.com/",
- L"gopher://www.google.com"},
- {"Unlisted scheme (chrome)", "chrome://version", L"chrome://version"},
- {"HTTP IP address", "http://173.194.65.103", L"http://173.194.65.103"},
- {"HTTPS IP address", "https://173.194.65.103", L"https://173.194.65.103"},
- {"HTTP IPv6 address", "http://[FE80:0000:0000:0000:0202:B3FF:FE1E:8329]/",
- L"http://[fe80::202:b3ff:fe1e:8329]"},
- {"HTTPS IPv6 address with port", "https://[2001:db8:0:1]:443/",
- L"https://[2001:db8:0:1]"},
- {"HTTPS IP address, non-default port", "https://173.194.65.103:8443",
- L"https://173.194.65.103:8443"},
- {"HTTP filesystem: URL with path",
- "filesystem:http://www.google.com/temporary/test.html",
- L"filesystem:http://www.google.com"},
- {"File filesystem: URL with path",
- "filesystem:file://localhost/temporary/stuff/test.html?z=fun&goat=billy",
- L"filesystem:file:///temporary/stuff/test.html"},
- {"Invalid scheme 1", "twelve://www.cyber.org/wow.php",
- L"twelve://www.cyber.org/wow.php"},
- {"Invalid scheme 2", "://www.cyber.org/wow.php",
- L"://www.cyber.org/wow.php"},
- {"Invalid host 1", "https://www.cyber../wow.php", L"https://www.cyber.."},
- {"Invalid host 2", "https://www...cyber/wow.php", L"https://www...cyber"},
- {"Invalid port 1", "https://173.194.65.103:000",
- L"https://173.194.65.103:0"},
- {"Invalid port 2", "https://173.194.65.103:gruffle",
- L"https://173.194.65.103:gruffle"},
- {"Invalid port 3", "https://173.194.65.103:/hello.aspx",
- L"https://173.194.65.103"},
- {"Trailing dot in DNS name", "https://www.example.com./get/goat",
- L"https://www.example.com."},
- {"Blob URL",
- "blob:http%3A//www.html5rocks.com/4d4ff040-6d61-4446-86d3-13ca07ec9ab9",
- L"blob:http%3A//www.html5rocks.com/"
- L"4d4ff040-6d61-4446-86d3-13ca07ec9ab9"},
- };
-
- const char languages[] = "zh-TW,en-US,en,am,ar-EG,ar";
- for (size_t i = 0; i < arraysize(tests); ++i) {
- base::string16 formatted = url_formatter::FormatUrlForSecurityDisplay(
- GURL(tests[i].input), std::string());
- EXPECT_EQ(base::WideToUTF16(tests[i].output), formatted)
- << tests[i].description;
- base::string16 formatted_with_languages =
- url_formatter::FormatUrlForSecurityDisplay(GURL(tests[i].input),
- languages);
- EXPECT_EQ(base::WideToUTF16(tests[i].output), formatted_with_languages)
- << tests[i].description;
- }
-
- base::string16 formatted =
- url_formatter::FormatUrlForSecurityDisplay(GURL(), std::string());
- EXPECT_EQ(base::string16(), formatted)
- << "Explicitly test the 0-argument GURL constructor";
-}
-
-} // namespace
diff --git a/components/url_formatter/url_fixer.cc b/components/url_formatter/url_fixer.cc
deleted file mode 100644
index c49a902..0000000
--- a/components/url_formatter/url_fixer.cc
+++ /dev/null
@@ -1,673 +0,0 @@
-// Copyright (c) 2012 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "components/url_formatter/url_fixer.h"
-
-#include <algorithm>
-
-#include "base/files/file_path.h"
-#include "base/files/file_util.h"
-#include "base/logging.h"
-#if defined(OS_POSIX)
-#include "base/path_service.h"
-#endif
-#include "base/strings/string_util.h"
-#include "base/strings/utf_string_conversions.h"
-#include "components/url_formatter/url_formatter.h"
-#include "net/base/escape.h"
-#include "net/base/filename_util.h"
-#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
-#include "url/third_party/mozilla/url_parse.h"
-#include "url/url_file.h"
-#include "url/url_util.h"
-
-namespace url_formatter {
-
-const char* home_directory_override = nullptr;
-
-namespace {
-
-// Hardcode these constants to avoid dependences on //chrome and //content.
-const char kChromeUIScheme[] = "chrome";
-const char kChromeUIDefaultHost[] = "version";
-const char kViewSourceScheme[] = "view-source";
-
-// TODO(estade): Remove these ugly, ugly functions. They are only used in
-// SegmentURL. A url::Parsed object keeps track of a bunch of indices into
-// a url string, and these need to be updated when the URL is converted from
-// UTF8 to UTF16. Instead of this after-the-fact adjustment, we should parse it
-// in the correct string format to begin with.
-url::Component UTF8ComponentToUTF16Component(
- const std::string& text_utf8,
- const url::Component& component_utf8) {
- if (component_utf8.len == -1)
- return url::Component();
-
- std::string before_component_string =
- text_utf8.substr(0, component_utf8.begin);
- std::string component_string =
- text_utf8.substr(component_utf8.begin, component_utf8.len);
- base::string16 before_component_string_16 =
- base::UTF8ToUTF16(before_component_string);
- base::string16 component_string_16 = base::UTF8ToUTF16(component_string);
- url::Component component_16(before_component_string_16.length(),
- component_string_16.length());
- return component_16;
-}
-
-void UTF8PartsToUTF16Parts(const std::string& text_utf8,
- const url::Parsed& parts_utf8,
- url::Parsed* parts) {
- if (base::IsStringASCII(text_utf8)) {
- *parts = parts_utf8;
- return;
- }
-
- parts->scheme = UTF8ComponentToUTF16Component(text_utf8, parts_utf8.scheme);
- parts->username =
- UTF8ComponentToUTF16Component(text_utf8, parts_utf8.username);
- parts->password =
- UTF8ComponentToUTF16Component(text_utf8, parts_utf8.password);
- parts->host = UTF8ComponentToUTF16Component(text_utf8, parts_utf8.host);
- parts->port = UTF8ComponentToUTF16Component(text_utf8, parts_utf8.port);
- parts->path = UTF8ComponentToUTF16Component(text_utf8, parts_utf8.path);
- parts->query = UTF8ComponentToUTF16Component(text_utf8, parts_utf8.query);
- parts->ref = UTF8ComponentToUTF16Component(text_utf8, parts_utf8.ref);
-}
-
-base::TrimPositions TrimWhitespaceUTF8(const std::string& input,
- base::TrimPositions positions,
- std::string* output) {
- // This implementation is not so fast since it converts the text encoding
- // twice. Please feel free to file a bug if this function hurts the
- // performance of Chrome.
- DCHECK(base::IsStringUTF8(input));
- base::string16 input16 = base::UTF8ToUTF16(input);
- base::string16 output16;
- base::TrimPositions result =
- base::TrimWhitespace(input16, positions, &output16);
- *output = base::UTF16ToUTF8(output16);
- return result;
-}
-
-// does some basic fixes for input that we want to test for file-ness
-void PrepareStringForFileOps(const base::FilePath& text,
- base::FilePath::StringType* output) {
-#if defined(OS_WIN)
- base::TrimWhitespace(text.value(), base::TRIM_ALL, output);
- replace(output->begin(), output->end(), '/', '\\');
-#else
- TrimWhitespaceUTF8(text.value(), base::TRIM_ALL, output);
-#endif
-}
-
-// Tries to create a full path from |text|. If the result is valid and the
-// file exists, returns true and sets |full_path| to the result. Otherwise,
-// returns false and leaves |full_path| unchanged.
-bool ValidPathForFile(const base::FilePath::StringType& text,
- base::FilePath* full_path) {
- base::FilePath file_path = base::MakeAbsoluteFilePath(base::FilePath(text));
- if (file_path.empty())
- return false;
-
- if (!base::PathExists(file_path))
- return false;
-
- *full_path = file_path;
- return true;
-}
-
-#if defined(OS_POSIX)
-// Given a path that starts with ~, return a path that starts with an
-// expanded-out /user/foobar directory.
-std::string FixupHomedir(const std::string& text) {
- DCHECK(text.length() > 0 && text[0] == '~');
-
- if (text.length() == 1 || text[1] == '/') {
- base::FilePath file_path;
- if (home_directory_override)
- file_path = base::FilePath(home_directory_override);
- else
- PathService::Get(base::DIR_HOME, &file_path);
-
- // We'll probably break elsewhere if $HOME is undefined, but check here
- // just in case.
- if (file_path.value().empty())
- return text;
- // Append requires to be a relative path, so we have to cut all preceeding
- // '/' characters.
- size_t i = 1;
- while (i < text.length() && text[i] == '/')
- ++i;
- return file_path.Append(text.substr(i)).value();
- }
-
-// Otherwise, this is a path like ~foobar/baz, where we must expand to
-// user foobar's home directory. Officially, we should use getpwent(),
-// but that is a nasty blocking call.
-
-#if defined(OS_MACOSX)
- static const char kHome[] = "/Users/";
-#else
- static const char kHome[] = "/home/";
-#endif
- return kHome + text.substr(1);
-}
-#endif
-
-// Tries to create a file: URL from |text| if it looks like a filename, even if
-// it doesn't resolve as a valid path or to an existing file. Returns a
-// (possibly invalid) file: URL in |fixed_up_url| for input beginning
-// with a drive specifier or "\\". Returns the unchanged input in other cases
-// (including file: URLs: these don't look like filenames).
-std::string FixupPath(const std::string& text) {
- DCHECK(!text.empty());
-
- base::FilePath::StringType filename;
-#if defined(OS_WIN)
- base::FilePath input_path(base::UTF8ToWide(text));
- PrepareStringForFileOps(input_path, &filename);
-
- // Fixup Windows-style drive letters, where "C:" gets rewritten to "C|".
- if (filename.length() > 1 && filename[1] == '|')
- filename[1] = ':';
-#elif defined(OS_POSIX)
- base::FilePath input_path(text);
- PrepareStringForFileOps(input_path, &filename);
- if (filename.length() > 0 && filename[0] == '~')
- filename = FixupHomedir(filename);
-#endif
-
- // Here, we know the input looks like a file.
- GURL file_url = net::FilePathToFileURL(base::FilePath(filename));
- if (file_url.is_valid()) {
- return base::UTF16ToUTF8(url_formatter::FormatUrl(
- file_url, std::string(), url_formatter::kFormatUrlOmitUsernamePassword,
- net::UnescapeRule::NORMAL, nullptr, nullptr, nullptr));
- }
-
- // Invalid file URL, just return the input.
- return text;
-}
-
-// Checks |domain| to see if a valid TLD is already present. If not, appends
-// |desired_tld| to the domain, and prepends "www." unless it's already present.
-void AddDesiredTLD(const std::string& desired_tld, std::string* domain) {
- if (desired_tld.empty() || domain->empty())
- return;
-
- // Check the TLD. If the return value is positive, we already have a TLD, so
- // abort. If the return value is std::string::npos, there's no valid host,
- // but we can try to append a TLD anyway, since the host may become valid once
- // the TLD is attached -- for example, "999999999999" is detected as a broken
- // IP address and marked invalid, but attaching ".com" makes it legal. When
- // the return value is 0, there's a valid host with no known TLD, so we can
- // definitely append the user's TLD. We disallow unknown registries here so
- // users can input "mail.yahoo" and hit ctrl-enter to get
- // "www.mail.yahoo.com".
- const size_t registry_length =
- net::registry_controlled_domains::GetRegistryLength(
- *domain, net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
- net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
- if ((registry_length != 0) && (registry_length != std::string::npos))
- return;
-
- // Add the suffix at the end of the domain.
- const size_t domain_length(domain->length());
- DCHECK_GT(domain_length, 0U);
- DCHECK_NE(desired_tld[0], '.');
- if ((*domain)[domain_length - 1] != '.')
- domain->push_back('.');
- domain->append(desired_tld);
-
- // Now, if the domain begins with "www.", stop.
- const std::string prefix("www.");
- if (domain->compare(0, prefix.length(), prefix) != 0) {
- // Otherwise, add www. to the beginning of the URL.
- domain->insert(0, prefix);
- }
-}
-
-inline void FixupUsername(const std::string& text,
- const url::Component& part,
- std::string* url) {
- if (!part.is_valid())
- return;
-
- // We don't fix up the username at the moment.
- url->append(text, part.begin, part.len);
- // Do not append the trailing '@' because we might need to include the user's
- // password. FixupURL itself will append the '@' for us.
-}
-
-inline void FixupPassword(const std::string& text,
- const url::Component& part,
- std::string* url) {
- if (!part.is_valid())
- return;
-
- // We don't fix up the password at the moment.
- url->append(":");
- url->append(text, part.begin, part.len);
-}
-
-void FixupHost(const std::string& text,
- const url::Component& part,
- bool has_scheme,
- const std::string& desired_tld,
- std::string* url) {
- if (!part.is_valid())
- return;
-
- // Make domain valid.
- // Strip all leading dots and all but one trailing dot, unless the user only
- // typed dots, in which case their input is totally invalid and we should just
- // leave it unchanged.
- std::string domain(text, part.begin, part.len);
- const size_t first_nondot(domain.find_first_not_of('.'));
- if (first_nondot != std::string::npos) {
- domain.erase(0, first_nondot);
- size_t last_nondot(domain.find_last_not_of('.'));
- DCHECK(last_nondot != std::string::npos);
- last_nondot += 2; // Point at second period in ending string
- if (last_nondot < domain.length())
- domain.erase(last_nondot);
- }
-
- // Add any user-specified TLD, if applicable.
- AddDesiredTLD(desired_tld, &domain);
-
- url->append(domain);
-}
-
-void FixupPort(const std::string& text,
- const url::Component& part,
- std::string* url) {
- if (!part.is_valid())
- return;
-
- // We don't fix up the port at the moment.
- url->append(":");
- url->append(text, part.begin, part.len);
-}
-
-inline void FixupPath(const std::string& text,
- const url::Component& part,
- std::string* url) {
- if (!part.is_valid() || part.len == 0) {
- // We should always have a path.
- url->append("/");
- return;
- }
-
- // Append the path as is.
- url->append(text, part.begin, part.len);
-}
-
-inline void FixupQuery(const std::string& text,
- const url::Component& part,
- std::string* url) {
- if (!part.is_valid())
- return;
-
- // We don't fix up the query at the moment.
- url->append("?");
- url->append(text, part.begin, part.len);
-}
-
-inline void FixupRef(const std::string& text,
- const url::Component& part,
- std::string* url) {
- if (!part.is_valid())
- return;
-
- // We don't fix up the ref at the moment.
- url->append("#");
- url->append(text, part.begin, part.len);
-}
-
-bool HasPort(const std::string& original_text,
- const url::Component& scheme_component) {
- // Find the range between the ":" and the "/".
- size_t port_start = scheme_component.end() + 1;
- size_t port_end = port_start;
- while ((port_end < original_text.length()) &&
- !url::IsAuthorityTerminator(original_text[port_end]))
- ++port_end;
- if (port_end == port_start)
- return false;
-
- // Scan the range to see if it is entirely digits.
- for (size_t i = port_start; i < port_end; ++i) {
- if (!base::IsAsciiDigit(original_text[i]))
- return false;
- }
-
- return true;
-}
-
-// Try to extract a valid scheme from the beginning of |text|.
-// If successful, set |scheme_component| to the text range where the scheme
-// was located, and fill |canon_scheme| with its canonicalized form.
-// Otherwise, return false and leave the outputs in an indeterminate state.
-bool GetValidScheme(const std::string& text,
- url::Component* scheme_component,
- std::string* canon_scheme) {
- canon_scheme->clear();
-
- // Locate everything up to (but not including) the first ':'
- if (!url::ExtractScheme(text.data(), static_cast<int>(text.length()),
- scheme_component)) {
- return false;
- }
-
- // Make sure the scheme contains only valid characters, and convert
- // to lowercase. This also catches IPv6 literals like [::1], because
- // brackets are not in the whitelist.
- url::StdStringCanonOutput canon_scheme_output(canon_scheme);
- url::Component canon_scheme_component;
- if (!url::CanonicalizeScheme(text.data(), *scheme_component,
- &canon_scheme_output, &canon_scheme_component)) {
- return false;
- }
-
- // Strip the ':', and any trailing buffer space.
- DCHECK_EQ(0, canon_scheme_component.begin);
- canon_scheme->erase(canon_scheme_component.len);
-
- // We need to fix up the segmentation for "www.example.com:/". For this
- // case, we guess that schemes with a "." are not actually schemes.
- if (canon_scheme->find('.') != std::string::npos)
- return false;
-
- // We need to fix up the segmentation for "www:123/". For this case, we
- // will add an HTTP scheme later and make the URL parser happy.
- // TODO(pkasting): Maybe we should try to use GURL's parser for this?
- if (HasPort(text, *scheme_component))
- return false;
-
- // Everything checks out.
- return true;
-}
-
-// Performs the work for url_formatter::SegmentURL. |text| may be modified on
-// output on success: a semicolon following a valid scheme is replaced with a
-// colon.
-std::string SegmentURLInternal(std::string* text, url::Parsed* parts) {
- // Initialize the result.
- *parts = url::Parsed();
-
- std::string trimmed;
- TrimWhitespaceUTF8(*text, base::TRIM_ALL, &trimmed);
- if (trimmed.empty())
- return std::string(); // Nothing to segment.
-
-#if defined(OS_WIN)
- int trimmed_length = static_cast<int>(trimmed.length());
- if (url::DoesBeginWindowsDriveSpec(trimmed.data(), 0, trimmed_length) ||
- url::DoesBeginUNCPath(trimmed.data(), 0, trimmed_length, true))
- return "file";
-#elif defined(OS_POSIX)
- if (base::FilePath::IsSeparator(trimmed.data()[0]) ||
- trimmed.data()[0] == '~')
- return "file";
-#endif
-
- // Otherwise, we need to look at things carefully.
- std::string scheme;
- if (!GetValidScheme(*text, &parts->scheme, &scheme)) {
- // Try again if there is a ';' in the text. If changing it to a ':' results
- // in a scheme being found, continue processing with the modified text.
- bool found_scheme = false;
- size_t semicolon = text->find(';');
- if (semicolon != 0 && semicolon != std::string::npos) {
- (*text)[semicolon] = ':';
- if (GetValidScheme(*text, &parts->scheme, &scheme))
- found_scheme = true;
- else
- (*text)[semicolon] = ';';
- }
- if (!found_scheme) {
- // Couldn't determine the scheme, so just pick one.
- parts->scheme.reset();
- scheme =
- base::StartsWith(*text, "ftp.", base::CompareCase::INSENSITIVE_ASCII)
- ? url::kFtpScheme
- : url::kHttpScheme;
- }
- }
-
- // Proceed with about and chrome schemes, but not file or nonstandard schemes.
- if ((scheme != url::kAboutScheme) && (scheme != kChromeUIScheme) &&
- ((scheme == url::kFileScheme) ||
- !url::IsStandard(
- scheme.c_str(),
- url::Component(0, static_cast<int>(scheme.length()))))) {
- return scheme;
- }
-
- if (scheme == url::kFileSystemScheme) {
- // Have the GURL parser do the heavy lifting for us.
- url::ParseFileSystemURL(text->data(), static_cast<int>(text->length()),
- parts);
- return scheme;
- }
-
- if (parts->scheme.is_valid()) {
- // Have the GURL parser do the heavy lifting for us.
- url::ParseStandardURL(text->data(), static_cast<int>(text->length()),
- parts);
- return scheme;
- }
-
- // We need to add a scheme in order for ParseStandardURL to be happy.
- // Find the first non-whitespace character.
- std::string::iterator first_nonwhite = text->begin();
- while ((first_nonwhite != text->end()) &&
- base::IsUnicodeWhitespace(*first_nonwhite))
- ++first_nonwhite;
-
- // Construct the text to parse by inserting the scheme.
- std::string inserted_text(scheme);
- inserted_text.append(url::kStandardSchemeSeparator);
- std::string text_to_parse(text->begin(), first_nonwhite);
- text_to_parse.append(inserted_text);
- text_to_parse.append(first_nonwhite, text->end());
-
- // Have the GURL parser do the heavy lifting for us.
- url::ParseStandardURL(text_to_parse.data(),
- static_cast<int>(text_to_parse.length()), parts);
-
- // Offset the results of the parse to match the original text.
- const int offset = -static_cast<int>(inserted_text.length());
- OffsetComponent(offset, &parts->scheme);
- OffsetComponent(offset, &parts->username);
- OffsetComponent(offset, &parts->password);
- OffsetComponent(offset, &parts->host);
- OffsetComponent(offset, &parts->port);
- OffsetComponent(offset, &parts->path);
- OffsetComponent(offset, &parts->query);
- OffsetComponent(offset, &parts->ref);
-
- return scheme;
-}
-
-} // namespace
-
-std::string SegmentURL(const std::string& text, url::Parsed* parts) {
- std::string mutable_text(text);
- return SegmentURLInternal(&mutable_text, parts);
-}
-
-base::string16 SegmentURL(const base::string16& text, url::Parsed* parts) {
- std::string text_utf8 = base::UTF16ToUTF8(text);
- url::Parsed parts_utf8;
- std::string scheme_utf8 = SegmentURL(text_utf8, &parts_utf8);
- UTF8PartsToUTF16Parts(text_utf8, parts_utf8, parts);
- return base::UTF8ToUTF16(scheme_utf8);
-}
-
-GURL FixupURL(const std::string& text, const std::string& desired_tld) {
- std::string trimmed;
- TrimWhitespaceUTF8(text, base::TRIM_ALL, &trimmed);
- if (trimmed.empty())
- return GURL(); // Nothing here.
-
- // Segment the URL.
- url::Parsed parts;
- std::string scheme(SegmentURLInternal(&trimmed, &parts));
-
- // For view-source: URLs, we strip "view-source:", do fixup, and stick it back
- // on. This allows us to handle things like "view-source:google.com".
- if (scheme == kViewSourceScheme) {
- // Reject "view-source:view-source:..." to avoid deep recursion.
- std::string view_source(kViewSourceScheme + std::string(":"));
- if (!base::StartsWith(text, view_source + view_source,
- base::CompareCase::INSENSITIVE_ASCII)) {
- return GURL(kViewSourceScheme + std::string(":") +
- FixupURL(trimmed.substr(scheme.length() + 1), desired_tld)
- .possibly_invalid_spec());
- }
- }
-
- // We handle the file scheme separately.
- if (scheme == url::kFileScheme)
- return GURL(parts.scheme.is_valid() ? text : FixupPath(text));
-
- // We handle the filesystem scheme separately.
- if (scheme == url::kFileSystemScheme) {
- if (parts.inner_parsed() && parts.inner_parsed()->scheme.is_valid())
- return GURL(text);
- return GURL();
- }
-
- // Parse and rebuild about: and chrome: URLs, except about:blank.
- bool chrome_url =
- !base::LowerCaseEqualsASCII(trimmed, url::kAboutBlankURL) &&
- ((scheme == url::kAboutScheme) || (scheme == kChromeUIScheme));
-
- // For some schemes whose layouts we understand, we rebuild it.
- if (chrome_url ||
- url::IsStandard(scheme.c_str(),
- url::Component(0, static_cast<int>(scheme.length())))) {
- // Replace the about: scheme with the chrome: scheme.
- std::string url(chrome_url ? kChromeUIScheme : scheme);
- url.append(url::kStandardSchemeSeparator);
-
- // We need to check whether the |username| is valid because it is our
- // responsibility to append the '@' to delineate the user information from
- // the host portion of the URL.
- if (parts.username.is_valid()) {
- FixupUsername(trimmed, parts.username, &url);
- FixupPassword(trimmed, parts.password, &url);
- url.append("@");
- }
-
- FixupHost(trimmed, parts.host, parts.scheme.is_valid(), desired_tld, &url);
- if (chrome_url && !parts.host.is_valid())
- url.append(kChromeUIDefaultHost);
- FixupPort(trimmed, parts.port, &url);
- FixupPath(trimmed, parts.path, &url);
- FixupQuery(trimmed, parts.query, &url);
- FixupRef(trimmed, parts.ref, &url);
-
- return GURL(url);
- }
-
- // In the worst-case, we insert a scheme if the URL lacks one.
- if (!parts.scheme.is_valid()) {
- std::string fixed_scheme(scheme);
- fixed_scheme.append(url::kStandardSchemeSeparator);
- trimmed.insert(0, fixed_scheme);
- }
-
- return GURL(trimmed);
-}
-
-// The rules are different here than for regular fixup, since we need to handle
-// input like "hello.html" and know to look in the current directory. Regular
-// fixup will look for cues that it is actually a file path before trying to
-// figure out what file it is. If our logic doesn't work, we will fall back on
-// regular fixup.
-GURL FixupRelativeFile(const base::FilePath& base_dir,
- const base::FilePath& text) {
- base::FilePath old_cur_directory;
- if (!base_dir.empty()) {
- // Save the old current directory before we move to the new one.
- base::GetCurrentDirectory(&old_cur_directory);
- base::SetCurrentDirectory(base_dir);
- }
-
- // Allow funny input with extra whitespace and the wrong kind of slashes.
- base::FilePath::StringType trimmed;
- PrepareStringForFileOps(text, &trimmed);
-
- bool is_file = true;
- // Avoid recognizing definite non-file URLs as file paths.
- GURL gurl(trimmed);
- if (gurl.is_valid() && gurl.IsStandard())
- is_file = false;
- base::FilePath full_path;
- if (is_file && !ValidPathForFile(trimmed, &full_path)) {
-// Not a path as entered, try unescaping it in case the user has
-// escaped things. We need to go through 8-bit since the escaped values
-// only represent 8-bit values.
-#if defined(OS_WIN)
- std::wstring unescaped = base::UTF8ToWide(net::UnescapeURLComponent(
- base::WideToUTF8(trimmed),
- net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS));
-#elif defined(OS_POSIX)
- std::string unescaped = net::UnescapeURLComponent(
- trimmed,
- net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS);
-#endif
-
- if (!ValidPathForFile(unescaped, &full_path))
- is_file = false;
- }
-
- // Put back the current directory if we saved it.
- if (!base_dir.empty())
- base::SetCurrentDirectory(old_cur_directory);
-
- if (is_file) {
- GURL file_url = net::FilePathToFileURL(full_path);
- if (file_url.is_valid())
- return GURL(base::UTF16ToUTF8(url_formatter::FormatUrl(
- file_url, std::string(),
- url_formatter::kFormatUrlOmitUsernamePassword,
- net::UnescapeRule::NORMAL, nullptr, nullptr, nullptr)));
- // Invalid files fall through to regular processing.
- }
-
-// Fall back on regular fixup for this input.
-#if defined(OS_WIN)
- std::string text_utf8 = base::WideToUTF8(text.value());
-#elif defined(OS_POSIX)
- std::string text_utf8 = text.value();
-#endif
- return FixupURL(text_utf8, std::string());
-}
-
-void OffsetComponent(int offset, url::Component* part) {
- DCHECK(part);
-
- if (part->is_valid()) {
- // Offset the location of this component.
- part->begin += offset;
-
- // This part might not have existed in the original text.
- if (part->begin < 0)
- part->reset();
- }
-}
-
-bool IsEquivalentScheme(const std::string& scheme1,
- const std::string& scheme2) {
- return scheme1 == scheme2 ||
- (scheme1 == url::kAboutScheme && scheme2 == kChromeUIScheme) ||
- (scheme1 == kChromeUIScheme && scheme2 == url::kAboutScheme);
-}
-
-} // namespace url_formatter
diff --git a/components/url_formatter/url_fixer.h b/components/url_formatter/url_fixer.h
deleted file mode 100644
index b7c592d..0000000
--- a/components/url_formatter/url_fixer.h
+++ /dev/null
@@ -1,87 +0,0 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef COMPONENTS_URL_FORMATTER_URL_FIXER_H_
-#define COMPONENTS_URL_FORMATTER_URL_FIXER_H_
-
-#include <string>
-
-#include "base/strings/string16.h"
-#include "url/gurl.h"
-
-namespace base {
-class FilePath;
-}
-
-namespace url {
-struct Component;
-struct Parsed;
-}
-
-// This object is designed to convert various types of input into URLs that we
-// know are valid. For example, user typing in the URL bar or command line
-// options. This is NOT the place for converting between different types of URLs
-// or parsing them, see net_util.h for that.
-namespace url_formatter {
-
-// Segments the given text string into parts of a URL. This is most useful for
-// schemes such as http, https, and ftp where |SegmentURL| will find many
-// segments. Currently does not segment "file" schemes.
-// Returns the canonicalized scheme, or the empty string when |text| is only
-// whitespace.
-std::string SegmentURL(const std::string& text, url::Parsed* parts);
-base::string16 SegmentURL(const base::string16& text, url::Parsed* parts);
-
-// Converts |text| to a fixed-up URL and returns it. Attempts to make some
-// "smart" adjustments to obviously-invalid input where possible.
-// |text| may be an absolute path to a file, which will get converted to a
-// "file:" URL.
-//
-// The result will be a "more" valid URL than the input. It may still not be
-// valid, so check the return value's validity or use possibly_invalid_spec().
-//
-// Schemes "about" and "chrome" are normalized to "chrome://", with slashes.
-// "about:blank" is unaltered, as Webkit allows frames to access about:blank.
-// Additionally, if a chrome URL does not have a valid host, as in "about:", the
-// returned URL will have the host "version", as in "chrome://version".
-//
-// If |desired_tld| is non-empty, it represents the TLD the user wishes to
-// append in the case of an incomplete domain. We check that this is not a file
-// path and there does not appear to be a valid TLD already, then append
-// |desired_tld| to the domain and prepend "www." (unless it, or a scheme, are
-// already present.) This TLD should not have a leading '.' (use "com" instead
-// of ".com").
-GURL FixupURL(const std::string& text, const std::string& desired_tld);
-
-// Converts |text| to a fixed-up URL, allowing it to be a relative path on the
-// local filesystem. Begin searching in |base_dir|; if empty, use the current
-// working directory. If this resolves to a file on disk, convert it to a
-// "file:" URL in |fixed_up_url|; otherwise, fall back to the behavior of
-// FixupURL().
-//
-// For "regular" input, even if it is possibly a file with a full path, you
-// should use FixupURL() directly. This function should only be used when
-// relative path handling is desired, as for command line processing.
-GURL FixupRelativeFile(const base::FilePath& base_dir,
- const base::FilePath& text);
-
-// Offsets the beginning index of |part| by |offset|, which is allowed to be
-// negative. In some cases, the desired component does not exist at the given
-// offset. For example, when converting from "http://foo" to "foo", the scheme
-// component no longer exists. In such a case, the beginning index is set to 0.
-// Does nothing if |part| is invalid.
-void OffsetComponent(int offset, url::Component* part);
-
-// Returns true if |scheme1| is equivalent to |scheme2|.
-// Generally this is true if the two schemes are actually identical, but it's
-// also true when one scheme is "about" and the other "chrome".
-bool IsEquivalentScheme(const std::string& scheme1, const std::string& scheme2);
-
-// For paths like ~, we use $HOME for the current user's home directory.
-// For tests, we allow our idea of $HOME to be overriden by this variable.
-extern const char* home_directory_override;
-
-} // namespace url_formatter
-
-#endif // COMPONENTS_URL_FORMATTER_URL_FIXER_H_
diff --git a/components/url_formatter/url_fixer_unittest.cc b/components/url_formatter/url_fixer_unittest.cc
deleted file mode 100644
index 900b553..0000000
--- a/components/url_formatter/url_fixer_unittest.cc
+++ /dev/null
@@ -1,537 +0,0 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include <stdlib.h>
-
-#include "base/base_paths.h"
-#include "base/basictypes.h"
-#include "base/files/file_path.h"
-#include "base/files/file_util.h"
-#include "base/files/scoped_temp_dir.h"
-#include "base/strings/string_util.h"
-#include "base/strings/utf_string_conversions.h"
-#include "components/url_formatter/url_fixer.h"
-#include "net/base/filename_util.h"
-#include "testing/gtest/include/gtest/gtest.h"
-#include "url/gurl.h"
-#include "url/third_party/mozilla/url_parse.h"
-
-namespace url {
-
-std::ostream& operator<<(std::ostream& os, const Component& part) {
- return os << "(begin=" << part.begin << ", len=" << part.len << ")";
-}
-
-} // namespace url
-
-struct SegmentCase {
- const std::string input;
- const std::string result;
- const url::Component scheme;
- const url::Component username;
- const url::Component password;
- const url::Component host;
- const url::Component port;
- const url::Component path;
- const url::Component query;
- const url::Component ref;
-};
-
-static const SegmentCase segment_cases[] = {
- { "http://www.google.com/", "http",
- url::Component(0, 4), // scheme
- url::Component(), // username
- url::Component(), // password
- url::Component(7, 14), // host
- url::Component(), // port
- url::Component(21, 1), // path
- url::Component(), // query
- url::Component(), // ref
- },
- { "aBoUt:vErSiOn", "about",
- url::Component(0, 5), // scheme
- url::Component(), // username
- url::Component(), // password
- url::Component(6, 7), // host
- url::Component(), // port
- url::Component(), // path
- url::Component(), // query
- url::Component(), // ref
- },
- { "about:host/path?query#ref", "about",
- url::Component(0, 5), // scheme
- url::Component(), // username
- url::Component(), // password
- url::Component(6, 4), // host
- url::Component(), // port
- url::Component(10, 5), // path
- url::Component(16, 5), // query
- url::Component(22, 3), // ref
- },
- { "about://host/path?query#ref", "about",
- url::Component(0, 5), // scheme
- url::Component(), // username
- url::Component(), // password
- url::Component(8, 4), // host
- url::Component(), // port
- url::Component(12, 5), // path
- url::Component(18, 5), // query
- url::Component(24, 3), // ref
- },
- { "chrome:host/path?query#ref", "chrome",
- url::Component(0, 6), // scheme
- url::Component(), // username
- url::Component(), // password
- url::Component(7, 4), // host
- url::Component(), // port
- url::Component(11, 5), // path
- url::Component(17, 5), // query
- url::Component(23, 3), // ref
- },
- { "chrome://host/path?query#ref", "chrome",
- url::Component(0, 6), // scheme
- url::Component(), // username
- url::Component(), // password
- url::Component(9, 4), // host
- url::Component(), // port
- url::Component(13, 5), // path
- url::Component(19, 5), // query
- url::Component(25, 3), // ref
- },
- { " www.google.com:124?foo#", "http",
- url::Component(), // scheme
- url::Component(), // username
- url::Component(), // password
- url::Component(4, 14), // host
- url::Component(19, 3), // port
- url::Component(), // path
- url::Component(23, 3), // query
- url::Component(27, 0), // ref
- },
- { "user@www.google.com", "http",
- url::Component(), // scheme
- url::Component(0, 4), // username
- url::Component(), // password
- url::Component(5, 14), // host
- url::Component(), // port
- url::Component(), // path
- url::Component(), // query
- url::Component(), // ref
- },
- { "ftp:/user:P:a$$Wd@..ftp.google.com...::23///pub?foo#bar", "ftp",
- url::Component(0, 3), // scheme
- url::Component(5, 4), // username
- url::Component(10, 7), // password
- url::Component(18, 20), // host
- url::Component(39, 2), // port
- url::Component(41, 6), // path
- url::Component(48, 3), // query
- url::Component(52, 3), // ref
- },
- { "[2001:db8::1]/path", "http",
- url::Component(), // scheme
- url::Component(), // username
- url::Component(), // password
- url::Component(0, 13), // host
- url::Component(), // port
- url::Component(13, 5), // path
- url::Component(), // query
- url::Component(), // ref
- },
- { "[::1]", "http",
- url::Component(), // scheme
- url::Component(), // username
- url::Component(), // password
- url::Component(0, 5), // host
- url::Component(), // port
- url::Component(), // path
- url::Component(), // query
- url::Component(), // ref
- },
- // Incomplete IPv6 addresses (will not canonicalize).
- { "[2001:4860:", "http",
- url::Component(), // scheme
- url::Component(), // username
- url::Component(), // password
- url::Component(0, 11), // host
- url::Component(), // port
- url::Component(), // path
- url::Component(), // query
- url::Component(), // ref
- },
- { "[2001:4860:/foo", "http",
- url::Component(), // scheme
- url::Component(), // username
- url::Component(), // password
- url::Component(0, 11), // host
- url::Component(), // port
- url::Component(11, 4), // path
- url::Component(), // query
- url::Component(), // ref
- },
- { "http://:b005::68]", "http",
- url::Component(0, 4), // scheme
- url::Component(), // username
- url::Component(), // password
- url::Component(7, 10), // host
- url::Component(), // port
- url::Component(), // path
- url::Component(), // query
- url::Component(), // ref
- },
- // Can't do anything useful with this.
- { ":b005::68]", "",
- url::Component(0, 0), // scheme
- url::Component(), // username
- url::Component(), // password
- url::Component(), // host
- url::Component(), // port
- url::Component(), // path
- url::Component(), // query
- url::Component(), // ref
- },
-};
-
-typedef testing::Test URLFixerTest;
-
-TEST(URLFixerTest, SegmentURL) {
- std::string result;
- url::Parsed parts;
-
- for (size_t i = 0; i < arraysize(segment_cases); ++i) {
- SegmentCase value = segment_cases[i];
- result = url_formatter::SegmentURL(value.input, &parts);
- EXPECT_EQ(value.result, result);
- EXPECT_EQ(value.scheme, parts.scheme);
- EXPECT_EQ(value.username, parts.username);
- EXPECT_EQ(value.password, parts.password);
- EXPECT_EQ(value.host, parts.host);
- EXPECT_EQ(value.port, parts.port);
- EXPECT_EQ(value.path, parts.path);
- EXPECT_EQ(value.query, parts.query);
- EXPECT_EQ(value.ref, parts.ref);
- }
-}
-
-// Creates a file and returns its full name as well as the decomposed
-// version. Example:
-// full_path = "c:\foo\bar.txt"
-// dir = "c:\foo"
-// file_name = "bar.txt"
-static bool MakeTempFile(const base::FilePath& dir,
- const base::FilePath& file_name,
- base::FilePath* full_path) {
- *full_path = dir.Append(file_name);
- return base::WriteFile(*full_path, "", 0) == 0;
-}
-
-// Returns true if the given URL is a file: URL that matches the given file
-static bool IsMatchingFileURL(const std::string& url,
- const base::FilePath& full_file_path) {
- if (url.length() <= 8)
- return false;
- if (std::string("file:///") != url.substr(0, 8))
- return false; // no file:/// prefix
- if (url.find('\\') != std::string::npos)
- return false; // contains backslashes
-
- base::FilePath derived_path;
- net::FileURLToFilePath(GURL(url), &derived_path);
-
- return base::FilePath::CompareEqualIgnoreCase(derived_path.value(),
- full_file_path.value());
-}
-
-struct FixupCase {
- const std::string input;
- const std::string output;
-} fixup_cases[] = {
- {"www.google.com", "http://www.google.com/"},
- {" www.google.com ", "http://www.google.com/"},
- {" foo.com/asdf bar", "http://foo.com/asdf%20%20bar"},
- {"..www.google.com..", "http://www.google.com./"},
- {"http://......", "http://....../"},
- {"http://host.com:ninety-two/", "http://host.com:ninety-two/"},
- {"http://host.com:ninety-two?foo", "http://host.com:ninety-two/?foo"},
- {"google.com:123", "http://google.com:123/"},
- {"about:", "chrome://version/"},
- {"about:foo", "chrome://foo/"},
- {"about:version", "chrome://version/"},
- {"about:blank", "about:blank"},
- {"about:usr:pwd@hst/pth?qry#ref", "chrome://usr:pwd@hst/pth?qry#ref"},
- {"about://usr:pwd@hst/pth?qry#ref", "chrome://usr:pwd@hst/pth?qry#ref"},
- {"chrome:usr:pwd@hst/pth?qry#ref", "chrome://usr:pwd@hst/pth?qry#ref"},
- {"chrome://usr:pwd@hst/pth?qry#ref", "chrome://usr:pwd@hst/pth?qry#ref"},
- {"www:123", "http://www:123/"},
- {" www:123", "http://www:123/"},
- {"www.google.com?foo", "http://www.google.com/?foo"},
- {"www.google.com#foo", "http://www.google.com/#foo"},
- {"www.google.com?", "http://www.google.com/?"},
- {"www.google.com#", "http://www.google.com/#"},
- {"www.google.com:123?foo#bar", "http://www.google.com:123/?foo#bar"},
- {"user@www.google.com", "http://user@www.google.com/"},
- {"\xE6\xB0\xB4.com", "http://xn--1rw.com/"},
- // It would be better if this next case got treated as http, but I don't see
- // a clean way to guess this isn't the new-and-exciting "user" scheme.
- {"user:passwd@www.google.com:8080/", "user:passwd@www.google.com:8080/"},
- // {"file:///c:/foo/bar%20baz.txt", "file:///C:/foo/bar%20baz.txt"},
- {"ftp.google.com", "ftp://ftp.google.com/"},
- {" ftp.google.com", "ftp://ftp.google.com/"},
- {"FTP.GooGle.com", "ftp://ftp.google.com/"},
- {"ftpblah.google.com", "http://ftpblah.google.com/"},
- {"ftp", "http://ftp/"},
- {"google.ftp.com", "http://google.ftp.com/"},
- // URLs which end with 0x85 (NEL in ISO-8859).
- {"http://foo.com/s?q=\xd0\x85", "http://foo.com/s?q=%D0%85"},
- {"http://foo.com/s?q=\xec\x97\x85", "http://foo.com/s?q=%EC%97%85"},
- {"http://foo.com/s?q=\xf0\x90\x80\x85", "http://foo.com/s?q=%F0%90%80%85"},
- // URLs which end with 0xA0 (non-break space in ISO-8859).
- {"http://foo.com/s?q=\xd0\xa0", "http://foo.com/s?q=%D0%A0"},
- {"http://foo.com/s?q=\xec\x97\xa0", "http://foo.com/s?q=%EC%97%A0"},
- {"http://foo.com/s?q=\xf0\x90\x80\xa0", "http://foo.com/s?q=%F0%90%80%A0"},
- // URLs containing IPv6 literals.
- {"[2001:db8::2]", "http://[2001:db8::2]/"},
- {"[::]:80", "http://[::]/"},
- {"[::]:80/path", "http://[::]/path"},
- {"[::]:180/path", "http://[::]:180/path"},
- // TODO(pmarks): Maybe we should parse bare IPv6 literals someday.
- {"::1", "::1"},
- // Semicolon as scheme separator for standard schemes.
- {"http;//www.google.com/", "http://www.google.com/"},
- {"about;chrome", "chrome://chrome/"},
- // Semicolon left as-is for non-standard schemes.
- {"whatsup;//fool", "whatsup://fool"},
- // Semicolon left as-is in URL itself.
- {"http://host/port?query;moar", "http://host/port?query;moar"},
- // Fewer slashes than expected.
- {"http;www.google.com/", "http://www.google.com/"},
- {"http;/www.google.com/", "http://www.google.com/"},
- // Semicolon at start.
- {";http://www.google.com/", "http://%3Bhttp//www.google.com/"},
-};
-
-TEST(URLFixerTest, FixupURL) {
- for (size_t i = 0; i < arraysize(fixup_cases); ++i) {
- FixupCase value = fixup_cases[i];
- EXPECT_EQ(value.output,
- url_formatter::FixupURL(value.input, "").possibly_invalid_spec())
- << "input: " << value.input;
- }
-
- // Check the TLD-appending functionality.
- FixupCase tld_cases[] = {
- {"somedomainthatwillnotbeagtld",
- "http://www.somedomainthatwillnotbeagtld.com/"},
- {"somedomainthatwillnotbeagtld.",
- "http://www.somedomainthatwillnotbeagtld.com/"},
- {"somedomainthatwillnotbeagtld..",
- "http://www.somedomainthatwillnotbeagtld.com/"},
- {".somedomainthatwillnotbeagtld",
- "http://www.somedomainthatwillnotbeagtld.com/"},
- {"www.somedomainthatwillnotbeagtld",
- "http://www.somedomainthatwillnotbeagtld.com/"},
- {"somedomainthatwillnotbeagtld.com",
- "http://somedomainthatwillnotbeagtld.com/"},
- {"http://somedomainthatwillnotbeagtld",
- "http://www.somedomainthatwillnotbeagtld.com/"},
- {"..somedomainthatwillnotbeagtld..",
- "http://www.somedomainthatwillnotbeagtld.com/"},
- {"http://www.somedomainthatwillnotbeagtld",
- "http://www.somedomainthatwillnotbeagtld.com/"},
- {"9999999999999999", "http://www.9999999999999999.com/"},
- {"somedomainthatwillnotbeagtld/foo",
- "http://www.somedomainthatwillnotbeagtld.com/foo"},
- {"somedomainthatwillnotbeagtld.com/foo",
- "http://somedomainthatwillnotbeagtld.com/foo"},
- {"somedomainthatwillnotbeagtld/?foo=.com",
- "http://www.somedomainthatwillnotbeagtld.com/?foo=.com"},
- {"www.somedomainthatwillnotbeagtld/?foo=www.",
- "http://www.somedomainthatwillnotbeagtld.com/?foo=www."},
- {"somedomainthatwillnotbeagtld.com/?foo=.com",
- "http://somedomainthatwillnotbeagtld.com/?foo=.com"},
- {"http://www.somedomainthatwillnotbeagtld.com",
- "http://www.somedomainthatwillnotbeagtld.com/"},
- {"somedomainthatwillnotbeagtld:123",
- "http://www.somedomainthatwillnotbeagtld.com:123/"},
- {"http://somedomainthatwillnotbeagtld:123",
- "http://www.somedomainthatwillnotbeagtld.com:123/"},
- };
- for (size_t i = 0; i < arraysize(tld_cases); ++i) {
- FixupCase value = tld_cases[i];
- EXPECT_EQ(value.output, url_formatter::FixupURL(value.input, "com")
- .possibly_invalid_spec());
- }
-}
-
-// Test different types of file inputs to URIFixerUpper::FixupURL. This
-// doesn't go into the nice array of fixups above since the file input
-// has to exist.
-TEST(URLFixerTest, FixupFile) {
- // this "original" filename is the one we tweak to get all the variations
- base::ScopedTempDir temp_dir_;
- ASSERT_TRUE(temp_dir_.CreateUniqueTempDir());
- base::FilePath original;
- ASSERT_TRUE(MakeTempFile(
- temp_dir_.path(),
- base::FilePath(FILE_PATH_LITERAL("url fixer upper existing file.txt")),
- &original));
-
- // reference path
- GURL golden(net::FilePathToFileURL(original));
-
- // c:\foo\bar.txt -> file:///c:/foo/bar.txt (basic)
- GURL fixedup(url_formatter::FixupURL(original.AsUTF8Unsafe(), std::string()));
- EXPECT_EQ(golden, fixedup);
-
- // TODO(port): Make some equivalent tests for posix.
-#if defined(OS_WIN)
- // c|/foo\bar.txt -> file:///c:/foo/bar.txt (pipe allowed instead of colon)
- std::string cur(base::WideToUTF8(original.value()));
- EXPECT_EQ(':', cur[1]);
- cur[1] = '|';
- EXPECT_EQ(golden, url_formatter::FixupURL(cur, std::string()));
-
- FixupCase cases[] = {
- {"c:\\Non-existent%20file.txt", "file:///C:/Non-existent%2520file.txt"},
-
- // \\foo\bar.txt -> file://foo/bar.txt
- // UNC paths, this file won't exist, but since there are no escapes, it
- // should be returned just converted to a file: URL.
- {"\\\\NonexistentHost\\foo\\bar.txt", "file://nonexistenthost/foo/bar.txt"},
- // We do this strictly, like IE8, which only accepts this form using
- // backslashes and not forward ones. Turning "//foo" into "http" matches
- // Firefox and IE, silly though it may seem (it falls out of adding "http"
- // as the default protocol if you haven't entered one).
- {"//NonexistentHost\\foo/bar.txt", "http://nonexistenthost/foo/bar.txt"},
- {"file:///C:/foo/bar", "file:///C:/foo/bar"},
-
- // Much of the work here comes from GURL's canonicalization stage.
- {"file://C:/foo/bar", "file:///C:/foo/bar"},
- {"file:c:", "file:///C:/"},
- {"file:c:WINDOWS", "file:///C:/WINDOWS"},
- {"file:c|Program Files", "file:///C:/Program%20Files"},
- {"file:/file", "file://file/"},
- {"file:////////c:\\foo", "file:///C:/foo"},
- {"file://server/folder/file", "file://server/folder/file"},
-
- // These are fixups we don't do, but could consider:
- // {"file:///foo:/bar", "file://foo/bar"},
- // {"file:/\\/server\\folder/file", "file://server/folder/file"},
- };
-#elif defined(OS_POSIX)
-
-#if defined(OS_MACOSX)
-#define HOME "/Users/"
-#else
-#define HOME "/home/"
-#endif
- url_formatter::home_directory_override = "/foo";
- FixupCase cases[] = {
- // File URLs go through GURL, which tries to escape intelligently.
- {"/A%20non-existent file.txt", "file:///A%2520non-existent%20file.txt"},
- // A plain "/" refers to the root.
- {"/", "file:///"},
-
- // These rely on the above home_directory_override.
- {"~", "file:///foo"},
- {"~/bar", "file:///foo/bar"},
-
- // References to other users' homedirs.
- {"~foo", "file://" HOME "foo"},
- {"~x/blah", "file://" HOME "x/blah"},
- };
-#endif
-
- for (size_t i = 0; i < arraysize(cases); i++) {
- EXPECT_EQ(cases[i].output,
- url_formatter::FixupURL(cases[i].input, std::string())
- .possibly_invalid_spec());
- }
-
- EXPECT_TRUE(base::DeleteFile(original, false));
-}
-
-TEST(URLFixerTest, FixupRelativeFile) {
- base::FilePath full_path;
- base::FilePath file_part(
- FILE_PATH_LITERAL("url_fixer_upper_existing_file.txt"));
- base::ScopedTempDir temp_dir_;
- ASSERT_TRUE(temp_dir_.CreateUniqueTempDir());
- ASSERT_TRUE(MakeTempFile(temp_dir_.path(), file_part, &full_path));
- full_path = base::MakeAbsoluteFilePath(full_path);
- ASSERT_FALSE(full_path.empty());
-
- // make sure we pass through good URLs
- for (size_t i = 0; i < arraysize(fixup_cases); ++i) {
- FixupCase value = fixup_cases[i];
- base::FilePath input = base::FilePath::FromUTF8Unsafe(value.input);
- EXPECT_EQ(value.output,
- url_formatter::FixupRelativeFile(temp_dir_.path(),
- input).possibly_invalid_spec());
- }
-
- // make sure the existing file got fixed-up to a file URL, and that there
- // are no backslashes
- EXPECT_TRUE(IsMatchingFileURL(
- url_formatter::FixupRelativeFile(temp_dir_.path(),
- file_part).possibly_invalid_spec(), full_path));
- EXPECT_TRUE(base::DeleteFile(full_path, false));
-
- // create a filename we know doesn't exist and make sure it doesn't get
- // fixed up to a file URL
- base::FilePath nonexistent_file(
- FILE_PATH_LITERAL("url_fixer_upper_nonexistent_file.txt"));
- std::string fixedup(url_formatter::FixupRelativeFile(
- temp_dir_.path(), nonexistent_file).possibly_invalid_spec());
- EXPECT_NE(std::string("file:///"), fixedup.substr(0, 8));
- EXPECT_FALSE(IsMatchingFileURL(fixedup, nonexistent_file));
-
- // make a subdir to make sure relative paths with directories work, also
- // test spaces:
- // "app_dir\url fixer-upper dir\url fixer-upper existing file.txt"
- base::FilePath sub_dir(FILE_PATH_LITERAL("url fixer-upper dir"));
- base::FilePath sub_file(
- FILE_PATH_LITERAL("url fixer-upper existing file.txt"));
- base::FilePath new_dir = temp_dir_.path().Append(sub_dir);
- base::CreateDirectory(new_dir);
- ASSERT_TRUE(MakeTempFile(new_dir, sub_file, &full_path));
- full_path = base::MakeAbsoluteFilePath(full_path);
- ASSERT_FALSE(full_path.empty());
-
- // test file in the subdir
- base::FilePath relative_file = sub_dir.Append(sub_file);
- EXPECT_TRUE(IsMatchingFileURL(
- url_formatter::FixupRelativeFile(temp_dir_.path(),
- relative_file).possibly_invalid_spec(), full_path));
-
- // test file in the subdir with different slashes and escaping.
- base::FilePath::StringType relative_file_str = sub_dir.value() +
- FILE_PATH_LITERAL("/") + sub_file.value();
- base::ReplaceSubstringsAfterOffset(&relative_file_str, 0,
- FILE_PATH_LITERAL(" "), FILE_PATH_LITERAL("%20"));
- EXPECT_TRUE(IsMatchingFileURL(
- url_formatter::FixupRelativeFile(temp_dir_.path(),
- base::FilePath(relative_file_str)).possibly_invalid_spec(),
- full_path));
-
- // test relative directories and duplicate slashes
- // (should resolve to the same file as above)
- relative_file_str = sub_dir.value() + FILE_PATH_LITERAL("/../") +
- sub_dir.value() + FILE_PATH_LITERAL("///./") + sub_file.value();
- EXPECT_TRUE(IsMatchingFileURL(
- url_formatter::FixupRelativeFile(temp_dir_.path(),
- base::FilePath(relative_file_str)).possibly_invalid_spec(),
- full_path));
-
- // done with the subdir
- EXPECT_TRUE(base::DeleteFile(full_path, false));
- EXPECT_TRUE(base::DeleteFile(new_dir, true));
-
- // Test that an obvious HTTP URL isn't accidentally treated as an absolute
- // file path (on account of system-specific craziness).
- base::FilePath empty_path;
- base::FilePath http_url_path(FILE_PATH_LITERAL("http://../"));
- EXPECT_TRUE(url_formatter::FixupRelativeFile(empty_path, http_url_path)
- .SchemeIs("http"));
-}
diff --git a/components/url_formatter/url_formatter.cc b/components/url_formatter/url_formatter.cc
deleted file mode 100644
index cc209d8..0000000
--- a/components/url_formatter/url_formatter.cc
+++ /dev/null
@@ -1,807 +0,0 @@
-// Copyright 2015 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "components/url_formatter/url_formatter.h"
-
-#include <algorithm>
-#include <map>
-#include <utility>
-
-#include "base/lazy_instance.h"
-#include "base/logging.h"
-#include "base/macros.h"
-#include "base/memory/singleton.h"
-#include "base/stl_util.h"
-#include "base/strings/string_tokenizer.h"
-#include "base/strings/string_util.h"
-#include "base/strings/utf_offset_string_conversions.h"
-#include "base/strings/utf_string_conversions.h"
-#include "base/synchronization/lock.h"
-#include "third_party/icu/source/common/unicode/uidna.h"
-#include "third_party/icu/source/common/unicode/uniset.h"
-#include "third_party/icu/source/common/unicode/uscript.h"
-#include "third_party/icu/source/i18n/unicode/regex.h"
-#include "third_party/icu/source/i18n/unicode/ulocdata.h"
-#include "url/gurl.h"
-#include "url/third_party/mozilla/url_parse.h"
-
-namespace url_formatter {
-
-namespace {
-
-base::string16 IDNToUnicodeWithAdjustments(
- const std::string& host,
- const std::string& languages,
- base::OffsetAdjuster::Adjustments* adjustments);
-bool IDNToUnicodeOneComponent(const base::char16* comp,
- size_t comp_len,
- const std::string& languages,
- base::string16* out);
-
-class AppendComponentTransform {
- public:
- AppendComponentTransform() {}
- virtual ~AppendComponentTransform() {}
-
- virtual base::string16 Execute(
- const std::string& component_text,
- base::OffsetAdjuster::Adjustments* adjustments) const = 0;
-
- // NOTE: No DISALLOW_COPY_AND_ASSIGN here, since gcc < 4.3.0 requires an
- // accessible copy constructor in order to call AppendFormattedComponent()
- // with an inline temporary (see http://gcc.gnu.org/bugs/#cxx%5Frvalbind ).
-};
-
-class HostComponentTransform : public AppendComponentTransform {
- public:
- explicit HostComponentTransform(const std::string& languages)
- : languages_(languages) {}
-
- private:
- base::string16 Execute(
- const std::string& component_text,
- base::OffsetAdjuster::Adjustments* adjustments) const override {
- return IDNToUnicodeWithAdjustments(component_text, languages_, adjustments);
- }
-
- const std::string& languages_;
-};
-
-class NonHostComponentTransform : public AppendComponentTransform {
- public:
- explicit NonHostComponentTransform(net::UnescapeRule::Type unescape_rules)
- : unescape_rules_(unescape_rules) {}
-
- private:
- base::string16 Execute(
- const std::string& component_text,
- base::OffsetAdjuster::Adjustments* adjustments) const override {
- return (unescape_rules_ == net::UnescapeRule::NONE)
- ? base::UTF8ToUTF16WithAdjustments(component_text, adjustments)
- : net::UnescapeAndDecodeUTF8URLComponentWithAdjustments(
- component_text, unescape_rules_, adjustments);
- }
-
- const net::UnescapeRule::Type unescape_rules_;
-};
-
-// Transforms the portion of |spec| covered by |original_component| according to
-// |transform|. Appends the result to |output|. If |output_component| is
-// non-NULL, its start and length are set to the transformed component's new
-// start and length. If |adjustments| is non-NULL, appends adjustments (if
-// any) that reflect the transformation the original component underwent to
-// become the transformed value appended to |output|.
-void AppendFormattedComponent(const std::string& spec,
- const url::Component& original_component,
- const AppendComponentTransform& transform,
- base::string16* output,
- url::Component* output_component,
- base::OffsetAdjuster::Adjustments* adjustments) {
- DCHECK(output);
- if (original_component.is_nonempty()) {
- size_t original_component_begin =
- static_cast<size_t>(original_component.begin);
- size_t output_component_begin = output->length();
- std::string component_str(spec, original_component_begin,
- static_cast<size_t>(original_component.len));
-
- // Transform |component_str| and modify |adjustments| appropriately.
- base::OffsetAdjuster::Adjustments component_transform_adjustments;
- output->append(
- transform.Execute(component_str, &component_transform_adjustments));
-
- // Shift all the adjustments made for this component so the offsets are
- // valid for the original string and add them to |adjustments|.
- for (base::OffsetAdjuster::Adjustments::iterator comp_iter =
- component_transform_adjustments.begin();
- comp_iter != component_transform_adjustments.end(); ++comp_iter)
- comp_iter->original_offset += original_component_begin;
- if (adjustments) {
- adjustments->insert(adjustments->end(),
- component_transform_adjustments.begin(),
- component_transform_adjustments.end());
- }
-
- // Set positions of the parsed component.
- if (output_component) {
- output_component->begin = static_cast<int>(output_component_begin);
- output_component->len =
- static_cast<int>(output->length() - output_component_begin);
- }
- } else if (output_component) {
- output_component->reset();
- }
-}
-
-// If |component| is valid, its begin is incremented by |delta|.
-void AdjustComponent(int delta, url::Component* component) {
- if (!component->is_valid())
- return;
-
- DCHECK(delta >= 0 || component->begin >= -delta);
- component->begin += delta;
-}
-
-// Adjusts all the components of |parsed| by |delta|, except for the scheme.
-void AdjustAllComponentsButScheme(int delta, url::Parsed* parsed) {
- AdjustComponent(delta, &(parsed->username));
- AdjustComponent(delta, &(parsed->password));
- AdjustComponent(delta, &(parsed->host));
- AdjustComponent(delta, &(parsed->port));
- AdjustComponent(delta, &(parsed->path));
- AdjustComponent(delta, &(parsed->query));
- AdjustComponent(delta, &(parsed->ref));
-}
-
-// Helper for FormatUrlWithOffsets().
-base::string16 FormatViewSourceUrl(
- const GURL& url,
- const std::string& languages,
- FormatUrlTypes format_types,
- net::UnescapeRule::Type unescape_rules,
- url::Parsed* new_parsed,
- size_t* prefix_end,
- base::OffsetAdjuster::Adjustments* adjustments) {
- DCHECK(new_parsed);
- const char kViewSource[] = "view-source:";
- const size_t kViewSourceLength = arraysize(kViewSource) - 1;
-
- // Format the underlying URL and record adjustments.
- const std::string& url_str(url.possibly_invalid_spec());
- adjustments->clear();
- base::string16 result(
- base::ASCIIToUTF16(kViewSource) +
- FormatUrlWithAdjustments(GURL(url_str.substr(kViewSourceLength)),
- languages, format_types, unescape_rules,
- new_parsed, prefix_end, adjustments));
- // Revise |adjustments| by shifting to the offsets to prefix that the above
- // call to FormatUrl didn't get to see.
- for (base::OffsetAdjuster::Adjustments::iterator it = adjustments->begin();
- it != adjustments->end(); ++it)
- it->original_offset += kViewSourceLength;
-
- // Adjust positions of the parsed components.
- if (new_parsed->scheme.is_nonempty()) {
- // Assume "view-source:real-scheme" as a scheme.
- new_parsed->scheme.len += kViewSourceLength;
- } else {
- new_parsed->scheme.begin = 0;
- new_parsed->scheme.len = kViewSourceLength - 1;
- }
- AdjustAllComponentsButScheme(kViewSourceLength, new_parsed);
-
- if (prefix_end)
- *prefix_end += kViewSourceLength;
-
- return result;
-}
-
-// TODO(brettw) bug 734373: check the scripts for each host component and
-// don't un-IDN-ize if there is more than one. Alternatively, only IDN for
-// scripts that the user has installed. For now, just put the entire
-// path through IDN. Maybe this feature can be implemented in ICU itself?
-//
-// We may want to skip this step in the case of file URLs to allow unicode
-// UNC hostnames regardless of encodings.
-base::string16 IDNToUnicodeWithAdjustments(
- const std::string& host,
- const std::string& languages,
- base::OffsetAdjuster::Adjustments* adjustments) {
- if (adjustments)
- adjustments->clear();
- // Convert the ASCII input to a base::string16 for ICU.
- base::string16 input16;
- input16.reserve(host.length());
- input16.insert(input16.end(), host.begin(), host.end());
-
- // Do each component of the host separately, since we enforce script matching
- // on a per-component basis.
- base::string16 out16;
- for (size_t component_start = 0, component_end;
- component_start < input16.length();
- component_start = component_end + 1) {
- // Find the end of the component.
- component_end = input16.find('.', component_start);
- if (component_end == base::string16::npos)
- component_end = input16.length(); // For getting the last component.
- size_t component_length = component_end - component_start;
- size_t new_component_start = out16.length();
- bool converted_idn = false;
- if (component_end > component_start) {
- // Add the substring that we just found.
- converted_idn =
- IDNToUnicodeOneComponent(input16.data() + component_start,
- component_length, languages, &out16);
- }
- size_t new_component_length = out16.length() - new_component_start;
-
- if (converted_idn && adjustments) {
- adjustments->push_back(base::OffsetAdjuster::Adjustment(
- component_start, component_length, new_component_length));
- }
-
- // Need to add the dot we just found (if we found one).
- if (component_end < input16.length())
- out16.push_back('.');
- }
- return out16;
-}
-
-// Does some simple normalization of scripts so we can allow certain scripts
-// to exist together.
-// TODO(brettw) bug 880223: we should allow some other languages to be
-// oombined such as Chinese and Latin. We will probably need a more
-// complicated system of language pairs to have more fine-grained control.
-UScriptCode NormalizeScript(UScriptCode code) {
- switch (code) {
- case USCRIPT_KATAKANA:
- case USCRIPT_HIRAGANA:
- case USCRIPT_KATAKANA_OR_HIRAGANA:
- case USCRIPT_HANGUL: // This one is arguable.
- return USCRIPT_HAN;
- default:
- return code;
- }
-}
-
-bool IsIDNComponentInSingleScript(const base::char16* str, int str_len) {
- UScriptCode first_script = USCRIPT_INVALID_CODE;
- bool is_first = true;
-
- int i = 0;
- while (i < str_len) {
- unsigned code_point;
- U16_NEXT(str, i, str_len, code_point);
-
- UErrorCode err = U_ZERO_ERROR;
- UScriptCode cur_script = uscript_getScript(code_point, &err);
- if (err != U_ZERO_ERROR)
- return false; // Report mixed on error.
- cur_script = NormalizeScript(cur_script);
-
- // TODO(brettw) We may have to check for USCRIPT_INHERENT as well.
- if (is_first && cur_script != USCRIPT_COMMON) {
- first_script = cur_script;
- is_first = false;
- } else {
- if (cur_script != USCRIPT_COMMON && cur_script != first_script)
- return false;
- }
- }
- return true;
-}
-
-// Check if the script of a language can be 'safely' mixed with
-// Latin letters in the ASCII range.
-bool IsCompatibleWithASCIILetters(const std::string& lang) {
- // For now, just list Chinese, Japanese and Korean (positive list).
- // An alternative is negative-listing (languages using Greek and
- // Cyrillic letters), but it can be more dangerous.
- return !lang.substr(0, 2).compare("zh") || !lang.substr(0, 2).compare("ja") ||
- !lang.substr(0, 2).compare("ko");
-}
-
-typedef std::map<std::string, icu::UnicodeSet*> LangToExemplarSetMap;
-
-class LangToExemplarSet {
- public:
- static LangToExemplarSet* GetInstance() {
- return Singleton<LangToExemplarSet>::get();
- }
-
- private:
- LangToExemplarSetMap map;
- LangToExemplarSet() {}
- ~LangToExemplarSet() {
- STLDeleteContainerPairSecondPointers(map.begin(), map.end());
- }
-
- friend class Singleton<LangToExemplarSet>;
- friend struct DefaultSingletonTraits<LangToExemplarSet>;
- friend bool GetExemplarSetForLang(const std::string&, icu::UnicodeSet**);
- friend void SetExemplarSetForLang(const std::string&, icu::UnicodeSet*);
-
- DISALLOW_COPY_AND_ASSIGN(LangToExemplarSet);
-};
-
-bool GetExemplarSetForLang(const std::string& lang,
- icu::UnicodeSet** lang_set) {
- const LangToExemplarSetMap& map = LangToExemplarSet::GetInstance()->map;
- LangToExemplarSetMap::const_iterator pos = map.find(lang);
- if (pos != map.end()) {
- *lang_set = pos->second;
- return true;
- }
- return false;
-}
-
-void SetExemplarSetForLang(const std::string& lang, icu::UnicodeSet* lang_set) {
- LangToExemplarSetMap& map = LangToExemplarSet::GetInstance()->map;
- map.insert(std::make_pair(lang, lang_set));
-}
-
-static base::LazyInstance<base::Lock>::Leaky g_lang_set_lock =
- LAZY_INSTANCE_INITIALIZER;
-
-// Returns true if all the characters in component_characters are used by
-// the language |lang|.
-bool IsComponentCoveredByLang(const icu::UnicodeSet& component_characters,
- const std::string& lang) {
- CR_DEFINE_STATIC_LOCAL(const icu::UnicodeSet, kASCIILetters, ('a', 'z'));
- icu::UnicodeSet* lang_set = nullptr;
- // We're called from both the UI thread and the history thread.
- {
- base::AutoLock lock(g_lang_set_lock.Get());
- if (!GetExemplarSetForLang(lang, &lang_set)) {
- UErrorCode status = U_ZERO_ERROR;
- ULocaleData* uld = ulocdata_open(lang.c_str(), &status);
- // TODO(jungshik) Turn this check on when the ICU data file is
- // rebuilt with the minimal subset of locale data for languages
- // to which Chrome is not localized but which we offer in the list
- // of languages selectable for Accept-Languages. With the rebuilt ICU
- // data, ulocdata_open never should fall back to the default locale.
- // (issue 2078)
- // DCHECK(U_SUCCESS(status) && status != U_USING_DEFAULT_WARNING);
- if (U_SUCCESS(status) && status != U_USING_DEFAULT_WARNING) {
- lang_set = reinterpret_cast<icu::UnicodeSet*>(ulocdata_getExemplarSet(
- uld, nullptr, 0, ULOCDATA_ES_STANDARD, &status));
- // On success, if |lang| is compatible with ASCII Latin letters, add
- // them.
- if (lang_set && IsCompatibleWithASCIILetters(lang))
- lang_set->addAll(kASCIILetters);
- }
-
- if (!lang_set)
- lang_set = new icu::UnicodeSet(1, 0);
-
- lang_set->freeze();
- SetExemplarSetForLang(lang, lang_set);
- ulocdata_close(uld);
- }
- }
- return !lang_set->isEmpty() && lang_set->containsAll(component_characters);
-}
-
-// Returns true if the given Unicode host component is safe to display to the
-// user.
-bool IsIDNComponentSafe(const base::char16* str,
- int str_len,
- const std::string& languages) {
- // Most common cases (non-IDN) do not reach here so that we don't
- // need a fast return path.
- // TODO(jungshik) : Check if there's any character inappropriate
- // (although allowed) for domain names.
- // See http://www.unicode.org/reports/tr39/#IDN_Security_Profiles and
- // http://www.unicode.org/reports/tr39/data/xidmodifications.txt
- // For now, we borrow the list from Mozilla and tweaked it slightly.
- // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because
- // they're gonna be canonicalized to U+0020 and full stop before
- // reaching here.)
- // The original list is available at
- // http://kb.mozillazine.org/Network.IDN.blacklist_chars and
- // at
- // http://mxr.mozilla.org/seamonkey/source/modules/libpref/src/init/all.js#703
-
- UErrorCode status = U_ZERO_ERROR;
-#ifdef U_WCHAR_IS_UTF16
- icu::UnicodeSet dangerous_characters(
- icu::UnicodeString(
- L"[[\\ \u00ad\u00bc\u00bd\u01c3\u0337\u0338"
- L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]"
- L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]"
- L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae"
- L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014"
- L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14"
- L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]"
- L"[\ufffa-\ufffd]\U0001f50f\U0001f510\U0001f512\U0001f513]"),
- status);
- DCHECK(U_SUCCESS(status));
- icu::RegexMatcher dangerous_patterns(
- icu::UnicodeString(
- // Lone katakana no, so, or n
- L"[^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]"
- // Repeating Japanese accent characters
- L"|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c]"),
- 0, status);
-#else
- icu::UnicodeSet dangerous_characters(
- icu::UnicodeString(
- "[[\\u0020\\u00ad\\u00bc\\u00bd\\u01c3\\u0337\\u0338"
- "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]"
- "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]"
- "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae"
- "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014"
- "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe"
- "14"
- "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\uff"
- "f9]"
- "[\\ufffa-\\ufffd]\\U0001f50f\\U0001f510\\U0001f512\\U0001f513]",
- -1, US_INV),
- status);
- DCHECK(U_SUCCESS(status));
- icu::RegexMatcher dangerous_patterns(
- icu::UnicodeString(
- // Lone katakana no, so, or n
- "[^\\p{Katakana}][\\u30ce\\u30f3\\u30bd][^\\p{Katakana}]"
- // Repeating Japanese accent characters
- "|[\\u3099\\u309a\\u309b\\u309c][\\u3099\\u309a\\u309b\\u309c]"),
- 0, status);
-#endif
- DCHECK(U_SUCCESS(status));
- icu::UnicodeSet component_characters;
- icu::UnicodeString component_string(str, str_len);
- component_characters.addAll(component_string);
- if (dangerous_characters.containsSome(component_characters))
- return false;
-
- DCHECK(U_SUCCESS(status));
- dangerous_patterns.reset(component_string);
- if (dangerous_patterns.find())
- return false;
-
- // If the language list is empty, the result is completely determined
- // by whether a component is a single script or not. This will block
- // even "safe" script mixing cases like <Chinese, Latin-ASCII> that are
- // allowed with |languages| (while it blocks Chinese + Latin letters with
- // an accent as should be the case), but we want to err on the safe side
- // when |languages| is empty.
- if (languages.empty())
- return IsIDNComponentInSingleScript(str, str_len);
-
- // |common_characters| is made up of ASCII numbers, hyphen, plus and
- // underscore that are used across scripts and allowed in domain names.
- // (sync'd with characters allowed in url_canon_host with square
- // brackets excluded.) See kHostCharLookup[] array in url_canon_host.cc.
- icu::UnicodeSet common_characters(UNICODE_STRING_SIMPLE("[[0-9]\\-_+\\ ]"),
- status);
- DCHECK(U_SUCCESS(status));
- // Subtract common characters because they're always allowed so that
- // we just have to check if a language-specific set contains
- // the remainder.
- component_characters.removeAll(common_characters);
-
- base::StringTokenizer t(languages, ",");
- while (t.GetNext()) {
- if (IsComponentCoveredByLang(component_characters, t.token()))
- return true;
- }
- return false;
-}
-
-// A wrapper to use LazyInstance<>::Leaky with ICU's UIDNA, a C pointer to
-// a UTS46/IDNA 2008 handling object opened with uidna_openUTS46().
-//
-// We use UTS46 with BiDiCheck to migrate from IDNA 2003 to IDNA 2008 with
-// the backward compatibility in mind. What it does:
-//
-// 1. Use the up-to-date Unicode data.
-// 2. Define a case folding/mapping with the up-to-date Unicode data as
-// in IDNA 2003.
-// 3. Use transitional mechanism for 4 deviation characters (sharp-s,
-// final sigma, ZWJ and ZWNJ) for now.
-// 4. Continue to allow symbols and punctuations.
-// 5. Apply new BiDi check rules more permissive than the IDNA 2003 BiDi rules.
-// 6. Do not apply STD3 rules
-// 7. Do not allow unassigned code points.
-//
-// It also closely matches what IE 10 does except for the BiDi check (
-// http://goo.gl/3XBhqw ).
-// See http://unicode.org/reports/tr46/ and references therein
-// for more details.
-struct UIDNAWrapper {
- UIDNAWrapper() {
- UErrorCode err = U_ZERO_ERROR;
- // TODO(jungshik): Change options as different parties (browsers,
- // registrars, search engines) converge toward a consensus.
- value = uidna_openUTS46(UIDNA_CHECK_BIDI, &err);
- if (U_FAILURE(err))
- value = NULL;
- }
-
- UIDNA* value;
-};
-
-static base::LazyInstance<UIDNAWrapper>::Leaky g_uidna =
- LAZY_INSTANCE_INITIALIZER;
-
-// Converts one component of a host (between dots) to IDN if safe. The result
-// will be APPENDED to the given output string and will be the same as the input
-// if it is not IDN or the IDN is unsafe to display. Returns whether any
-// conversion was performed.
-bool IDNToUnicodeOneComponent(const base::char16* comp,
- size_t comp_len,
- const std::string& languages,
- base::string16* out) {
- DCHECK(out);
- if (comp_len == 0)
- return false;
-
- // Only transform if the input can be an IDN component.
- static const base::char16 kIdnPrefix[] = {'x', 'n', '-', '-'};
- if ((comp_len > arraysize(kIdnPrefix)) &&
- !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(base::char16))) {
- UIDNA* uidna = g_uidna.Get().value;
- DCHECK(uidna != NULL);
- size_t original_length = out->length();
- int output_length = 64;
- UIDNAInfo info = UIDNA_INFO_INITIALIZER;
- UErrorCode status;
- do {
- out->resize(original_length + output_length);
- status = U_ZERO_ERROR;
- // This returns the actual length required. If this is more than 64
- // code units, |status| will be U_BUFFER_OVERFLOW_ERROR and we'll try
- // the conversion again, but with a sufficiently large buffer.
- output_length = uidna_labelToUnicode(
- uidna, comp, static_cast<int32_t>(comp_len), &(*out)[original_length],
- output_length, &info, &status);
- } while ((status == U_BUFFER_OVERFLOW_ERROR && info.errors == 0));
-
- if (U_SUCCESS(status) && info.errors == 0) {
- // Converted successfully. Ensure that the converted component
- // can be safely displayed to the user.
- out->resize(original_length + output_length);
- if (IsIDNComponentSafe(out->data() + original_length, output_length,
- languages))
- return true;
- }
-
- // Something went wrong. Revert to original string.
- out->resize(original_length);
- }
-
- // We get here with no IDN or on error, in which case we just append the
- // literal input.
- out->append(comp, comp_len);
- return false;
-}
-
-} // namespace
-
-const FormatUrlType kFormatUrlOmitNothing = 0;
-const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0;
-const FormatUrlType kFormatUrlOmitHTTP = 1 << 1;
-const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2;
-const FormatUrlType kFormatUrlOmitAll =
- kFormatUrlOmitUsernamePassword | kFormatUrlOmitHTTP |
- kFormatUrlOmitTrailingSlashOnBareHostname;
-
-base::string16 FormatUrl(const GURL& url,
- const std::string& languages,
- FormatUrlTypes format_types,
- net::UnescapeRule::Type unescape_rules,
- url::Parsed* new_parsed,
- size_t* prefix_end,
- size_t* offset_for_adjustment) {
- std::vector<size_t> offsets;
- if (offset_for_adjustment)
- offsets.push_back(*offset_for_adjustment);
- base::string16 result =
- FormatUrlWithOffsets(url, languages, format_types, unescape_rules,
- new_parsed, prefix_end, &offsets);
- if (offset_for_adjustment)
- *offset_for_adjustment = offsets[0];
- return result;
-}
-
-base::string16 FormatUrlWithOffsets(
- const GURL& url,
- const std::string& languages,
- FormatUrlTypes format_types,
- net::UnescapeRule::Type unescape_rules,
- url::Parsed* new_parsed,
- size_t* prefix_end,
- std::vector<size_t>* offsets_for_adjustment) {
- base::OffsetAdjuster::Adjustments adjustments;
- const base::string16& format_url_return_value =
- FormatUrlWithAdjustments(url, languages, format_types, unescape_rules,
- new_parsed, prefix_end, &adjustments);
- base::OffsetAdjuster::AdjustOffsets(adjustments, offsets_for_adjustment);
- if (offsets_for_adjustment) {
- std::for_each(
- offsets_for_adjustment->begin(), offsets_for_adjustment->end(),
- base::LimitOffset<std::string>(format_url_return_value.length()));
- }
- return format_url_return_value;
-}
-
-base::string16 FormatUrlWithAdjustments(
- const GURL& url,
- const std::string& languages,
- FormatUrlTypes format_types,
- net::UnescapeRule::Type unescape_rules,
- url::Parsed* new_parsed,
- size_t* prefix_end,
- base::OffsetAdjuster::Adjustments* adjustments) {
- DCHECK(adjustments != NULL);
- adjustments->clear();
- url::Parsed parsed_temp;
- if (!new_parsed)
- new_parsed = &parsed_temp;
- else
- *new_parsed = url::Parsed();
-
- // Special handling for view-source:. Don't use content::kViewSourceScheme
- // because this library shouldn't depend on chrome.
- const char kViewSource[] = "view-source";
- // Reject "view-source:view-source:..." to avoid deep recursion.
- const char kViewSourceTwice[] = "view-source:view-source:";
- if (url.SchemeIs(kViewSource) &&
- !base::StartsWith(url.possibly_invalid_spec(), kViewSourceTwice,
- base::CompareCase::INSENSITIVE_ASCII)) {
- return FormatViewSourceUrl(url, languages, format_types, unescape_rules,
- new_parsed, prefix_end, adjustments);
- }
-
- // We handle both valid and invalid URLs (this will give us the spec
- // regardless of validity).
- const std::string& spec = url.possibly_invalid_spec();
- const url::Parsed& parsed = url.parsed_for_possibly_invalid_spec();
-
- // Scheme & separators. These are ASCII.
- base::string16 url_string;
- url_string.insert(
- url_string.end(), spec.begin(),
- spec.begin() + parsed.CountCharactersBefore(url::Parsed::USERNAME, true));
- const char kHTTP[] = "http://";
- const char kFTP[] = "ftp.";
- // url_formatter::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com. This
- // means that if we trim "http://" off a URL whose host starts with "ftp." and
- // the user inputs this into any field subject to fixup (which is basically
- // all input fields), the meaning would be changed. (In fact, often the
- // formatted URL is directly pre-filled into an input field.) For this reason
- // we avoid stripping "http://" in this case.
- bool omit_http =
- (format_types & kFormatUrlOmitHTTP) &&
- base::EqualsASCII(url_string, kHTTP) &&
- !base::StartsWith(url.host(), kFTP, base::CompareCase::SENSITIVE);
- new_parsed->scheme = parsed.scheme;
-
- // Username & password.
- if ((format_types & kFormatUrlOmitUsernamePassword) != 0) {
- // Remove the username and password fields. We don't want to display those
- // to the user since they can be used for attacks,
- // e.g. "http://google.com:search@evil.ru/"
- new_parsed->username.reset();
- new_parsed->password.reset();
- // Update the adjustments based on removed username and/or password.
- if (parsed.username.is_nonempty() || parsed.password.is_nonempty()) {
- if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) {
- // The seeming off-by-two is to account for the ':' after the username
- // and '@' after the password.
- adjustments->push_back(base::OffsetAdjuster::Adjustment(
- static_cast<size_t>(parsed.username.begin),
- static_cast<size_t>(parsed.username.len + parsed.password.len + 2),
- 0));
- } else {
- const url::Component* nonempty_component =
- parsed.username.is_nonempty() ? &parsed.username : &parsed.password;
- // The seeming off-by-one is to account for the '@' after the
- // username/password.
- adjustments->push_back(base::OffsetAdjuster::Adjustment(
- static_cast<size_t>(nonempty_component->begin),
- static_cast<size_t>(nonempty_component->len + 1), 0));
- }
- }
- } else {
- AppendFormattedComponent(spec, parsed.username,
- NonHostComponentTransform(unescape_rules),
- &url_string, &new_parsed->username, adjustments);
- if (parsed.password.is_valid())
- url_string.push_back(':');
- AppendFormattedComponent(spec, parsed.password,
- NonHostComponentTransform(unescape_rules),
- &url_string, &new_parsed->password, adjustments);
- if (parsed.username.is_valid() || parsed.password.is_valid())
- url_string.push_back('@');
- }
- if (prefix_end)
- *prefix_end = static_cast<size_t>(url_string.length());
-
- // Host.
- AppendFormattedComponent(spec, parsed.host, HostComponentTransform(languages),
- &url_string, &new_parsed->host, adjustments);
-
- // Port.
- if (parsed.port.is_nonempty()) {
- url_string.push_back(':');
- new_parsed->port.begin = url_string.length();
- url_string.insert(url_string.end(), spec.begin() + parsed.port.begin,
- spec.begin() + parsed.port.end());
- new_parsed->port.len = url_string.length() - new_parsed->port.begin;
- } else {
- new_parsed->port.reset();
- }
-
- // Path & query. Both get the same general unescape & convert treatment.
- if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) ||
- !CanStripTrailingSlash(url)) {
- AppendFormattedComponent(spec, parsed.path,
- NonHostComponentTransform(unescape_rules),
- &url_string, &new_parsed->path, adjustments);
- } else {
- if (parsed.path.len > 0) {
- adjustments->push_back(base::OffsetAdjuster::Adjustment(
- parsed.path.begin, parsed.path.len, 0));
- }
- }
- if (parsed.query.is_valid())
- url_string.push_back('?');
- AppendFormattedComponent(spec, parsed.query,
- NonHostComponentTransform(unescape_rules),
- &url_string, &new_parsed->query, adjustments);
-
- // Ref. This is valid, unescaped UTF-8, so we can just convert.
- if (parsed.ref.is_valid())
- url_string.push_back('#');
- AppendFormattedComponent(spec, parsed.ref,
- NonHostComponentTransform(net::UnescapeRule::NONE),
- &url_string, &new_parsed->ref, adjustments);
-
- // If we need to strip out http do it after the fact.
- if (omit_http && base::StartsWith(url_string, base::ASCIIToUTF16(kHTTP),
- base::CompareCase::SENSITIVE)) {
- const size_t kHTTPSize = arraysize(kHTTP) - 1;
- url_string = url_string.substr(kHTTPSize);
- // Because offsets in the |adjustments| are already calculated with respect
- // to the string with the http:// prefix in it, those offsets remain correct
- // after stripping the prefix. The only thing necessary is to add an
- // adjustment to reflect the stripped prefix.
- adjustments->insert(adjustments->begin(),
- base::OffsetAdjuster::Adjustment(0, kHTTPSize, 0));
-
- if (prefix_end)
- *prefix_end -= kHTTPSize;
-
- // Adjust new_parsed.
- DCHECK(new_parsed->scheme.is_valid());
- int delta = -(new_parsed->scheme.len + 3); // +3 for ://.
- new_parsed->scheme.reset();
- AdjustAllComponentsButScheme(delta, new_parsed);
- }
-
- return url_string;
-}
-
-bool CanStripTrailingSlash(const GURL& url) {
- // Omit the path only for standard, non-file URLs with nothing but "/" after
- // the hostname.
- return url.IsStandard() && !url.SchemeIsFile() && !url.SchemeIsFileSystem() &&
- !url.has_query() && !url.has_ref() && url.path() == "/";
-}
-
-void AppendFormattedHost(const GURL& url,
- const std::string& languages,
- base::string16* output) {
- AppendFormattedComponent(
- url.possibly_invalid_spec(), url.parsed_for_possibly_invalid_spec().host,
- HostComponentTransform(languages), output, NULL, NULL);
-}
-
-base::string16 IDNToUnicode(const std::string& host,
- const std::string& languages) {
- return IDNToUnicodeWithAdjustments(host, languages, NULL);
-}
-
-} // url_formatter
diff --git a/components/url_formatter/url_formatter.gyp b/components/url_formatter/url_formatter.gyp
deleted file mode 100644
index 9375e96..0000000
--- a/components/url_formatter/url_formatter.gyp
+++ /dev/null
@@ -1,39 +0,0 @@
-# Copyright 2015 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-{
- 'targets': [
- {
- # GN version: //components/url_formatter
- 'target_name': 'url_formatter',
- 'type': 'static_library',
- 'dependencies': [
- '../../base/base.gyp:base',
- '../../net/net.gyp:net',
- '../../third_party/icu/icu.gyp:icui18n',
- '../../third_party/icu/icu.gyp:icuuc',
- '../../url/url.gyp:url_lib',
- ],
- 'sources': [
- # Note: sources list duplicated in GN build.
- 'elide_url.cc',
- 'elide_url.h',
- 'url_fixer.cc',
- 'url_fixer.h',
- 'url_formatter.cc',
- 'url_formatter.h',
- ],
- # TODO(jschuh): crbug.com/167187 fix size_t to int truncations.
- 'msvs_disabled_warnings': [4267, ],
-
- 'conditions': [
- ['OS != "android"', {
- 'dependencies': [
- '../../ui/gfx/gfx.gyp:gfx',
- ]
- }],
- ],
- },
- ],
-}
diff --git a/components/url_formatter/url_formatter.h b/components/url_formatter/url_formatter.h
deleted file mode 100644
index 01c8795..0000000
--- a/components/url_formatter/url_formatter.h
+++ /dev/null
@@ -1,155 +0,0 @@
-// Copyright 2015 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-// url_formatter contains routines for formatting URLs in a way that can be
-// safely and securely displayed to users. For example, it is responsible
-// for determining when to convert an IDN A-Label (e.g. "xn--[something]")
-// into the IDN U-Label.
-//
-// Note that this formatting is only intended for display purposes; it would
-// be insecure and insufficient to make comparisons solely on formatted URLs
-// (that is, it should not be used for normalizing URLs for comparison for
-// security decisions).
-
-#ifndef COMPONENTS_URL_FORMATTER_URL_FORMATTER_H_
-#define COMPONENTS_URL_FORMATTER_URL_FORMATTER_H_
-
-#include <stdint.h>
-
-#include <string>
-#include <vector>
-
-#include "base/strings/string16.h"
-#include "base/strings/utf_offset_string_conversions.h"
-#include "net/base/escape.h"
-
-class GURL;
-
-namespace url {
-struct Parsed;
-} // url
-
-namespace url_formatter {
-
-// Used by FormatUrl to specify handling of certain parts of the url.
-typedef uint32_t FormatUrlType;
-typedef uint32_t FormatUrlTypes;
-
-// Nothing is omitted.
-extern const FormatUrlType kFormatUrlOmitNothing;
-
-// If set, any username and password are removed.
-extern const FormatUrlType kFormatUrlOmitUsernamePassword;
-
-// If the scheme is 'http://', it's removed.
-extern const FormatUrlType kFormatUrlOmitHTTP;
-
-// Omits the path if it is just a slash and there is no query or ref. This is
-// meaningful for non-file "standard" URLs.
-extern const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname;
-
-// Convenience for omitting all unnecessary types.
-extern const FormatUrlType kFormatUrlOmitAll;
-
-// Creates a string representation of |url|. The IDN host name may be in Unicode
-// if |languages| accepts the Unicode representation. |format_type| is a bitmask
-// of FormatUrlTypes, see it for details. |unescape_rules| defines how to clean
-// the URL for human readability. You will generally want |UnescapeRule::SPACES|
-// for display to the user if you can handle spaces, or |UnescapeRule::NORMAL|
-// if not. If the path part and the query part seem to be encoded in %-encoded
-// UTF-8, decodes %-encoding and UTF-8.
-//
-// The last three parameters may be NULL.
-//
-// |new_parsed| will be set to the parsing parameters of the resultant URL.
-//
-// |prefix_end| will be the length before the hostname of the resultant URL.
-//
-// |offset[s]_for_adjustment| specifies one or more offsets into the original
-// URL, representing insertion or selection points between characters: if the
-// input is "http://foo.com/", offset 0 is before the entire URL, offset 7 is
-// between the scheme and the host, and offset 15 is after the end of the URL.
-// Valid input offsets range from 0 to the length of the input URL string. On
-// exit, each offset will have been modified to reflect any changes made to the
-// output string. For example, if |url| is "http://a:b@c.com/",
-// kFormatUrlOmitUsernamePassword is set, and an offset is 12 (between 'c'
-// and '.'), then on return the output string will be "http://c.com/" and the
-// offset will be 8. If an offset cannot be successfully adjusted (e.g. because
-// it points into the middle of a component that was entirely removed or into
-// the middle of an encoding sequence), it will be set to base::string16::npos.
-// For consistency, if an input offset points between the scheme and the
-// username/password, and both are removed, on output this offset will be 0
-// rather than npos; this means that offsets at the starts and ends of removed
-// components are always transformed the same way regardless of what other
-// components are adjacent.
-base::string16 FormatUrl(const GURL& url,
- const std::string& languages,
- FormatUrlTypes format_types,
- net::UnescapeRule::Type unescape_rules,
- url::Parsed* new_parsed,
- size_t* prefix_end,
- size_t* offset_for_adjustment);
-
-base::string16 FormatUrlWithOffsets(
- const GURL& url,
- const std::string& languages,
- FormatUrlTypes format_types,
- net::UnescapeRule::Type unescape_rules,
- url::Parsed* new_parsed,
- size_t* prefix_end,
- std::vector<size_t>* offsets_for_adjustment);
-
-// This function is like those above except it takes |adjustments| rather
-// than |offset[s]_for_adjustment|. |adjustments| will be set to reflect all
-// the transformations that happened to |url| to convert it into the returned
-// value.
-base::string16 FormatUrlWithAdjustments(
- const GURL& url,
- const std::string& languages,
- FormatUrlTypes format_types,
- net::UnescapeRule::Type unescape_rules,
- url::Parsed* new_parsed,
- size_t* prefix_end,
- base::OffsetAdjuster::Adjustments* adjustments);
-
-// This is a convenience function for FormatUrl() with
-// format_types = kFormatUrlOmitAll and unescape = SPACES. This is the typical
-// set of flags for "URLs to display to the user". You should be cautious about
-// using this for URLs which will be parsed or sent to other applications.
-inline base::string16 FormatUrl(const GURL& url, const std::string& languages) {
- return FormatUrl(url, languages, kFormatUrlOmitAll, net::UnescapeRule::SPACES,
- nullptr, nullptr, nullptr);
-}
-
-// Returns whether FormatUrl() would strip a trailing slash from |url|, given a
-// format flag including kFormatUrlOmitTrailingSlashOnBareHostname.
-bool CanStripTrailingSlash(const GURL& url);
-
-// Formats the host in |url| and appends it to |output|. The host formatter
-// takes the same accept languages component as ElideURL().
-void AppendFormattedHost(const GURL& url,
- const std::string& languages,
- base::string16* output);
-
-// Converts the given host name to unicode characters. This can be called for
-// any host name, if the input is not IDN or is invalid in some way, we'll just
-// return the ASCII source so it is still usable.
-//
-// The input should be the canonicalized ASCII host name from GURL. This
-// function does NOT accept UTF-8!
-//
-// |languages| is a comma separated list of ISO 639 language codes. It
-// is used to determine whether a hostname is 'comprehensible' to a user
-// who understands languages listed. |host| will be converted to a
-// human-readable form (Unicode) ONLY when each component of |host| is
-// regarded as 'comprehensible'. Script-mixing is not allowed except that
-// Latin letters in the ASCII range can be mixed with a limited set of
-// script-language pairs (currently Han, Kana and Hangul for zh,ja and ko).
-// When |languages| is empty, even that mixing is not allowed.
-base::string16 IDNToUnicode(const std::string& host,
- const std::string& languages);
-
-} // url_formatter
-
-#endif // COMPONENTS_URL_FORMATTER_URL_FORMATTER_H_
diff --git a/components/url_formatter/url_formatter_unittest.cc b/components/url_formatter/url_formatter_unittest.cc
deleted file mode 100644
index 0dd635a..0000000
--- a/components/url_formatter/url_formatter_unittest.cc
+++ /dev/null
@@ -1,978 +0,0 @@
-// Copyright 2015 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "components/url_formatter/url_formatter.h"
-
-#include <string.h>
-
-#include <vector>
-
-#include "base/macros.h"
-#include "base/strings/string_number_conversions.h"
-#include "base/strings/stringprintf.h"
-#include "base/strings/utf_string_conversions.h"
-#include "testing/gtest/include/gtest/gtest.h"
-#include "url/gurl.h"
-
-
-namespace url_formatter {
-
-namespace {
-
-using base::WideToUTF16;
-using base::ASCIIToUTF16;
-
-// Shorthand for base::string16::npos, used when spelling out expected offsets
-// in the offset tables below.
-const size_t kNpos = base::string16::npos;
-
-// Accept-language lists exercised by the IDN tests. Each IDNTestCase carries
-// one |unicode_allowed| flag per entry here, in the same order, and the
-// IDNToUnicodeFast/IDNToUnicodeSlow tests select entries by hard-coded index
-// (3 = "ja", 17 = "zh-TW,en", 18 = "ko,ja"), so the order must stay stable.
-const char* const kLanguages[] = {
- "", "en", "zh-CN", "ja", "ko",
- "he", "ar", "ru", "el", "fr",
- "de", "pt", "sv", "th", "hi",
- "de,en", "el,en", "zh-TW,en", "ko,ja", "he,ru,en",
- "zh,ru,en"
-};
-
-// One IDNToUnicode() expectation: a host name plus, for every kLanguages
-// entry, whether the host is expected to be converted to Unicode.
-struct IDNTestCase {
- // Host name handed to IDNToUnicode().
- const char* const input;
- // Expected result when conversion is allowed. May be NULL for a case in
- // which no language list allows conversion; it is not read in that case.
- const wchar_t* unicode_output;
- // unicode_allowed[j] tells whether kLanguages[j] should yield
- // |unicode_output| (true) or the unchanged |input| (false).
- const bool unicode_allowed[arraysize(kLanguages)];
-};
-
-// TODO(jungshik) This is just a random sample of languages and is far
-// from exhaustive. We may have to generate all the combinations
-// of languages (powerset of a set of all the languages).
-const IDNTestCase idn_cases[] = {
- // No IDN
- {"www.google.com", L"www.google.com",
- {true, true, true, true, true,
- true, true, true, true, true,
- true, true, true, true, true,
- true, true, true, true, true,
- true}},
- {"www.google.com.", L"www.google.com.",
- {true, true, true, true, true,
- true, true, true, true, true,
- true, true, true, true, true,
- true, true, true, true, true,
- true}},
- {".", L".",
- {true, true, true, true, true,
- true, true, true, true, true,
- true, true, true, true, true,
- true, true, true, true, true,
- true}},
- {"", L"",
- {true, true, true, true, true,
- true, true, true, true, true,
- true, true, true, true, true,
- true, true, true, true, true,
- true}},
- // IDN
- // Hanzi (Traditional Chinese)
- {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn",
- {true, false, true, true, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, true, true, false,
- true}},
- // Hanzi ('video' in Simplified Chinese : will pass only in zh-CN,zh)
- {"xn--cy2a840a.com", L"\x89c6\x9891.com",
- {true, false, true, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- true}},
- // Hanzi + '123'
- {"www.xn--123-p18d.com", L"www.\x4e00" L"123.com",
- {true, false, true, true, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, true, true, false,
- true}},
- // Hanzi + Latin : U+56FD is simplified and is regarded
- // as not supported in zh-TW.
- {"www.xn--hello-9n1hm04c.com", L"www.hello\x4e2d\x56fd.com",
- {false, false, true, true, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, true, false,
- true}},
- // Kanji + Kana (Japanese)
- {"xn--l8jvb1ey91xtjb.jp", L"\x671d\x65e5\x3042\x3055\x3072.jp",
- {true, false, false, true, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, true, false,
- false}},
- // Katakana including U+30FC
- {"xn--tckm4i2e.jp", L"\x30b3\x30de\x30fc\x30b9.jp",
- {true, false, false, true, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, true, false,
- }},
- {"xn--3ck7a7g.jp", L"\u30ce\u30f3\u30bd.jp",
- {true, false, false, true, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, true, false,
- }},
- // Katakana + Latin (Japanese)
- // TODO(jungshik): Change 'false' in the first element to 'true'
- // after upgrading to ICU 4.2.1 to use new uspoof_* APIs instead
- // of our IsIDNComponentInSingleScript().
- {"xn--e-efusa1mzf.jp", L"e\x30b3\x30de\x30fc\x30b9.jp",
- {false, false, false, true, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, true, false,
- }},
- {"xn--3bkxe.jp", L"\x30c8\x309a.jp",
- {false, false, false, true, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, true, false,
- }},
- // Hangul (Korean)
- {"www.xn--or3b17p6jjc.kr", L"www.\xc804\xc790\xc815\xbd80.kr",
- {true, false, false, false, true,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, true, false,
- false}},
- // b<u-umlaut>cher (German)
- {"xn--bcher-kva.de", L"b\x00fc" L"cher.de",
- {true, false, false, false, false,
- false, false, false, false, true,
- true, false, false, false, false,
- true, false, false, false, false,
- false}},
- // a with diaeresis
- {"www.xn--frgbolaget-q5a.se", L"www.f\x00e4rgbolaget.se",
- {true, false, false, false, false,
- false, false, false, false, false,
- true, false, true, false, false,
- true, false, false, false, false,
- false}},
- // c-cedilla (French)
- {"www.xn--alliancefranaise-npb.fr", L"www.alliancefran\x00e7" L"aise.fr",
- {true, false, false, false, false,
- false, false, false, false, true,
- false, true, false, false, false,
- false, false, false, false, false,
- false}},
- // cafe with an acute accent on the final 'e' (French)
- {"xn--caf-dma.fr", L"caf\x00e9.fr",
- {true, false, false, false, false,
- false, false, false, false, true,
- false, true, true, false, false,
- false, false, false, false, false,
- false}},
- // c-cedilla and a with tilde (Portuguese)
- {"xn--poema-9qae5a.com.br", L"p\x00e3oema\x00e7\x00e3.com.br",
- {true, false, false, false, false,
- false, false, false, false, false,
- false, true, false, false, false,
- false, false, false, false, false,
- false}},
- // s with caron
- {"xn--achy-f6a.com", L"\x0161" L"achy.com",
- {true, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false}},
- // TODO(jungshik) : Add examples with Cyrillic letters
- // only used in some languages written in Cyrillic.
- // Eutopia (Greek)
- {"xn--kxae4bafwg.gr", L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1.gr",
- {true, false, false, false, false,
- false, false, false, true, false,
- false, false, false, false, false,
- false, true, false, false, false,
- false}},
- // Eutopia + 123 (Greek)
- {"xn---123-pldm0haj2bk.gr",
- L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1-123.gr",
- {true, false, false, false, false,
- false, false, false, true, false,
- false, false, false, false, false,
- false, true, false, false, false,
- false}},
- // Cyrillic (Russian)
- {"xn--n1aeec9b.ru", L"\x0442\x043e\x0440\x0442\x044b.ru",
- {true, false, false, false, false,
- false, false, true, false, false,
- false, false, false, false, false,
- false, false, false, false, true,
- true}},
- // Cyrillic + 123 (Russian)
- {"xn---123-45dmmc5f.ru", L"\x0442\x043e\x0440\x0442\x044b-123.ru",
- {true, false, false, false, false,
- false, false, true, false, false,
- false, false, false, false, false,
- false, false, false, false, true,
- true}},
- // Arabic
- {"xn--mgba1fmg.ar", L"\x0627\x0641\x0644\x0627\x0645.ar",
- {true, false, false, false, false,
- false, true, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false}},
- // Hebrew
- {"xn--4dbib.he", L"\x05d5\x05d0\x05d4.he",
- {true, false, false, false, false,
- true, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, true,
- false}},
- // Thai
- {"xn--12c2cc4ag3b4ccu.th",
- L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th",
- {true, false, false, false, false,
- false, false, false, false, false,
- false, false, false, true, false,
- false, false, false, false, false,
- false}},
- // Devanagari (Hindi)
- {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in",
- {true, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, true,
- false, false, false, false, false,
- false}},
- // Invalid IDN
- {"xn--hello?world.com", NULL,
- {false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false}},
- // Unsafe IDNs
- // "payp<alpha>l.com"
- {"www.xn--paypl-g9d.com", L"payp\x03b1l.com",
- {false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false}},
- // google.gr with Greek omicron and epsilon
- {"xn--ggl-6xc1ca.gr", L"g\x03bf\x03bfgl\x03b5.gr",
- {false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false}},
- // google.ru with Cyrillic o
- {"xn--ggl-tdd6ba.ru", L"g\x043e\x043egl\x0435.ru",
- {false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false}},
- // h<e with acute>llo<China in Han>.cn
- {"xn--hllo-bpa7979ih5m.cn", L"h\x00e9llo\x4e2d\x56fd.cn",
- {false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false}},
- // <Greek rho><Cyrillic a><Cyrillic u>.ru
- {"xn--2xa6t2b.ru", L"\x03c1\x0430\x0443.ru",
- {false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false}},
- // One that's really long that will force a buffer realloc
- {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
- "aaaaaaa",
- L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
- L"aaaaaaaa",
- {true, true, true, true, true,
- true, true, true, true, true,
- true, true, true, true, true,
- true, true, true, true, true,
- true}},
- // Test cases for characters we blacklisted although allowed in IDN.
- // Embedded spaces will be turned to %20 in the display.
- // TODO(jungshik): We need to have more cases. This is a typical
- // data-driven trap. The following test cases need to be separated
- // and tested only for a couple of languages.
- {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr",
- {false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false}},
- {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com",
- {false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- }},
- {"google.xn--comabc-k8d", L"google.com\x0338" L"abc",
- {false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- }},
- {"google.xn--com-oh4ba.evil.jp", L"google.com\x309a\x309a.evil.jp",
- {false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- }},
- {"google.xn--comevil-v04f.jp", L"google.com\x30ce" L"evil.jp",
- {false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- }},
- // Padlock icon spoof.
- {"xn--google-hj64e", L"\U0001f512google.com",
- {false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- }},
- // Ensure that blacklisting "\xd83d\xdd12" did not inadvertently blacklist
- // all strings with the surrogate '\xdd12'.
- {"xn--fk9c.com", L"\U00010912.com",
- {true, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- }},
-#if 0
- // These two cases are special. We need a separate test.
- // U+3000 and U+3002 are normalized to ASCII space and dot.
- {"xn-- -kq6ay5z.cn", L"\x4e2d\x56fd\x3000.cn",
- {false, false, true, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, true, false, false,
- true}},
- {"xn--fiqs8s.cn", L"\x4e2d\x56fd\x3002" L"cn",
- {false, false, true, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, true, false, false,
- true}},
-#endif
-};
-
-// Pairs an offset with the offset expected after adjustment.
-// NOTE(review): no use of this struct is visible in the surrounding tests;
-// confirm it is still needed.
-struct AdjustOffsetCase {
- size_t input_offset;
- size_t output_offset;
-};
-
-// One row of the table-driven FormatUrl test.
-struct UrlTestData {
- // Human-readable label, streamed into the failure message.
- const char* const description;
- // URL text used to construct the GURL that is formatted.
- const char* const input;
- // Accept-languages string passed to FormatUrl().
- const char* const languages;
- // Formatting flags passed to FormatUrl().
- FormatUrlTypes format_types;
- // Unescaping rules passed to FormatUrl().
- net::UnescapeRule::Type escape_rules;
- const wchar_t* output; // Use |wchar_t| to handle Unicode constants easily.
- // Expected value of FormatUrl()'s |prefix_end| out-parameter.
- size_t prefix_len;
-};
-
-// Helper shared by the IDNToUnicode{Fast,Slow} tests.
-// Tags both |expected| and |actual| with the suffix "::<language list>" so
-// that a failing comparison shows which language list was in effect without
-// having to reach for a debugger.
-void AppendLanguagesToOutputs(const char* languages,
-                              base::string16* expected,
-                              base::string16* actual) {
-  const base::string16 suffix = ASCIIToUTF16("::") + ASCIIToUTF16(languages);
-  *expected += suffix;
-  *actual += suffix;
-}
-
-// A pair of helpers for the FormatUrlWithOffsets() test.
-// VerboseExpect() compares a single adjusted offset with its expectation and,
-// on mismatch, reports the original URL, the input position being checked and
-// the formatted URL.
-void VerboseExpect(size_t expected,
-                   size_t actual,
-                   const std::string& original_url,
-                   size_t position,
-                   const base::string16& formatted_url) {
-  EXPECT_EQ(expected, actual)
-      << "Original URL: " << original_url
-      << " (at char " << position << ")\nFormatted URL: " << formatted_url;
-}
-
-// Helper for the FormatUrlWithOffsets() test. Formats |url_string| with
-// FormatUrlWithOffsets() while tracking every input offset from 0 to
-// length + 1 plus two sentinels, then verifies each adjusted offset:
-//   * offsets in [0, length) must equal the matching |output_offsets| entry,
-//     so |output_offsets| needs at least |url_string.length()| elements (it is
-//     never read, and may be NULL, when |url_string| is empty);
-//   * the end-of-input offset must map to the end of the formatted URL;
-//   * every offset past the end of the input, and npos itself, must come back
-//     as npos.
-void CheckAdjustedOffsets(const std::string& url_string,
-                          const std::string& languages,
-                          FormatUrlTypes format_types,
-                          net::UnescapeRule::Type unescape_rules,
-                          const size_t* output_offsets) {
-  GURL url(url_string);
-  const size_t url_length = url_string.length();
-  // Something larger than any input length.
-  const size_t kHugeOffset = 500000;
-
-  // Layout of |offsets|: entries [0, url_length + 1] hold their own index,
-  // kHugeOffset sits at url_length + 2 and npos at url_length + 3.
-  std::vector<size_t> offsets;
-  for (size_t i = 0; i <= url_length + 1; ++i)
-    offsets.push_back(i);
-  offsets.push_back(kHugeOffset);
-  offsets.push_back(std::string::npos);
-
-  base::string16 formatted_url = FormatUrlWithOffsets(
-      url, languages, format_types, unescape_rules, NULL, NULL, &offsets);
-
-  for (size_t i = 0; i < url_length; ++i)
-    VerboseExpect(output_offsets[i], offsets[i], url_string, i, formatted_url);
-  VerboseExpect(formatted_url.length(), offsets[url_length], url_string,
-                url_length, formatted_url);
-  // Report each out-of-range entry under the input position it was seeded
-  // with, and make sure the trailing npos entry is verified as well.
-  VerboseExpect(base::string16::npos, offsets[url_length + 1], url_string,
-                url_length + 1, formatted_url);
-  VerboseExpect(base::string16::npos, offsets[url_length + 2], url_string,
-                kHugeOffset, formatted_url);
-  VerboseExpect(base::string16::npos, offsets[url_length + 3], url_string,
-                std::string::npos, formatted_url);
-}
-
-// Runs every idn_cases entry against every language list except the ones at
-// indices 3, 17 and 18, which are left to the IDNToUnicodeSlow test.
-TEST(UrlFormatterTest, IDNToUnicodeFast) {
-  for (size_t case_index = 0; case_index < arraysize(idn_cases);
-       ++case_index) {
-    const IDNTestCase& test_case = idn_cases[case_index];
-    for (size_t lang_index = 0; lang_index < arraysize(kLanguages);
-         ++lang_index) {
-      // ja || zh-TW,en || ko,ja -> IDNToUnicodeSlow
-      const bool covered_by_slow_test =
-          lang_index == 3 || lang_index == 17 || lang_index == 18;
-      if (covered_by_slow_test)
-        continue;
-
-      const char* const languages = kLanguages[lang_index];
-      base::string16 actual(IDNToUnicode(test_case.input, languages));
-      // |unicode_output| is only read when conversion is expected; it may be
-      // NULL otherwise.
-      base::string16 expected;
-      if (test_case.unicode_allowed[lang_index])
-        expected = WideToUTF16(test_case.unicode_output);
-      else
-        expected = ASCIIToUTF16(test_case.input);
-      AppendLanguagesToOutputs(languages, &expected, &actual);
-      EXPECT_EQ(expected, actual) << "input: \"" << test_case.input
-                                  << "\", languages: \"" << languages
-                                  << "\"";
-    }
-  }
-}
-
-// Runs every idn_cases entry against only the language lists at indices 3, 17
-// and 18; all other lists are covered by the IDNToUnicodeFast test.
-TEST(UrlFormatterTest, IDNToUnicodeSlow) {
-  for (size_t case_index = 0; case_index < arraysize(idn_cases);
-       ++case_index) {
-    const IDNTestCase& test_case = idn_cases[case_index];
-    for (size_t lang_index = 0; lang_index < arraysize(kLanguages);
-         ++lang_index) {
-      // !(ja || zh-TW,en || ko,ja) -> IDNToUnicodeFast
-      const bool covered_here =
-          lang_index == 3 || lang_index == 17 || lang_index == 18;
-      if (!covered_here)
-        continue;
-
-      const char* const languages = kLanguages[lang_index];
-      base::string16 actual(IDNToUnicode(test_case.input, languages));
-      // |unicode_output| is only read when conversion is expected; it may be
-      // NULL otherwise.
-      base::string16 expected;
-      if (test_case.unicode_allowed[lang_index])
-        expected = WideToUTF16(test_case.unicode_output);
-      else
-        expected = ASCIIToUTF16(test_case.input);
-      AppendLanguagesToOutputs(languages, &expected, &actual);
-      EXPECT_EQ(expected, actual) << "input: \"" << test_case.input
-                                  << "\", languages: \"" << languages
-                                  << "\"";
-    }
-  }
-}
-
-// Regression test for http://crbug.com/510551: ulocdata_getExemplarSet may
-// fail with some locales (currently bn, gu, and te), and that failure used to
-// cause a crash. This may be an icu bug, but regardless, it should not cause a
-// crash, so IDNToUnicode() is called with every two-letter lowercase code.
-TEST(UrlFormatterTest, IDNToUnicodeNeverCrashes) {
-  for (char first = 'a'; first <= 'z'; ++first) {
-    for (char second = 'a'; second <= 'z'; ++second) {
-      const std::string language_code =
-          base::StringPrintf("%c%c", first, second);
-      // The result is irrelevant; surviving the call is the whole test.
-      base::string16 ignored(IDNToUnicode("xn--74h", language_code));
-    }
-  }
-}
-
-// Table-driven check of FormatUrl(): every entry supplies an input URL, the
-// accept languages, the format and unescape flags, and the expected formatted
-// string together with the expected prefix length reported by FormatUrl().
-TEST(UrlFormatterTest, FormatUrl) {
- FormatUrlTypes default_format_type = kFormatUrlOmitUsernamePassword;
- const UrlTestData tests[] = {
- {"Empty URL", "", "", default_format_type, net::UnescapeRule::NORMAL, L"",
- 0},
-
- {"Simple URL", "http://www.google.com/", "", default_format_type,
- net::UnescapeRule::NORMAL, L"http://www.google.com/", 7},
-
- {"With a port number and a reference",
- "http://www.google.com:8080/#\xE3\x82\xB0", "", default_format_type,
- net::UnescapeRule::NORMAL, L"http://www.google.com:8080/#\x30B0", 7},
-
- // -------- IDN tests --------
- {"Japanese IDN with ja", "http://xn--l8jvb1ey91xtjb.jp", "ja",
- default_format_type, net::UnescapeRule::NORMAL,
- L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
-
- {"Japanese IDN with en", "http://xn--l8jvb1ey91xtjb.jp", "en",
- default_format_type, net::UnescapeRule::NORMAL,
- L"http://xn--l8jvb1ey91xtjb.jp/", 7},
-
- {"Japanese IDN without any languages", "http://xn--l8jvb1ey91xtjb.jp", "",
- default_format_type, net::UnescapeRule::NORMAL,
- // Single script is safe for empty languages.
- L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
-
- {"mailto: with Japanese IDN", "mailto:foo@xn--l8jvb1ey91xtjb.jp", "ja",
- default_format_type, net::UnescapeRule::NORMAL,
- // GURL doesn't assume an email address's domain part as a host name.
- L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7},
-
- {"file: with Japanese IDN", "file://xn--l8jvb1ey91xtjb.jp/config.sys",
- "ja", default_format_type, net::UnescapeRule::NORMAL,
- L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7},
-
- {"ftp: with Japanese IDN", "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", "ja",
- default_format_type, net::UnescapeRule::NORMAL,
- L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6},
-
- // -------- omit_username_password flag tests --------
- {"With username and password, omit_username_password=false",
- "http://user:passwd@example.com/foo", "", kFormatUrlOmitNothing,
- net::UnescapeRule::NORMAL, L"http://user:passwd@example.com/foo", 19},
-
- {"With username and password, omit_username_password=true",
- "http://user:passwd@example.com/foo", "", default_format_type,
- net::UnescapeRule::NORMAL, L"http://example.com/foo", 7},
-
- {"With username and no password", "http://user@example.com/foo", "",
- default_format_type, net::UnescapeRule::NORMAL,
- L"http://example.com/foo", 7},
-
- {"Just '@' without username and password", "http://@example.com/foo", "",
- default_format_type, net::UnescapeRule::NORMAL,
- L"http://example.com/foo", 7},
-
- // GURL doesn't think local-part of an email address is username for URL.
- {"mailto:, omit_username_password=true", "mailto:foo@example.com", "",
- default_format_type, net::UnescapeRule::NORMAL,
- L"mailto:foo@example.com", 7},
-
- // -------- unescape flag tests --------
- {"Do not unescape",
- "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
- "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
- "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB",
- "en", default_format_type, net::UnescapeRule::NONE,
- // GURL parses %-encoded hostnames into Punycode.
- L"http://xn--qcka1pmc.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
- L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB",
- 7},
-
- {"Unescape normally",
- "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
- "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
- "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB",
- "en", default_format_type, net::UnescapeRule::NORMAL,
- L"http://xn--qcka1pmc.jp/\x30B0\x30FC\x30B0\x30EB"
- L"?q=\x30B0\x30FC\x30B0\x30EB",
- 7},
-
- {"Unescape normally with BiDi control character",
- "http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", "en",
- default_format_type, net::UnescapeRule::NORMAL,
- L"http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", 7},
-
- {"Unescape normally including unescape spaces",
- "http://www.google.com/search?q=Hello%20World", "en",
- default_format_type, net::UnescapeRule::SPACES,
- L"http://www.google.com/search?q=Hello World", 7},
-
- /*
- {"unescape=true with some special characters",
- "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", "",
- kFormatUrlOmitNothing, net::UnescapeRule::NORMAL,
- L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25},
- */
- // Disabled: the resultant URL becomes "...user%253A:%2540passwd...".
-
- // -------- omit http: --------
- {"omit http with user name", "http://user@example.com/foo", "",
- kFormatUrlOmitAll, net::UnescapeRule::NORMAL, L"example.com/foo", 0},
-
- {"omit http", "http://www.google.com/", "en", kFormatUrlOmitHTTP,
- net::UnescapeRule::NORMAL, L"www.google.com/", 0},
-
- {"omit http with https", "https://www.google.com/", "en",
- kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL,
- L"https://www.google.com/", 8},
-
- {"omit http starts with ftp.", "http://ftp.google.com/", "en",
- kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL, L"http://ftp.google.com/",
- 7},
-
- // -------- omit trailing slash on bare hostname --------
- {"omit slash when it's the entire path", "http://www.google.com/", "en",
- kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL,
- L"http://www.google.com", 7},
- {"omit slash when there's a ref", "http://www.google.com/#ref", "en",
- kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL,
- L"http://www.google.com/#ref", 7},
- {"omit slash when there's a query", "http://www.google.com/?", "en",
- kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL,
- L"http://www.google.com/?", 7},
- {"omit slash when it's not the entire path", "http://www.google.com/foo",
- "en", kFormatUrlOmitTrailingSlashOnBareHostname,
- net::UnescapeRule::NORMAL, L"http://www.google.com/foo", 7},
- {"omit slash for nonstandard URLs", "data:/", "en",
- kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL,
- L"data:/", 5},
- {"omit slash for file URLs", "file:///", "en",
- kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL,
- L"file:///", 7},
-
- // -------- view-source: --------
- {"view-source", "view-source:http://xn--qcka1pmc.jp/", "ja",
- default_format_type, net::UnescapeRule::NORMAL,
- L"view-source:http://\x30B0\x30FC\x30B0\x30EB.jp/", 19},
-
- {"view-source of view-source",
- "view-source:view-source:http://xn--qcka1pmc.jp/", "ja",
- default_format_type, net::UnescapeRule::NORMAL,
- L"view-source:view-source:http://xn--qcka1pmc.jp/", 12},
-
- // view-source should omit http and trailing slash where non-view-source
- // would.
- {"view-source omit http", "view-source:http://a.b/c", "en",
- kFormatUrlOmitAll, net::UnescapeRule::NORMAL, L"view-source:a.b/c", 12},
- {"view-source omit http starts with ftp.", "view-source:http://ftp.b/c",
- "en", kFormatUrlOmitAll, net::UnescapeRule::NORMAL,
- L"view-source:http://ftp.b/c", 19},
- {"view-source omit slash when it's the entire path",
- "view-source:http://a.b/", "en", kFormatUrlOmitAll,
- net::UnescapeRule::NORMAL, L"view-source:a.b", 12},
- };
-
- // Format every table entry and compare both the formatted text and the
- // reported prefix length; the entry's description labels any failure.
- for (size_t i = 0; i < arraysize(tests); ++i) {
- size_t prefix_len;
- base::string16 formatted = FormatUrl(
- GURL(tests[i].input), tests[i].languages, tests[i].format_types,
- tests[i].escape_rules, NULL, &prefix_len, NULL);
- EXPECT_EQ(WideToUTF16(tests[i].output), formatted) << tests[i].description;
- EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description;
- }
-}
-
-// Checks that the url::Parsed filled in by FormatUrl() describes the formatted
-// string rather than the input: each component range is used to slice
-// |formatted| and compared with the expected text, and components that were
-// omitted or absent are expected to be invalid. The same |parsed| object is
-// reused across all sub-cases.
-TEST(UrlFormatterTest, FormatUrlParsed) {
- // No unescape case.
- url::Parsed parsed;
- base::string16 formatted =
- FormatUrl(GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
- "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
- "ja", kFormatUrlOmitNothing, net::UnescapeRule::NONE, &parsed,
- NULL, NULL);
- EXPECT_EQ(WideToUTF16(
- L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
- L"/%E3%82%B0/?q=%E3%82%B0#\x30B0"), formatted);
- EXPECT_EQ(WideToUTF16(L"%E3%82%B0"),
- formatted.substr(parsed.username.begin, parsed.username.len));
- EXPECT_EQ(WideToUTF16(L"%E3%83%BC"),
- formatted.substr(parsed.password.begin, parsed.password.len));
- EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"),
- formatted.substr(parsed.host.begin, parsed.host.len));
- EXPECT_EQ(WideToUTF16(L"8080"),
- formatted.substr(parsed.port.begin, parsed.port.len));
- EXPECT_EQ(WideToUTF16(L"/%E3%82%B0/"),
- formatted.substr(parsed.path.begin, parsed.path.len));
- EXPECT_EQ(WideToUTF16(L"q=%E3%82%B0"),
- formatted.substr(parsed.query.begin, parsed.query.len));
- EXPECT_EQ(WideToUTF16(L"\x30B0"),
- formatted.substr(parsed.ref.begin, parsed.ref.len));
-
- // Unescape case.
- formatted =
- FormatUrl(GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
- "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
- "ja", kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, &parsed,
- NULL, NULL);
- EXPECT_EQ(WideToUTF16(L"http://\x30B0:\x30FC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
- L"/\x30B0/?q=\x30B0#\x30B0"), formatted);
- EXPECT_EQ(WideToUTF16(L"\x30B0"),
- formatted.substr(parsed.username.begin, parsed.username.len));
- EXPECT_EQ(WideToUTF16(L"\x30FC"),
- formatted.substr(parsed.password.begin, parsed.password.len));
- EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"),
- formatted.substr(parsed.host.begin, parsed.host.len));
- EXPECT_EQ(WideToUTF16(L"8080"),
- formatted.substr(parsed.port.begin, parsed.port.len));
- EXPECT_EQ(WideToUTF16(L"/\x30B0/"),
- formatted.substr(parsed.path.begin, parsed.path.len));
- EXPECT_EQ(WideToUTF16(L"q=\x30B0"),
- formatted.substr(parsed.query.begin, parsed.query.len));
- EXPECT_EQ(WideToUTF16(L"\x30B0"),
- formatted.substr(parsed.ref.begin, parsed.ref.len));
-
- // Omit_username_password + unescape case.
- formatted =
- FormatUrl(GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
- "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
- "ja", kFormatUrlOmitUsernamePassword, net::UnescapeRule::NORMAL,
- &parsed, NULL, NULL);
- EXPECT_EQ(WideToUTF16(L"http://\x30B0\x30FC\x30B0\x30EB.jp:8080"
- L"/\x30B0/?q=\x30B0#\x30B0"), formatted);
- EXPECT_FALSE(parsed.username.is_valid());
- EXPECT_FALSE(parsed.password.is_valid());
- EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"),
- formatted.substr(parsed.host.begin, parsed.host.len));
- EXPECT_EQ(WideToUTF16(L"8080"),
- formatted.substr(parsed.port.begin, parsed.port.len));
- EXPECT_EQ(WideToUTF16(L"/\x30B0/"),
- formatted.substr(parsed.path.begin, parsed.path.len));
- EXPECT_EQ(WideToUTF16(L"q=\x30B0"),
- formatted.substr(parsed.query.begin, parsed.query.len));
- EXPECT_EQ(WideToUTF16(L"\x30B0"),
- formatted.substr(parsed.ref.begin, parsed.ref.len));
-
- // View-source case.
- formatted =
- FormatUrl(GURL("view-source:http://user:passwd@host:81/path?query#ref"),
- std::string(), kFormatUrlOmitUsernamePassword,
- net::UnescapeRule::NORMAL, &parsed, NULL, NULL);
- EXPECT_EQ(WideToUTF16(L"view-source:http://host:81/path?query#ref"),
- formatted);
- EXPECT_EQ(WideToUTF16(L"view-source:http"),
- formatted.substr(parsed.scheme.begin, parsed.scheme.len));
- EXPECT_FALSE(parsed.username.is_valid());
- EXPECT_FALSE(parsed.password.is_valid());
- EXPECT_EQ(WideToUTF16(L"host"),
- formatted.substr(parsed.host.begin, parsed.host.len));
- EXPECT_EQ(WideToUTF16(L"81"),
- formatted.substr(parsed.port.begin, parsed.port.len));
- EXPECT_EQ(WideToUTF16(L"/path"),
- formatted.substr(parsed.path.begin, parsed.path.len));
- EXPECT_EQ(WideToUTF16(L"query"),
- formatted.substr(parsed.query.begin, parsed.query.len));
- EXPECT_EQ(WideToUTF16(L"ref"),
- formatted.substr(parsed.ref.begin, parsed.ref.len));
-
- // omit http case.
- formatted = FormatUrl(GURL("http://host:8000/a?b=c#d"), std::string(),
- kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL, &parsed,
- NULL, NULL);
- EXPECT_EQ(WideToUTF16(L"host:8000/a?b=c#d"), formatted);
- EXPECT_FALSE(parsed.scheme.is_valid());
- EXPECT_FALSE(parsed.username.is_valid());
- EXPECT_FALSE(parsed.password.is_valid());
- EXPECT_EQ(WideToUTF16(L"host"),
- formatted.substr(parsed.host.begin, parsed.host.len));
- EXPECT_EQ(WideToUTF16(L"8000"),
- formatted.substr(parsed.port.begin, parsed.port.len));
- EXPECT_EQ(WideToUTF16(L"/a"),
- formatted.substr(parsed.path.begin, parsed.path.len));
- EXPECT_EQ(WideToUTF16(L"b=c"),
- formatted.substr(parsed.query.begin, parsed.query.len));
- EXPECT_EQ(WideToUTF16(L"d"),
- formatted.substr(parsed.ref.begin, parsed.ref.len));
-
- // omit http starts with ftp case.
- formatted = FormatUrl(GURL("http://ftp.host:8000/a?b=c#d"), std::string(),
- kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL, &parsed,
- NULL, NULL);
- EXPECT_EQ(WideToUTF16(L"http://ftp.host:8000/a?b=c#d"), formatted);
- EXPECT_TRUE(parsed.scheme.is_valid());
- EXPECT_FALSE(parsed.username.is_valid());
- EXPECT_FALSE(parsed.password.is_valid());
- EXPECT_EQ(WideToUTF16(L"http"),
- formatted.substr(parsed.scheme.begin, parsed.scheme.len));
- EXPECT_EQ(WideToUTF16(L"ftp.host"),
- formatted.substr(parsed.host.begin, parsed.host.len));
- EXPECT_EQ(WideToUTF16(L"8000"),
- formatted.substr(parsed.port.begin, parsed.port.len));
- EXPECT_EQ(WideToUTF16(L"/a"),
- formatted.substr(parsed.path.begin, parsed.path.len));
- EXPECT_EQ(WideToUTF16(L"b=c"),
- formatted.substr(parsed.query.begin, parsed.query.len));
- EXPECT_EQ(WideToUTF16(L"d"),
- formatted.substr(parsed.ref.begin, parsed.ref.len));
-
- // omit http starts with 'f' case.
- formatted = FormatUrl(GURL("http://f/"), std::string(), kFormatUrlOmitHTTP,
- net::UnescapeRule::NORMAL, &parsed, NULL, NULL);
- EXPECT_EQ(WideToUTF16(L"f/"), formatted);
- EXPECT_FALSE(parsed.scheme.is_valid());
- EXPECT_FALSE(parsed.username.is_valid());
- EXPECT_FALSE(parsed.password.is_valid());
- EXPECT_FALSE(parsed.port.is_valid());
- EXPECT_TRUE(parsed.path.is_valid());
- EXPECT_FALSE(parsed.query.is_valid());
- EXPECT_FALSE(parsed.ref.is_valid());
- EXPECT_EQ(WideToUTF16(L"f"),
- formatted.substr(parsed.host.begin, parsed.host.len));
- EXPECT_EQ(WideToUTF16(L"/"),
- formatted.substr(parsed.path.begin, parsed.path.len));
-}
-
-// Round-trip check for the path: for each ASCII character with code 32..127,
-// a GURL whose path consists of that character must come back as the same
-// GURL after being passed through FormatUrl() and parsed again.
-TEST(UrlFormatterTest, FormatUrlRoundTripPathASCII) {
-  for (unsigned char ch = 32; ch < 128; ++ch) {
-    std::string spec("http://www.google.com/");
-    spec += static_cast<char>(ch);
-    const GURL url(spec);
-
-    size_t prefix_len;
-    const base::string16 formatted =
-        FormatUrl(url, std::string(), kFormatUrlOmitUsernamePassword,
-                  net::UnescapeRule::NORMAL, NULL, &prefix_len, NULL);
-    EXPECT_EQ(url.spec(), GURL(formatted).spec());
-  }
-}
-
-// Round-trip check for the path: for each ASCII character with code 32..127,
-// a GURL whose path is the %-escaped form of that character must come back as
-// the same GURL after being passed through FormatUrl() and parsed again.
-TEST(UrlFormatterTest, FormatUrlRoundTripPathEscaped) {
-  for (unsigned char ch = 32; ch < 128; ++ch) {
-    // "%XX", where XX is the hex encoding of |ch|.
-    const std::string escaped = '%' + base::HexEncode(&ch, 1);
-    const GURL url("http://www.google.com/" + escaped);
-
-    size_t prefix_len;
-    const base::string16 formatted =
-        FormatUrl(url, std::string(), kFormatUrlOmitUsernamePassword,
-                  net::UnescapeRule::NORMAL, NULL, &prefix_len, NULL);
-    EXPECT_EQ(url.spec(), GURL(formatted).spec());
-  }
-}
-
-// Round-trip check for the query: for each ASCII character with code 32..127,
-// a GURL whose query consists of that character must come back as the same
-// GURL after being passed through FormatUrl() and parsed again.
-TEST(UrlFormatterTest, FormatUrlRoundTripQueryASCII) {
-  for (unsigned char ch = 32; ch < 128; ++ch) {
-    std::string spec("http://www.google.com/?");
-    spec += static_cast<char>(ch);
-    const GURL url(spec);
-
-    size_t prefix_len;
-    const base::string16 formatted =
-        FormatUrl(url, std::string(), kFormatUrlOmitUsernamePassword,
-                  net::UnescapeRule::NORMAL, NULL, &prefix_len, NULL);
-    EXPECT_EQ(url.spec(), GURL(formatted).spec());
-  }
-}
-
-// Make sure that calling FormatUrl on a GURL and then converting back to a GURL
-// only results in a different GURL for certain characters.
-TEST(UrlFormatterTest, FormatUrlRoundTripQueryEscaped) {
- // A full list of characters which FormatURL should unescape and GURL should
- // not escape again, when they appear in a query string.
- const char kUnescapedCharacters[] =
- "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_~";
- for (unsigned char test_char = 0; test_char < 128; ++test_char) {
- std::string original_url("http://www.google.com/?");
- original_url.push_back('%');
- original_url.append(base::HexEncode(&test_char, 1));
-
- GURL url(original_url);
- size_t prefix_len;
- base::string16 formatted =
- FormatUrl(url, std::string(), kFormatUrlOmitUsernamePassword,
- net::UnescapeRule::NORMAL, NULL, &prefix_len, NULL);
-
- if (test_char &&
- strchr(kUnescapedCharacters, static_cast<char>(test_char))) {
- EXPECT_NE(url.spec(), GURL(formatted).spec());
- } else {
- EXPECT_EQ(url.spec(), GURL(formatted).spec());
- }
- }
-}
-
-TEST(UrlFormatterTest, FormatUrlWithOffsets) {
- CheckAdjustedOffsets(std::string(), "en", kFormatUrlOmitNothing,
- net::UnescapeRule::NORMAL, NULL);
-
- const size_t basic_offsets[] = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
- 21, 22, 23, 24, 25
- };
- CheckAdjustedOffsets("http://www.google.com/foo/", "en",
- kFormatUrlOmitNothing, net::UnescapeRule::NORMAL,
- basic_offsets);
-
- const size_t omit_auth_offsets_1[] = {
- 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 7,
- 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
- };
- CheckAdjustedOffsets("http://foo:bar@www.google.com/", "en",
- kFormatUrlOmitUsernamePassword,
- net::UnescapeRule::NORMAL, omit_auth_offsets_1);
-
- const size_t omit_auth_offsets_2[] = {
- 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, 7, 8, 9, 10, 11, 12, 13, 14,
- 15, 16, 17, 18, 19, 20, 21
- };
- CheckAdjustedOffsets("http://foo@www.google.com/", "en",
- kFormatUrlOmitUsernamePassword,
- net::UnescapeRule::NORMAL, omit_auth_offsets_2);
-
- const size_t dont_omit_auth_offsets[] = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
- kNpos, kNpos, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
- kNpos, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
- 30, 31
- };
- // Unescape to "http://foo\x30B0:\x30B0bar@www.google.com".
- CheckAdjustedOffsets("http://foo%E3%82%B0:%E3%82%B0bar@www.google.com/", "en",
- kFormatUrlOmitNothing, net::UnescapeRule::NORMAL,
- dont_omit_auth_offsets);
-
- const size_t view_source_offsets[] = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, kNpos,
- kNpos, kNpos, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33
- };
- CheckAdjustedOffsets("view-source:http://foo@www.google.com/", "en",
- kFormatUrlOmitUsernamePassword,
- net::UnescapeRule::NORMAL, view_source_offsets);
-
- const size_t idn_hostname_offsets_1[] = {
- 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
- kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 12,
- 13, 14, 15, 16, 17, 18, 19
- };
- // Convert punycode to "http://\x671d\x65e5\x3042\x3055\x3072.jp/foo/".
- CheckAdjustedOffsets("http://xn--l8jvb1ey91xtjb.jp/foo/", "ja",
- kFormatUrlOmitNothing, net::UnescapeRule::NORMAL,
- idn_hostname_offsets_1);
-
- const size_t idn_hostname_offsets_2[] = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos,
- kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 14, 15, kNpos, kNpos, kNpos,
- kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
- kNpos, 19, 20, 21, 22, 23, 24
- };
- // Convert punycode to
- // "http://test.\x89c6\x9891.\x5317\x4eac\x5927\x5b78.test/".
- CheckAdjustedOffsets("http://test.xn--cy2a840a.xn--1lq90ic7f1rc.test/",
- "zh-CN", kFormatUrlOmitNothing,
- net::UnescapeRule::NORMAL, idn_hostname_offsets_2);
-
- const size_t unescape_offsets[] = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
- 21, 22, 23, 24, 25, kNpos, kNpos, 26, 27, 28, 29, 30, kNpos, kNpos, kNpos,
- kNpos, kNpos, kNpos, kNpos, kNpos, 31, kNpos, kNpos, kNpos, kNpos, kNpos,
- kNpos, kNpos, kNpos, 32, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
- kNpos, 33, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos
- };
- // Unescape to "http://www.google.com/foo bar/\x30B0\x30FC\x30B0\x30EB".
- CheckAdjustedOffsets(
- "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB",
- "en", kFormatUrlOmitNothing, net::UnescapeRule::SPACES, unescape_offsets);
-
- const size_t ref_offsets[] = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
- 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, kNpos, kNpos, 32, kNpos, kNpos,
- 33
- };
- // Unescape to "http://www.google.com/foo.html#\x30B0\x30B0z".
- CheckAdjustedOffsets(
- "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z", "en",
- kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, ref_offsets);
-
- const size_t omit_http_offsets[] = {
- 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
- 10, 11, 12, 13, 14
- };
- CheckAdjustedOffsets("http://www.google.com/", "en", kFormatUrlOmitHTTP,
- net::UnescapeRule::NORMAL, omit_http_offsets);
-
- const size_t omit_http_start_with_ftp_offsets[] = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
- };
- CheckAdjustedOffsets("http://ftp.google.com/", "en", kFormatUrlOmitHTTP,
- net::UnescapeRule::NORMAL,
- omit_http_start_with_ftp_offsets);
-
- const size_t omit_all_offsets[] = {
- 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos,
- 0, 1, 2, 3, 4, 5, 6, 7
- };
- CheckAdjustedOffsets("http://user@foo.com/", "en", kFormatUrlOmitAll,
- net::UnescapeRule::NORMAL, omit_all_offsets);
-}
-
-} // namespace
-
-} // namespace url_formatter