diff options
author | jochen <jochen@chromium.org> | 2015-08-04 00:05:17 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2015-08-04 07:05:48 +0000 |
commit | 90437dd218ba09f01612ebfc47eda167d67fb368 (patch) | |
tree | 77d2fe1ff3c1830af96a8e03c32419c5b5634522 /components/url_formatter | |
parent | 31ad3e6a80c367c4f8ea0a921e8f2cfe555a5fd6 (diff) | |
download | chromium_src-90437dd218ba09f01612ebfc47eda167d67fb368.zip chromium_src-90437dd218ba09f01612ebfc47eda167d67fb368.tar.gz chromium_src-90437dd218ba09f01612ebfc47eda167d67fb368.tar.bz2 |
Revert of Move net::FormatUrl and friends outside of //net and into //components (patchset #16 id:290001 of https://codereview.chromium.org/1171333003/ )
Reason for revert:
breaks gn_check on Android: https://build.chromium.org/p/chromium.linux/builders/Android%20GN/builds/28796/steps/gn_check/logs/stdio
Original issue's description:
> Move net::FormatUrl and friends outside of //net and into //components
>
> net::FormatUrl and related are specifically concerned with display
> policies of URLs, which is not something that //net needs to be aware
> of, as that's a UX question.
>
> This folds in net::FormatURL along with the existing //components/url_fixer
> and //components/secure_display into a common component,
> //components/url_formatter, that handles reformatting URLs for user-friendly
> or data storage (url_formatter), for use in security prompts (elide_url),
> or for reformatting URLs from user input (url_fixer)
>
> (Disabling presubmit since this is intentionally not fixing a legacy API, just moving it for future cleanups)
>
> BUG=486979
> NOPRESUBMIT=true
>
> Committed: https://crrev.com/1659865c3eb47166c82378bb840801135b057a09
> Cr-Commit-Position: refs/heads/master@{#341605}
TBR=droger@chromium.org,jam@chromium.org,mkwst@chromium.org,pkasting@chromium.org,sky@chromium.org,stuartmorgan@chromium.org,felt@chromium.org,rsleevi@chromium.org
NOPRESUBMIT=true
NOTREECHECKS=true
NOTRY=true
BUG=486979
Review URL: https://codereview.chromium.org/1260033005
Cr-Commit-Position: refs/heads/master@{#341691}
Diffstat (limited to 'components/url_formatter')
-rw-r--r-- | components/url_formatter/BUILD.gn | 51 | ||||
-rw-r--r-- | components/url_formatter/DEPS | 11 | ||||
-rw-r--r-- | components/url_formatter/OWNERS | 9 | ||||
-rw-r--r-- | components/url_formatter/elide_url.cc | 353 | ||||
-rw-r--r-- | components/url_formatter/elide_url.h | 72 | ||||
-rw-r--r-- | components/url_formatter/elide_url_unittest.cc | 324 | ||||
-rw-r--r-- | components/url_formatter/url_fixer.cc | 673 | ||||
-rw-r--r-- | components/url_formatter/url_fixer.h | 87 | ||||
-rw-r--r-- | components/url_formatter/url_fixer_unittest.cc | 537 | ||||
-rw-r--r-- | components/url_formatter/url_formatter.cc | 807 | ||||
-rw-r--r-- | components/url_formatter/url_formatter.gyp | 39 | ||||
-rw-r--r-- | components/url_formatter/url_formatter.h | 155 | ||||
-rw-r--r-- | components/url_formatter/url_formatter_unittest.cc | 978 |
13 files changed, 0 insertions, 4096 deletions
diff --git a/components/url_formatter/BUILD.gn b/components/url_formatter/BUILD.gn deleted file mode 100644 index 6a35fb3..0000000 --- a/components/url_formatter/BUILD.gn +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright 2015 The Chromium Authors. All rights reserved. -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. - -static_library("url_formatter") { - sources = [ - "elide_url.cc", - "elide_url.h", - "url_fixer.cc", - "url_fixer.h", - "url_formatter.cc", - "url_formatter.h", - ] - - # TODO(jschuh): crbug.com/167187 fix size_t to int truncations. - configs += [ "//build/config/compiler:no_size_t_to_int_warning" ] - - deps = [ - "//base", - "//third_party/icu", - "//net", - "//ui/gfx", - "//url", - ] - - if (is_android) { - deps -= [ "//ui/gfx" ] - } -} - -source_set("unit_tests") { - testonly = true - sources = [ - "elide_url_unittest.cc", - "url_fixer_unittest.cc", - "url_formatter_unittest.cc", - ] - - deps = [ - "//base", - "//net", - "//testing/gtest", - "//ui/gfx", - "//url", - ":url_formatter", - ] - - if (is_android) { - deps -= [ "//ui/gfx" ] - } -} diff --git a/components/url_formatter/DEPS b/components/url_formatter/DEPS deleted file mode 100644 index 3c1754f..0000000 --- a/components/url_formatter/DEPS +++ /dev/null @@ -1,11 +0,0 @@ -include_rules = [ - # This is a shared component (Mandoline, iOS, content), and as such, MUST NOT - # depend on content or other components that do. - "-components/html_viewer", - "-content", - "-ios", - "-mandoline", - - "+net", - "+ui/gfx", -] diff --git a/components/url_formatter/OWNERS b/components/url_formatter/OWNERS deleted file mode 100644 index 49e5b76..0000000 --- a/components/url_formatter/OWNERS +++ /dev/null @@ -1,9 +0,0 @@ -pkasting@chromium.org - -# Backup reviewer -brettw@chromium.org - -# Changes to FormatUrlForSecurityDisplay require a security review to avoid -# introducing security bugs. 
-per-file elide_url.*=palmer@chromium.org -per-file elide_url.*=felt@chromium.org diff --git a/components/url_formatter/elide_url.cc b/components/url_formatter/elide_url.cc deleted file mode 100644 index 8d7a91f..0000000 --- a/components/url_formatter/elide_url.cc +++ /dev/null @@ -1,353 +0,0 @@ -// Copyright 2014 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "components/url_formatter/elide_url.h" - -#include "base/logging.h" -#include "base/strings/string_split.h" -#include "base/strings/utf_string_conversions.h" -#include "components/url_formatter/url_formatter.h" -#include "net/base/escape.h" -#include "net/base/registry_controlled_domains/registry_controlled_domain.h" -#include "ui/gfx/text_elider.h" -#include "ui/gfx/text_utils.h" -#include "url/gurl.h" -#include "url/url_constants.h" - -using base::UTF8ToUTF16; -using gfx::ElideText; -using gfx::GetStringWidthF; -using gfx::kEllipsisUTF16; -using gfx::kForwardSlash; - -namespace { - -#if !defined(OS_ANDROID) -const base::char16 kDot = '.'; - -// Build a path from the first |num_components| elements in |path_elements|. -// Prepends |path_prefix|, appends |filename|, inserts ellipsis if appropriate. -base::string16 BuildPathFromComponents( - const base::string16& path_prefix, - const std::vector<base::string16>& path_elements, - const base::string16& filename, - size_t num_components) { - // Add the initial elements of the path. - base::string16 path = path_prefix; - - // Build path from first |num_components| elements. - for (size_t j = 0; j < num_components; ++j) - path += path_elements[j] + kForwardSlash; - - // Add |filename|, ellipsis if necessary. 
- if (num_components != (path_elements.size() - 1)) - path += base::string16(kEllipsisUTF16) + kForwardSlash; - path += filename; - - return path; -} - -// Takes a prefix (Domain, or Domain+subdomain) and a collection of path -// components and elides if possible. Returns a string containing the longest -// possible elided path, or an empty string if elision is not possible. -base::string16 ElideComponentizedPath( - const base::string16& url_path_prefix, - const std::vector<base::string16>& url_path_elements, - const base::string16& url_filename, - const base::string16& url_query, - const gfx::FontList& font_list, - float available_pixel_width) { - const size_t url_path_number_of_elements = url_path_elements.size(); - - CHECK(url_path_number_of_elements); - for (size_t i = url_path_number_of_elements - 1; i > 0; --i) { - base::string16 elided_path = BuildPathFromComponents( - url_path_prefix, url_path_elements, url_filename, i); - if (available_pixel_width >= GetStringWidthF(elided_path, font_list)) - return ElideText(elided_path + url_query, font_list, - available_pixel_width, gfx::ELIDE_TAIL); - } - - return base::string16(); -} - -// Splits the hostname in the |url| into sub-strings for the full hostname, -// the domain (TLD+1), and the subdomain (everything leading the domain). -void SplitHost(const GURL& url, - base::string16* url_host, - base::string16* url_domain, - base::string16* url_subdomain) { - // Get Host. - *url_host = UTF8ToUTF16(url.host()); - - // Get domain and registry information from the URL. - *url_domain = - UTF8ToUTF16(net::registry_controlled_domains::GetDomainAndRegistry( - url, net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES)); - if (url_domain->empty()) - *url_domain = *url_host; - - // Add port if required. - if (!url.port().empty()) { - *url_host += UTF8ToUTF16(":" + url.port()); - *url_domain += UTF8ToUTF16(":" + url.port()); - } - - // Get sub domain. 
- const size_t domain_start_index = url_host->find(*url_domain); - base::string16 kWwwPrefix = UTF8ToUTF16("www."); - if (domain_start_index != base::string16::npos) - *url_subdomain = url_host->substr(0, domain_start_index); - if ((*url_subdomain == kWwwPrefix || url_subdomain->empty() || - url.SchemeIsFile())) { - url_subdomain->clear(); - } -} - -#endif // !defined(OS_ANDROID) -} // namespace - -namespace url_formatter { - -#if !defined(OS_ANDROID) - -// TODO(pkasting): http://crbug.com/77883 This whole function gets -// kerning/ligatures/etc. issues potentially wrong by assuming that the width of -// a rendered string is always the sum of the widths of its substrings. Also I -// suspect it could be made simpler. -base::string16 ElideUrl(const GURL& url, - const gfx::FontList& font_list, - float available_pixel_width, - const std::string& languages) { - // Get a formatted string and corresponding parsing of the url. - url::Parsed parsed; - const base::string16 url_string = url_formatter::FormatUrl( - url, languages, url_formatter::kFormatUrlOmitAll, - net::UnescapeRule::SPACES, &parsed, nullptr, nullptr); - if (available_pixel_width <= 0) - return url_string; - - // If non-standard, return plain eliding. - if (!url.IsStandard()) - return ElideText(url_string, font_list, available_pixel_width, - gfx::ELIDE_TAIL); - - // Now start eliding url_string to fit within available pixel width. - // Fist pass - check to see whether entire url_string fits. - const float pixel_width_url_string = GetStringWidthF(url_string, font_list); - if (available_pixel_width >= pixel_width_url_string) - return url_string; - - // Get the path substring, including query and reference. - const size_t path_start_index = parsed.path.begin; - const size_t path_len = parsed.path.len; - base::string16 url_path_query_etc = url_string.substr(path_start_index); - base::string16 url_path = url_string.substr(path_start_index, path_len); - - // Return general elided text if url minus the query fits. 
- const base::string16 url_minus_query = - url_string.substr(0, path_start_index + path_len); - if (available_pixel_width >= GetStringWidthF(url_minus_query, font_list)) - return ElideText(url_string, font_list, available_pixel_width, - gfx::ELIDE_TAIL); - - base::string16 url_host; - base::string16 url_domain; - base::string16 url_subdomain; - SplitHost(url, &url_host, &url_domain, &url_subdomain); - - // If this is a file type, the path is now defined as everything after ":". - // For example, "C:/aa/aa/bb", the path is "/aa/bb/cc". Interesting, the - // domain is now C: - this is a nice hack for eliding to work pleasantly. - if (url.SchemeIsFile()) { - // Split the path string using ":" - const base::string16 kColon(1, ':'); - std::vector<base::string16> file_path_split = base::SplitString( - url_path, kColon, base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL); - if (file_path_split.size() > 1) { // File is of type "file:///C:/.." - url_host.clear(); - url_domain.clear(); - url_subdomain.clear(); - - url_host = url_domain = file_path_split.at(0).substr(1) + kColon; - url_path_query_etc = url_path = file_path_split.at(1); - } - } - - // Second Pass - remove scheme - the rest fits. - const float pixel_width_url_host = GetStringWidthF(url_host, font_list); - const float pixel_width_url_path = - GetStringWidthF(url_path_query_etc, font_list); - if (available_pixel_width >= pixel_width_url_host + pixel_width_url_path) - return url_host + url_path_query_etc; - - // Third Pass: Subdomain, domain and entire path fits. - const float pixel_width_url_domain = GetStringWidthF(url_domain, font_list); - const float pixel_width_url_subdomain = - GetStringWidthF(url_subdomain, font_list); - if (available_pixel_width >= - pixel_width_url_subdomain + pixel_width_url_domain + pixel_width_url_path) - return url_subdomain + url_domain + url_path_query_etc; - - // Query element. 
- base::string16 url_query; - const float kPixelWidthDotsTrailer = - GetStringWidthF(base::string16(kEllipsisUTF16), font_list); - if (parsed.query.is_nonempty()) { - url_query = UTF8ToUTF16("?") + url_string.substr(parsed.query.begin); - if (available_pixel_width >= - (pixel_width_url_subdomain + pixel_width_url_domain + - pixel_width_url_path - GetStringWidthF(url_query, font_list))) { - return ElideText(url_subdomain + url_domain + url_path_query_etc, - font_list, available_pixel_width, gfx::ELIDE_TAIL); - } - } - - // Parse url_path using '/'. - std::vector<base::string16> url_path_elements = - base::SplitString(url_path, base::string16(1, kForwardSlash), - base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL); - - // Get filename - note that for a path ending with / - // such as www.google.com/intl/ads/, the file name is ads/. - base::string16 url_filename( - url_path_elements.empty() ? base::string16() : url_path_elements.back()); - size_t url_path_number_of_elements = url_path_elements.size(); - if (url_filename.empty() && (url_path_number_of_elements > 1)) { - // Path ends with a '/'. - --url_path_number_of_elements; - url_filename = - url_path_elements[url_path_number_of_elements - 1] + kForwardSlash; - } - - const size_t kMaxNumberOfUrlPathElementsAllowed = 1024; - if (url_path_number_of_elements <= 1 || - url_path_number_of_elements > kMaxNumberOfUrlPathElementsAllowed) { - // No path to elide, or too long of a path (could overflow in loop below) - // Just elide this as a text string. - return ElideText(url_subdomain + url_domain + url_path_query_etc, font_list, - available_pixel_width, gfx::ELIDE_TAIL); - } - - // Start eliding the path and replacing elements by ".../". - const base::string16 kEllipsisAndSlash = - base::string16(kEllipsisUTF16) + kForwardSlash; - const float pixel_width_ellipsis_slash = - GetStringWidthF(kEllipsisAndSlash, font_list); - - // Check with both subdomain and domain. 
- base::string16 elided_path = ElideComponentizedPath( - url_subdomain + url_domain, url_path_elements, url_filename, url_query, - font_list, available_pixel_width); - if (!elided_path.empty()) - return elided_path; - - // Check with only domain. - // If a subdomain is present, add an ellipsis before domain. - // This is added only if the subdomain pixel width is larger than - // the pixel width of kEllipsis. Otherwise, subdomain remains, - // which means that this case has been resolved earlier. - base::string16 url_elided_domain = url_subdomain + url_domain; - if (pixel_width_url_subdomain > kPixelWidthDotsTrailer) { - if (!url_subdomain.empty()) - url_elided_domain = kEllipsisAndSlash[0] + url_domain; - else - url_elided_domain = url_domain; - - elided_path = ElideComponentizedPath(url_elided_domain, url_path_elements, - url_filename, url_query, font_list, - available_pixel_width); - - if (!elided_path.empty()) - return elided_path; - } - - // Return elided domain/.../filename anyway. - base::string16 final_elided_url_string(url_elided_domain); - const float url_elided_domain_width = - GetStringWidthF(url_elided_domain, font_list); - - // A hack to prevent trailing ".../...". 
- if ((available_pixel_width - url_elided_domain_width) > - pixel_width_ellipsis_slash + kPixelWidthDotsTrailer + - GetStringWidthF(base::ASCIIToUTF16("UV"), font_list)) { - final_elided_url_string += BuildPathFromComponents( - base::string16(), url_path_elements, url_filename, 1); - } else { - final_elided_url_string += url_path; - } - - return ElideText(final_elided_url_string, font_list, available_pixel_width, - gfx::ELIDE_TAIL); -} - -base::string16 ElideHost(const GURL& url, - const gfx::FontList& font_list, - float available_pixel_width) { - base::string16 url_host; - base::string16 url_domain; - base::string16 url_subdomain; - SplitHost(url, &url_host, &url_domain, &url_subdomain); - - const float pixel_width_url_host = GetStringWidthF(url_host, font_list); - if (available_pixel_width >= pixel_width_url_host) - return url_host; - - if (url_subdomain.empty()) - return url_domain; - - const float pixel_width_url_domain = GetStringWidthF(url_domain, font_list); - float subdomain_width = available_pixel_width - pixel_width_url_domain; - if (subdomain_width <= 0) - return base::string16(kEllipsisUTF16) + kDot + url_domain; - - const base::string16 elided_subdomain = - ElideText(url_subdomain, font_list, subdomain_width, gfx::ELIDE_HEAD); - return elided_subdomain + url_domain; -} - -#endif // !defined(OS_ANDROID) - -base::string16 FormatUrlForSecurityDisplay(const GURL& url, - const std::string& languages) { - if (!url.is_valid() || url.is_empty() || !url.IsStandard()) - return url_formatter::FormatUrl(url, languages); - - const base::string16 colon(base::ASCIIToUTF16(":")); - const base::string16 scheme_separator( - base::ASCIIToUTF16(url::kStandardSchemeSeparator)); - - if (url.SchemeIsFile()) { - return base::ASCIIToUTF16(url::kFileScheme) + scheme_separator + - base::UTF8ToUTF16(url.path()); - } - - if (url.SchemeIsFileSystem()) { - const GURL* inner_url = url.inner_url(); - if (inner_url->SchemeIsFile()) { - return base::ASCIIToUTF16(url::kFileSystemScheme) 
+ colon + - FormatUrlForSecurityDisplay(*inner_url, languages) + - base::UTF8ToUTF16(url.path()); - } - return base::ASCIIToUTF16(url::kFileSystemScheme) + colon + - FormatUrlForSecurityDisplay(*inner_url, languages); - } - - const GURL origin = url.GetOrigin(); - const std::string& scheme = origin.scheme(); - const std::string& host = origin.host(); - - base::string16 result = base::UTF8ToUTF16(scheme); - result += scheme_separator; - result += base::UTF8ToUTF16(host); - - const int port = origin.IntPort(); - const int default_port = url::DefaultPortForScheme( - scheme.c_str(), static_cast<int>(scheme.length())); - if (port != url::PORT_UNSPECIFIED && port != default_port) - result += colon + base::UTF8ToUTF16(origin.port()); - - return result; -} -} // namespace url_formatter diff --git a/components/url_formatter/elide_url.h b/components/url_formatter/elide_url.h deleted file mode 100644 index 528b20e..0000000 --- a/components/url_formatter/elide_url.h +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright 2014 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. -// -// This file defines utility functions for eliding URLs. - -#ifndef COMPONENTS_URL_FORMATTER_ELIDE_URL_H_ -#define COMPONENTS_URL_FORMATTER_ELIDE_URL_H_ - -#include <string> - -#include "base/strings/string16.h" - -class GURL; - -namespace gfx { -class FontList; -} - -namespace url_formatter { - -// ElideUrl and Elide host require -// gfx::GetStringWidthF which is not implemented in Android -#if !defined(OS_ANDROID) -// This function takes a GURL object and elides it. It returns a string -// which composed of parts from subdomain, domain, path, filename and query. -// A "..." is added automatically at the end if the elided string is bigger -// than the |available_pixel_width|. For |available_pixel_width| == 0, a -// formatted, but un-elided, string is returned. 
|languages| is a comma -// separated list of ISO 639 language codes and is used to determine what -// characters are understood by a user. It should come from -// |prefs::kAcceptLanguages|. -// -// Note: in RTL locales, if the URL returned by this function is going to be -// displayed in the UI, then it is likely that the string needs to be marked -// as an LTR string (using base::i18n::WrapStringWithLTRFormatting()) so that it -// is displayed properly in an RTL context. Please refer to -// http://crbug.com/6487 for more information. -base::string16 ElideUrl(const GURL& url, - const gfx::FontList& font_list, - float available_pixel_width, - const std::string& languages); - -// This function takes a GURL object and elides the host to fit within -// the given width. The function will never elide past the TLD+1 point, -// but after that, will leading-elide the domain name to fit the width. -// Example: http://sub.domain.com ---> "...domain.com", or "...b.domain.com" -// depending on the width. -base::string16 ElideHost(const GURL& host_url, - const gfx::FontList& font_list, - float available_pixel_width); -#endif // !defined(OS_ANDROID) - -// This is a convenience function for formatting a URL in a concise and -// human-friendly way, to help users make security-related decisions (or in -// other circumstances when people need to distinguish sites, origins, or -// otherwise-simplified URLs from each other). -// -// Internationalized domain names (IDN) may be presented in Unicode if -// |languages| accepts the Unicode representation (see -// |url_formatter::FormatUrl| for more details on the algorithm). -// -// - Omits the path for standard schemes, excepting file and filesystem. -// - Omits the port if it is the default for the scheme. -// -// Do not use this for URLs which will be parsed or sent to other applications. 
-base::string16 FormatUrlForSecurityDisplay(const GURL& origin, - const std::string& languages); - -} // namespace url_formatter - -#endif // COMPONENTS_URL_FORMATTER_ELIDE_URL_H_ diff --git a/components/url_formatter/elide_url_unittest.cc b/components/url_formatter/elide_url_unittest.cc deleted file mode 100644 index f043478..0000000 --- a/components/url_formatter/elide_url_unittest.cc +++ /dev/null @@ -1,324 +0,0 @@ -// Copyright 2014 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "components/url_formatter/elide_url.h" - -#include "base/ios/ios_util.h" -#include "base/strings/utf_string_conversions.h" -#include "testing/gtest/include/gtest/gtest.h" -#include "ui/gfx/font_list.h" -#include "ui/gfx/text_elider.h" -#include "ui/gfx/text_utils.h" -#include "url/gurl.h" - -using base::UTF8ToUTF16; -using gfx::GetStringWidthF; -using gfx::kEllipsis; - -namespace { - -struct Testcase { - const std::string input; - const std::string output; -}; - -#if !defined(OS_ANDROID) -void RunUrlTest(Testcase* testcases, size_t num_testcases) { - static const gfx::FontList font_list; - for (size_t i = 0; i < num_testcases; ++i) { - const GURL url(testcases[i].input); - // Should we test with non-empty language list? - // That's kinda redundant with net_util_unittests. - const float available_width = - GetStringWidthF(UTF8ToUTF16(testcases[i].output), font_list); - EXPECT_EQ(UTF8ToUTF16(testcases[i].output), - url_formatter::ElideUrl(url, font_list, available_width, - std::string())); - } -} - -// Test eliding of commonplace URLs. 
-TEST(TextEliderTest, TestGeneralEliding) { - const std::string kEllipsisStr(kEllipsis); - Testcase testcases[] = { - {"http://www.google.com/intl/en/ads/", "www.google.com/intl/en/ads/"}, - {"http://www.google.com/intl/en/ads/", "www.google.com/intl/en/ads/"}, - {"http://www.google.com/intl/en/ads/", - "google.com/intl/" + kEllipsisStr + "/ads/"}, - {"http://www.google.com/intl/en/ads/", - "google.com/" + kEllipsisStr + "/ads/"}, - {"http://www.google.com/intl/en/ads/", "google.com/" + kEllipsisStr}, - {"http://www.google.com/intl/en/ads/", "goog" + kEllipsisStr}, - {"https://subdomain.foo.com/bar/filename.html", - "subdomain.foo.com/bar/filename.html"}, - {"https://subdomain.foo.com/bar/filename.html", - "subdomain.foo.com/" + kEllipsisStr + "/filename.html"}, - {"http://subdomain.foo.com/bar/filename.html", - kEllipsisStr + "foo.com/" + kEllipsisStr + "/filename.html"}, - {"http://www.google.com/intl/en/ads/?aLongQueryWhichIsNotRequired", - "www.google.com/intl/en/ads/?aLongQ" + kEllipsisStr}, - }; - - RunUrlTest(testcases, arraysize(testcases)); -} - -// When there is very little space available, the elision code will shorten -// both path AND file name to an ellipsis - ".../...". To avoid this result, -// there is a hack in place that simply treats them as one string in this -// case. -TEST(TextEliderTest, TestTrailingEllipsisSlashEllipsisHack) { - const std::string kEllipsisStr(kEllipsis); - - // Very little space, would cause double ellipsis. - gfx::FontList font_list; - GURL url("http://battersbox.com/directory/foo/peter_paul_and_mary.html"); - float available_width = GetStringWidthF( - UTF8ToUTF16("battersbox.com/" + kEllipsisStr + "/" + kEllipsisStr), - font_list); - - // Create the expected string, after elision. Depending on font size, the - // directory might become /dir... or /di... or/d... - it never should be - // shorter than that. (If it is, the font considers d... to be longer - // than .../... - that should never happen). 
- ASSERT_GT(GetStringWidthF(UTF8ToUTF16(kEllipsisStr + "/" + kEllipsisStr), - font_list), - GetStringWidthF(UTF8ToUTF16("d" + kEllipsisStr), font_list)); - GURL long_url("http://battersbox.com/directorynameisreallylongtoforcetrunc"); - base::string16 expected = url_formatter::ElideUrl( - long_url, font_list, available_width, std::string()); - // Ensure that the expected result still contains part of the directory name. - ASSERT_GT(expected.length(), std::string("battersbox.com/d").length()); - EXPECT_EQ(expected, url_formatter::ElideUrl(url, font_list, available_width, - std::string())); - - // More space available - elide directories, partially elide filename. - Testcase testcases[] = { - {"http://battersbox.com/directory/foo/peter_paul_and_mary.html", - "battersbox.com/" + kEllipsisStr + "/peter" + kEllipsisStr}, - }; - RunUrlTest(testcases, arraysize(testcases)); -} - -// Test eliding of empty strings, URLs with ports, passwords, queries, etc. -TEST(TextEliderTest, TestMoreEliding) { - const std::string kEllipsisStr(kEllipsis); - Testcase testcases[] = { - {"http://www.google.com/foo?bar", "www.google.com/foo?bar"}, - {"http://xyz.google.com/foo?bar", "xyz.google.com/foo?" 
+ kEllipsisStr}, - {"http://xyz.google.com/foo?bar", "xyz.google.com/foo" + kEllipsisStr}, - {"http://xyz.google.com/foo?bar", "xyz.google.com/fo" + kEllipsisStr}, - {"http://a.b.com/pathname/c?d", "a.b.com/" + kEllipsisStr + "/c?d"}, - {"", ""}, - {"http://foo.bar..example.com...hello/test/filename.html", - "foo.bar..example.com...hello/" + kEllipsisStr + "/filename.html"}, - {"http://foo.bar../", "foo.bar.."}, - {"http://xn--1lq90i.cn/foo", "\xe5\x8c\x97\xe4\xba\xac.cn/foo"}, - {"http://me:mypass@secrethost.com:99/foo?bar#baz", - "secrethost.com:99/foo?bar#baz"}, - {"http://me:mypass@ss%xxfdsf.com/foo", "ss%25xxfdsf.com/foo"}, - {"mailto:elgoato@elgoato.com", "mailto:elgoato@elgoato.com"}, - {"javascript:click(0)", "javascript:click(0)"}, - {"https://chess.eecs.berkeley.edu:4430/login/arbitfilename", - "chess.eecs.berkeley.edu:4430/login/arbitfilename"}, - {"https://chess.eecs.berkeley.edu:4430/login/arbitfilename", - kEllipsisStr + "berkeley.edu:4430/" + kEllipsisStr + "/arbitfilename"}, - - // Unescaping. - {"http://www/%E4%BD%A0%E5%A5%BD?q=%E4%BD%A0%E5%A5%BD#\xe4\xbd\xa0", - "www/\xe4\xbd\xa0\xe5\xa5\xbd?q=\xe4\xbd\xa0\xe5\xa5\xbd#\xe4\xbd\xa0"}, - - // Invalid unescaping for path. The ref will always be valid UTF-8. We - // don't - // bother to do too many edge cases, since these are handled by the - // escaper - // unittest. - {"http://www/%E4%A0%E5%A5%BD?q=%E4%BD%A0%E5%A5%BD#\xe4\xbd\xa0", - "www/%E4%A0%E5%A5%BD?q=\xe4\xbd\xa0\xe5\xa5\xbd#\xe4\xbd\xa0"}, - }; - - RunUrlTest(testcases, arraysize(testcases)); -} - -// Test eliding of file: URLs. -TEST(TextEliderTest, TestFileURLEliding) { - const std::string kEllipsisStr(kEllipsis); - Testcase testcases[] = { - {"file:///C:/path1/path2/path3/filename", - "file:///C:/path1/path2/path3/filename"}, - {"file:///C:/path1/path2/path3/filename", "C:/path1/path2/path3/filename"}, -// GURL parses "file:///C:path" differently on windows than it does on posix. 
-#if defined(OS_WIN) - {"file:///C:path1/path2/path3/filename", - "C:/path1/path2/" + kEllipsisStr + "/filename"}, - {"file:///C:path1/path2/path3/filename", - "C:/path1/" + kEllipsisStr + "/filename"}, - {"file:///C:path1/path2/path3/filename", - "C:/" + kEllipsisStr + "/filename"}, -#endif // defined(OS_WIN) - {"file://filer/foo/bar/file", "filer/foo/bar/file"}, - {"file://filer/foo/bar/file", "filer/foo/" + kEllipsisStr + "/file"}, - {"file://filer/foo/bar/file", "filer/" + kEllipsisStr + "/file"}, - {"file://filer/foo/", "file://filer/foo/"}, - {"file://filer/foo/", "filer/foo/"}, - {"file://filer/foo/", "filer" + kEllipsisStr}, - // Eliding file URLs with nothing after the ':' shouldn't crash. - {"file:///aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:", "aaa" + kEllipsisStr}, - {"file:///aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:/", "aaa" + kEllipsisStr}, - }; - - RunUrlTest(testcases, arraysize(testcases)); -} - -TEST(TextEliderTest, TestHostEliding) { -#if defined(OS_IOS) - // TODO(eugenebut): Disable test on iOS9 crbug.com/513703 - if (base::ios::IsRunningOnIOS9OrLater()) { - LOG(WARNING) << "Test disabled on iOS9."; - return; - } -#endif - const std::string kEllipsisStr(kEllipsis); - Testcase testcases[] = { - {"http://google.com", "google.com"}, - {"http://subdomain.google.com", kEllipsisStr + ".google.com"}, - {"http://reallyreallyreallylongdomainname.com", - "reallyreallyreallylongdomainname.com"}, - {"http://a.b.c.d.e.f.com", kEllipsisStr + "f.com"}, - {"http://foo", "foo"}, - {"http://foo.bar", "foo.bar"}, - {"http://subdomain.foo.bar", kEllipsisStr + "in.foo.bar"}, -// IOS width calculations are off by a letter from other platforms for -// some strings from other platforms, probably for strings with too -// many kerned letters on the default font set. 
-#if !defined(OS_IOS) - {"http://subdomain.reallylongdomainname.com", - kEllipsisStr + "ain.reallylongdomainname.com"}, - {"http://a.b.c.d.e.f.com", kEllipsisStr + ".e.f.com"}, -#endif // !defined(OS_IOS) - }; - - for (size_t i = 0; i < arraysize(testcases); ++i) { - const float available_width = - GetStringWidthF(UTF8ToUTF16(testcases[i].output), gfx::FontList()); - EXPECT_EQ(UTF8ToUTF16(testcases[i].output), - url_formatter::ElideHost(GURL(testcases[i].input), - gfx::FontList(), available_width)); - } - - // Trying to elide to a really short length will still keep the full TLD+1 - EXPECT_EQ( - base::ASCIIToUTF16("google.com"), - url_formatter::ElideHost(GURL("http://google.com"), gfx::FontList(), 2)); - EXPECT_EQ(base::UTF8ToUTF16(kEllipsisStr + ".google.com"), - url_formatter::ElideHost(GURL("http://subdomain.google.com"), - gfx::FontList(), 2)); - EXPECT_EQ( - base::ASCIIToUTF16("foo.bar"), - url_formatter::ElideHost(GURL("http://foo.bar"), gfx::FontList(), 2)); -} - -#endif // !defined(OS_ANDROID) - -TEST(TextEliderTest, FormatUrlForSecurityDisplay) { - struct OriginTestData { - const char* const description; - const char* const input; - const wchar_t* const output; - }; - - const OriginTestData tests[] = { - {"Empty URL", "", L""}, - {"HTTP URL", "http://www.google.com/", L"http://www.google.com"}, - {"HTTPS URL", "https://www.google.com/", L"https://www.google.com"}, - {"Standard HTTP port", "http://www.google.com:80/", - L"http://www.google.com"}, - {"Standard HTTPS port", "https://www.google.com:443/", - L"https://www.google.com"}, - {"Standard HTTP port, IDN Chinese", - "http://\xe4\xb8\xad\xe5\x9b\xbd.icom.museum:80", - L"http://xn--fiqs8s.icom.museum"}, - {"HTTP URL, IDN Hebrew (RTL)", - "http://" - "\xd7\x90\xd7\x99\xd7\xa7\xd7\x95\xd7\xb4\xd7\x9d." 
- "\xd7\x99\xd7\xa9\xd7\xa8\xd7\x90\xd7\x9c.museum/", - L"http://xn--4dbklr2c8d.xn--4dbrk0ce.museum"}, - {"HTTP URL with query string, IDN Arabic (RTL)", - "http://\xd9\x85\xd8\xb5\xd8\xb1.icom.museum/foo.html?yes=no", - L"http://xn--wgbh1c.icom.museum"}, - {"Non-standard HTTP port", "http://www.google.com:9000/", - L"http://www.google.com:9000"}, - {"Non-standard HTTPS port", "https://www.google.com:9000/", - L"https://www.google.com:9000"}, - {"File URI", "file:///usr/example/file.html", - L"file:///usr/example/file.html"}, - {"File URI with hostname", "file://localhost/usr/example/file.html", - L"file:///usr/example/file.html"}, - {"UNC File URI 1", "file:///CONTOSO/accounting/money.xls", - L"file:///CONTOSO/accounting/money.xls"}, - {"UNC File URI 2", - "file:///C:/Program%20Files/Music/Web%20Sys/main.html?REQUEST=RADIO", - L"file:///C:/Program%20Files/Music/Web%20Sys/main.html"}, - {"HTTP URL with path", "http://www.google.com/test.html", - L"http://www.google.com"}, - {"HTTPS URL with path", "https://www.google.com/test.html", - L"https://www.google.com"}, - {"Unusual secure scheme (wss)", "wss://www.google.com/", - L"wss://www.google.com"}, - {"Unusual non-secure scheme (gopher)", "gopher://www.google.com/", - L"gopher://www.google.com"}, - {"Unlisted scheme (chrome)", "chrome://version", L"chrome://version"}, - {"HTTP IP address", "http://173.194.65.103", L"http://173.194.65.103"}, - {"HTTPS IP address", "https://173.194.65.103", L"https://173.194.65.103"}, - {"HTTP IPv6 address", "http://[FE80:0000:0000:0000:0202:B3FF:FE1E:8329]/", - L"http://[fe80::202:b3ff:fe1e:8329]"}, - {"HTTPS IPv6 address with port", "https://[2001:db8:0:1]:443/", - L"https://[2001:db8:0:1]"}, - {"HTTPS IP address, non-default port", "https://173.194.65.103:8443", - L"https://173.194.65.103:8443"}, - {"HTTP filesystem: URL with path", - "filesystem:http://www.google.com/temporary/test.html", - L"filesystem:http://www.google.com"}, - {"File filesystem: URL with path", - 
"filesystem:file://localhost/temporary/stuff/test.html?z=fun&goat=billy", - L"filesystem:file:///temporary/stuff/test.html"}, - {"Invalid scheme 1", "twelve://www.cyber.org/wow.php", - L"twelve://www.cyber.org/wow.php"}, - {"Invalid scheme 2", "://www.cyber.org/wow.php", - L"://www.cyber.org/wow.php"}, - {"Invalid host 1", "https://www.cyber../wow.php", L"https://www.cyber.."}, - {"Invalid host 2", "https://www...cyber/wow.php", L"https://www...cyber"}, - {"Invalid port 1", "https://173.194.65.103:000", - L"https://173.194.65.103:0"}, - {"Invalid port 2", "https://173.194.65.103:gruffle", - L"https://173.194.65.103:gruffle"}, - {"Invalid port 3", "https://173.194.65.103:/hello.aspx", - L"https://173.194.65.103"}, - {"Trailing dot in DNS name", "https://www.example.com./get/goat", - L"https://www.example.com."}, - {"Blob URL", - "blob:http%3A//www.html5rocks.com/4d4ff040-6d61-4446-86d3-13ca07ec9ab9", - L"blob:http%3A//www.html5rocks.com/" - L"4d4ff040-6d61-4446-86d3-13ca07ec9ab9"}, - }; - - const char languages[] = "zh-TW,en-US,en,am,ar-EG,ar"; - for (size_t i = 0; i < arraysize(tests); ++i) { - base::string16 formatted = url_formatter::FormatUrlForSecurityDisplay( - GURL(tests[i].input), std::string()); - EXPECT_EQ(base::WideToUTF16(tests[i].output), formatted) - << tests[i].description; - base::string16 formatted_with_languages = - url_formatter::FormatUrlForSecurityDisplay(GURL(tests[i].input), - languages); - EXPECT_EQ(base::WideToUTF16(tests[i].output), formatted_with_languages) - << tests[i].description; - } - - base::string16 formatted = - url_formatter::FormatUrlForSecurityDisplay(GURL(), std::string()); - EXPECT_EQ(base::string16(), formatted) - << "Explicitly test the 0-argument GURL constructor"; -} - -} // namespace diff --git a/components/url_formatter/url_fixer.cc b/components/url_formatter/url_fixer.cc deleted file mode 100644 index c49a902..0000000 --- a/components/url_formatter/url_fixer.cc +++ /dev/null @@ -1,673 +0,0 @@ -// Copyright (c) 2012 The 
Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "components/url_formatter/url_fixer.h" - -#include <algorithm> - -#include "base/files/file_path.h" -#include "base/files/file_util.h" -#include "base/logging.h" -#if defined(OS_POSIX) -#include "base/path_service.h" -#endif -#include "base/strings/string_util.h" -#include "base/strings/utf_string_conversions.h" -#include "components/url_formatter/url_formatter.h" -#include "net/base/escape.h" -#include "net/base/filename_util.h" -#include "net/base/registry_controlled_domains/registry_controlled_domain.h" -#include "url/third_party/mozilla/url_parse.h" -#include "url/url_file.h" -#include "url/url_util.h" - -namespace url_formatter { - -const char* home_directory_override = nullptr; - -namespace { - -// Hardcode these constants to avoid dependences on //chrome and //content. -const char kChromeUIScheme[] = "chrome"; -const char kChromeUIDefaultHost[] = "version"; -const char kViewSourceScheme[] = "view-source"; - -// TODO(estade): Remove these ugly, ugly functions. They are only used in -// SegmentURL. A url::Parsed object keeps track of a bunch of indices into -// a url string, and these need to be updated when the URL is converted from -// UTF8 to UTF16. Instead of this after-the-fact adjustment, we should parse it -// in the correct string format to begin with. 
-url::Component UTF8ComponentToUTF16Component( - const std::string& text_utf8, - const url::Component& component_utf8) { - if (component_utf8.len == -1) - return url::Component(); - - std::string before_component_string = - text_utf8.substr(0, component_utf8.begin); - std::string component_string = - text_utf8.substr(component_utf8.begin, component_utf8.len); - base::string16 before_component_string_16 = - base::UTF8ToUTF16(before_component_string); - base::string16 component_string_16 = base::UTF8ToUTF16(component_string); - url::Component component_16(before_component_string_16.length(), - component_string_16.length()); - return component_16; -} - -void UTF8PartsToUTF16Parts(const std::string& text_utf8, - const url::Parsed& parts_utf8, - url::Parsed* parts) { - if (base::IsStringASCII(text_utf8)) { - *parts = parts_utf8; - return; - } - - parts->scheme = UTF8ComponentToUTF16Component(text_utf8, parts_utf8.scheme); - parts->username = - UTF8ComponentToUTF16Component(text_utf8, parts_utf8.username); - parts->password = - UTF8ComponentToUTF16Component(text_utf8, parts_utf8.password); - parts->host = UTF8ComponentToUTF16Component(text_utf8, parts_utf8.host); - parts->port = UTF8ComponentToUTF16Component(text_utf8, parts_utf8.port); - parts->path = UTF8ComponentToUTF16Component(text_utf8, parts_utf8.path); - parts->query = UTF8ComponentToUTF16Component(text_utf8, parts_utf8.query); - parts->ref = UTF8ComponentToUTF16Component(text_utf8, parts_utf8.ref); -} - -base::TrimPositions TrimWhitespaceUTF8(const std::string& input, - base::TrimPositions positions, - std::string* output) { - // This implementation is not so fast since it converts the text encoding - // twice. Please feel free to file a bug if this function hurts the - // performance of Chrome. 
- DCHECK(base::IsStringUTF8(input)); - base::string16 input16 = base::UTF8ToUTF16(input); - base::string16 output16; - base::TrimPositions result = - base::TrimWhitespace(input16, positions, &output16); - *output = base::UTF16ToUTF8(output16); - return result; -} - -// does some basic fixes for input that we want to test for file-ness -void PrepareStringForFileOps(const base::FilePath& text, - base::FilePath::StringType* output) { -#if defined(OS_WIN) - base::TrimWhitespace(text.value(), base::TRIM_ALL, output); - replace(output->begin(), output->end(), '/', '\\'); -#else - TrimWhitespaceUTF8(text.value(), base::TRIM_ALL, output); -#endif -} - -// Tries to create a full path from |text|. If the result is valid and the -// file exists, returns true and sets |full_path| to the result. Otherwise, -// returns false and leaves |full_path| unchanged. -bool ValidPathForFile(const base::FilePath::StringType& text, - base::FilePath* full_path) { - base::FilePath file_path = base::MakeAbsoluteFilePath(base::FilePath(text)); - if (file_path.empty()) - return false; - - if (!base::PathExists(file_path)) - return false; - - *full_path = file_path; - return true; -} - -#if defined(OS_POSIX) -// Given a path that starts with ~, return a path that starts with an -// expanded-out /user/foobar directory. -std::string FixupHomedir(const std::string& text) { - DCHECK(text.length() > 0 && text[0] == '~'); - - if (text.length() == 1 || text[1] == '/') { - base::FilePath file_path; - if (home_directory_override) - file_path = base::FilePath(home_directory_override); - else - PathService::Get(base::DIR_HOME, &file_path); - - // We'll probably break elsewhere if $HOME is undefined, but check here - // just in case. - if (file_path.value().empty()) - return text; - // Append requires to be a relative path, so we have to cut all preceeding - // '/' characters. 
- size_t i = 1; - while (i < text.length() && text[i] == '/') - ++i; - return file_path.Append(text.substr(i)).value(); - } - -// Otherwise, this is a path like ~foobar/baz, where we must expand to -// user foobar's home directory. Officially, we should use getpwent(), -// but that is a nasty blocking call. - -#if defined(OS_MACOSX) - static const char kHome[] = "/Users/"; -#else - static const char kHome[] = "/home/"; -#endif - return kHome + text.substr(1); -} -#endif - -// Tries to create a file: URL from |text| if it looks like a filename, even if -// it doesn't resolve as a valid path or to an existing file. Returns a -// (possibly invalid) file: URL in |fixed_up_url| for input beginning -// with a drive specifier or "\\". Returns the unchanged input in other cases -// (including file: URLs: these don't look like filenames). -std::string FixupPath(const std::string& text) { - DCHECK(!text.empty()); - - base::FilePath::StringType filename; -#if defined(OS_WIN) - base::FilePath input_path(base::UTF8ToWide(text)); - PrepareStringForFileOps(input_path, &filename); - - // Fixup Windows-style drive letters, where "C:" gets rewritten to "C|". - if (filename.length() > 1 && filename[1] == '|') - filename[1] = ':'; -#elif defined(OS_POSIX) - base::FilePath input_path(text); - PrepareStringForFileOps(input_path, &filename); - if (filename.length() > 0 && filename[0] == '~') - filename = FixupHomedir(filename); -#endif - - // Here, we know the input looks like a file. - GURL file_url = net::FilePathToFileURL(base::FilePath(filename)); - if (file_url.is_valid()) { - return base::UTF16ToUTF8(url_formatter::FormatUrl( - file_url, std::string(), url_formatter::kFormatUrlOmitUsernamePassword, - net::UnescapeRule::NORMAL, nullptr, nullptr, nullptr)); - } - - // Invalid file URL, just return the input. - return text; -} - -// Checks |domain| to see if a valid TLD is already present. If not, appends -// |desired_tld| to the domain, and prepends "www." 
unless it's already present. -void AddDesiredTLD(const std::string& desired_tld, std::string* domain) { - if (desired_tld.empty() || domain->empty()) - return; - - // Check the TLD. If the return value is positive, we already have a TLD, so - // abort. If the return value is std::string::npos, there's no valid host, - // but we can try to append a TLD anyway, since the host may become valid once - // the TLD is attached -- for example, "999999999999" is detected as a broken - // IP address and marked invalid, but attaching ".com" makes it legal. When - // the return value is 0, there's a valid host with no known TLD, so we can - // definitely append the user's TLD. We disallow unknown registries here so - // users can input "mail.yahoo" and hit ctrl-enter to get - // "www.mail.yahoo.com". - const size_t registry_length = - net::registry_controlled_domains::GetRegistryLength( - *domain, net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, - net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); - if ((registry_length != 0) && (registry_length != std::string::npos)) - return; - - // Add the suffix at the end of the domain. - const size_t domain_length(domain->length()); - DCHECK_GT(domain_length, 0U); - DCHECK_NE(desired_tld[0], '.'); - if ((*domain)[domain_length - 1] != '.') - domain->push_back('.'); - domain->append(desired_tld); - - // Now, if the domain begins with "www.", stop. - const std::string prefix("www."); - if (domain->compare(0, prefix.length(), prefix) != 0) { - // Otherwise, add www. to the beginning of the URL. - domain->insert(0, prefix); - } -} - -inline void FixupUsername(const std::string& text, - const url::Component& part, - std::string* url) { - if (!part.is_valid()) - return; - - // We don't fix up the username at the moment. - url->append(text, part.begin, part.len); - // Do not append the trailing '@' because we might need to include the user's - // password. FixupURL itself will append the '@' for us. 
-} - -inline void FixupPassword(const std::string& text, - const url::Component& part, - std::string* url) { - if (!part.is_valid()) - return; - - // We don't fix up the password at the moment. - url->append(":"); - url->append(text, part.begin, part.len); -} - -void FixupHost(const std::string& text, - const url::Component& part, - bool has_scheme, - const std::string& desired_tld, - std::string* url) { - if (!part.is_valid()) - return; - - // Make domain valid. - // Strip all leading dots and all but one trailing dot, unless the user only - // typed dots, in which case their input is totally invalid and we should just - // leave it unchanged. - std::string domain(text, part.begin, part.len); - const size_t first_nondot(domain.find_first_not_of('.')); - if (first_nondot != std::string::npos) { - domain.erase(0, first_nondot); - size_t last_nondot(domain.find_last_not_of('.')); - DCHECK(last_nondot != std::string::npos); - last_nondot += 2; // Point at second period in ending string - if (last_nondot < domain.length()) - domain.erase(last_nondot); - } - - // Add any user-specified TLD, if applicable. - AddDesiredTLD(desired_tld, &domain); - - url->append(domain); -} - -void FixupPort(const std::string& text, - const url::Component& part, - std::string* url) { - if (!part.is_valid()) - return; - - // We don't fix up the port at the moment. - url->append(":"); - url->append(text, part.begin, part.len); -} - -inline void FixupPath(const std::string& text, - const url::Component& part, - std::string* url) { - if (!part.is_valid() || part.len == 0) { - // We should always have a path. - url->append("/"); - return; - } - - // Append the path as is. - url->append(text, part.begin, part.len); -} - -inline void FixupQuery(const std::string& text, - const url::Component& part, - std::string* url) { - if (!part.is_valid()) - return; - - // We don't fix up the query at the moment. 
- url->append("?"); - url->append(text, part.begin, part.len); -} - -inline void FixupRef(const std::string& text, - const url::Component& part, - std::string* url) { - if (!part.is_valid()) - return; - - // We don't fix up the ref at the moment. - url->append("#"); - url->append(text, part.begin, part.len); -} - -bool HasPort(const std::string& original_text, - const url::Component& scheme_component) { - // Find the range between the ":" and the "/". - size_t port_start = scheme_component.end() + 1; - size_t port_end = port_start; - while ((port_end < original_text.length()) && - !url::IsAuthorityTerminator(original_text[port_end])) - ++port_end; - if (port_end == port_start) - return false; - - // Scan the range to see if it is entirely digits. - for (size_t i = port_start; i < port_end; ++i) { - if (!base::IsAsciiDigit(original_text[i])) - return false; - } - - return true; -} - -// Try to extract a valid scheme from the beginning of |text|. -// If successful, set |scheme_component| to the text range where the scheme -// was located, and fill |canon_scheme| with its canonicalized form. -// Otherwise, return false and leave the outputs in an indeterminate state. -bool GetValidScheme(const std::string& text, - url::Component* scheme_component, - std::string* canon_scheme) { - canon_scheme->clear(); - - // Locate everything up to (but not including) the first ':' - if (!url::ExtractScheme(text.data(), static_cast<int>(text.length()), - scheme_component)) { - return false; - } - - // Make sure the scheme contains only valid characters, and convert - // to lowercase. This also catches IPv6 literals like [::1], because - // brackets are not in the whitelist. - url::StdStringCanonOutput canon_scheme_output(canon_scheme); - url::Component canon_scheme_component; - if (!url::CanonicalizeScheme(text.data(), *scheme_component, - &canon_scheme_output, &canon_scheme_component)) { - return false; - } - - // Strip the ':', and any trailing buffer space. 
- DCHECK_EQ(0, canon_scheme_component.begin); - canon_scheme->erase(canon_scheme_component.len); - - // We need to fix up the segmentation for "www.example.com:/". For this - // case, we guess that schemes with a "." are not actually schemes. - if (canon_scheme->find('.') != std::string::npos) - return false; - - // We need to fix up the segmentation for "www:123/". For this case, we - // will add an HTTP scheme later and make the URL parser happy. - // TODO(pkasting): Maybe we should try to use GURL's parser for this? - if (HasPort(text, *scheme_component)) - return false; - - // Everything checks out. - return true; -} - -// Performs the work for url_formatter::SegmentURL. |text| may be modified on -// output on success: a semicolon following a valid scheme is replaced with a -// colon. -std::string SegmentURLInternal(std::string* text, url::Parsed* parts) { - // Initialize the result. - *parts = url::Parsed(); - - std::string trimmed; - TrimWhitespaceUTF8(*text, base::TRIM_ALL, &trimmed); - if (trimmed.empty()) - return std::string(); // Nothing to segment. - -#if defined(OS_WIN) - int trimmed_length = static_cast<int>(trimmed.length()); - if (url::DoesBeginWindowsDriveSpec(trimmed.data(), 0, trimmed_length) || - url::DoesBeginUNCPath(trimmed.data(), 0, trimmed_length, true)) - return "file"; -#elif defined(OS_POSIX) - if (base::FilePath::IsSeparator(trimmed.data()[0]) || - trimmed.data()[0] == '~') - return "file"; -#endif - - // Otherwise, we need to look at things carefully. - std::string scheme; - if (!GetValidScheme(*text, &parts->scheme, &scheme)) { - // Try again if there is a ';' in the text. If changing it to a ':' results - // in a scheme being found, continue processing with the modified text. 
- bool found_scheme = false; - size_t semicolon = text->find(';'); - if (semicolon != 0 && semicolon != std::string::npos) { - (*text)[semicolon] = ':'; - if (GetValidScheme(*text, &parts->scheme, &scheme)) - found_scheme = true; - else - (*text)[semicolon] = ';'; - } - if (!found_scheme) { - // Couldn't determine the scheme, so just pick one. - parts->scheme.reset(); - scheme = - base::StartsWith(*text, "ftp.", base::CompareCase::INSENSITIVE_ASCII) - ? url::kFtpScheme - : url::kHttpScheme; - } - } - - // Proceed with about and chrome schemes, but not file or nonstandard schemes. - if ((scheme != url::kAboutScheme) && (scheme != kChromeUIScheme) && - ((scheme == url::kFileScheme) || - !url::IsStandard( - scheme.c_str(), - url::Component(0, static_cast<int>(scheme.length()))))) { - return scheme; - } - - if (scheme == url::kFileSystemScheme) { - // Have the GURL parser do the heavy lifting for us. - url::ParseFileSystemURL(text->data(), static_cast<int>(text->length()), - parts); - return scheme; - } - - if (parts->scheme.is_valid()) { - // Have the GURL parser do the heavy lifting for us. - url::ParseStandardURL(text->data(), static_cast<int>(text->length()), - parts); - return scheme; - } - - // We need to add a scheme in order for ParseStandardURL to be happy. - // Find the first non-whitespace character. - std::string::iterator first_nonwhite = text->begin(); - while ((first_nonwhite != text->end()) && - base::IsUnicodeWhitespace(*first_nonwhite)) - ++first_nonwhite; - - // Construct the text to parse by inserting the scheme. - std::string inserted_text(scheme); - inserted_text.append(url::kStandardSchemeSeparator); - std::string text_to_parse(text->begin(), first_nonwhite); - text_to_parse.append(inserted_text); - text_to_parse.append(first_nonwhite, text->end()); - - // Have the GURL parser do the heavy lifting for us. 
- url::ParseStandardURL(text_to_parse.data(), - static_cast<int>(text_to_parse.length()), parts); - - // Offset the results of the parse to match the original text. - const int offset = -static_cast<int>(inserted_text.length()); - OffsetComponent(offset, &parts->scheme); - OffsetComponent(offset, &parts->username); - OffsetComponent(offset, &parts->password); - OffsetComponent(offset, &parts->host); - OffsetComponent(offset, &parts->port); - OffsetComponent(offset, &parts->path); - OffsetComponent(offset, &parts->query); - OffsetComponent(offset, &parts->ref); - - return scheme; -} - -} // namespace - -std::string SegmentURL(const std::string& text, url::Parsed* parts) { - std::string mutable_text(text); - return SegmentURLInternal(&mutable_text, parts); -} - -base::string16 SegmentURL(const base::string16& text, url::Parsed* parts) { - std::string text_utf8 = base::UTF16ToUTF8(text); - url::Parsed parts_utf8; - std::string scheme_utf8 = SegmentURL(text_utf8, &parts_utf8); - UTF8PartsToUTF16Parts(text_utf8, parts_utf8, parts); - return base::UTF8ToUTF16(scheme_utf8); -} - -GURL FixupURL(const std::string& text, const std::string& desired_tld) { - std::string trimmed; - TrimWhitespaceUTF8(text, base::TRIM_ALL, &trimmed); - if (trimmed.empty()) - return GURL(); // Nothing here. - - // Segment the URL. - url::Parsed parts; - std::string scheme(SegmentURLInternal(&trimmed, &parts)); - - // For view-source: URLs, we strip "view-source:", do fixup, and stick it back - // on. This allows us to handle things like "view-source:google.com". - if (scheme == kViewSourceScheme) { - // Reject "view-source:view-source:..." to avoid deep recursion. 
- std::string view_source(kViewSourceScheme + std::string(":")); - if (!base::StartsWith(text, view_source + view_source, - base::CompareCase::INSENSITIVE_ASCII)) { - return GURL(kViewSourceScheme + std::string(":") + - FixupURL(trimmed.substr(scheme.length() + 1), desired_tld) - .possibly_invalid_spec()); - } - } - - // We handle the file scheme separately. - if (scheme == url::kFileScheme) - return GURL(parts.scheme.is_valid() ? text : FixupPath(text)); - - // We handle the filesystem scheme separately. - if (scheme == url::kFileSystemScheme) { - if (parts.inner_parsed() && parts.inner_parsed()->scheme.is_valid()) - return GURL(text); - return GURL(); - } - - // Parse and rebuild about: and chrome: URLs, except about:blank. - bool chrome_url = - !base::LowerCaseEqualsASCII(trimmed, url::kAboutBlankURL) && - ((scheme == url::kAboutScheme) || (scheme == kChromeUIScheme)); - - // For some schemes whose layouts we understand, we rebuild it. - if (chrome_url || - url::IsStandard(scheme.c_str(), - url::Component(0, static_cast<int>(scheme.length())))) { - // Replace the about: scheme with the chrome: scheme. - std::string url(chrome_url ? kChromeUIScheme : scheme); - url.append(url::kStandardSchemeSeparator); - - // We need to check whether the |username| is valid because it is our - // responsibility to append the '@' to delineate the user information from - // the host portion of the URL. - if (parts.username.is_valid()) { - FixupUsername(trimmed, parts.username, &url); - FixupPassword(trimmed, parts.password, &url); - url.append("@"); - } - - FixupHost(trimmed, parts.host, parts.scheme.is_valid(), desired_tld, &url); - if (chrome_url && !parts.host.is_valid()) - url.append(kChromeUIDefaultHost); - FixupPort(trimmed, parts.port, &url); - FixupPath(trimmed, parts.path, &url); - FixupQuery(trimmed, parts.query, &url); - FixupRef(trimmed, parts.ref, &url); - - return GURL(url); - } - - // In the worst-case, we insert a scheme if the URL lacks one. 
- if (!parts.scheme.is_valid()) { - std::string fixed_scheme(scheme); - fixed_scheme.append(url::kStandardSchemeSeparator); - trimmed.insert(0, fixed_scheme); - } - - return GURL(trimmed); -} - -// The rules are different here than for regular fixup, since we need to handle -// input like "hello.html" and know to look in the current directory. Regular -// fixup will look for cues that it is actually a file path before trying to -// figure out what file it is. If our logic doesn't work, we will fall back on -// regular fixup. -GURL FixupRelativeFile(const base::FilePath& base_dir, - const base::FilePath& text) { - base::FilePath old_cur_directory; - if (!base_dir.empty()) { - // Save the old current directory before we move to the new one. - base::GetCurrentDirectory(&old_cur_directory); - base::SetCurrentDirectory(base_dir); - } - - // Allow funny input with extra whitespace and the wrong kind of slashes. - base::FilePath::StringType trimmed; - PrepareStringForFileOps(text, &trimmed); - - bool is_file = true; - // Avoid recognizing definite non-file URLs as file paths. - GURL gurl(trimmed); - if (gurl.is_valid() && gurl.IsStandard()) - is_file = false; - base::FilePath full_path; - if (is_file && !ValidPathForFile(trimmed, &full_path)) { -// Not a path as entered, try unescaping it in case the user has -// escaped things. We need to go through 8-bit since the escaped values -// only represent 8-bit values. -#if defined(OS_WIN) - std::wstring unescaped = base::UTF8ToWide(net::UnescapeURLComponent( - base::WideToUTF8(trimmed), - net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS)); -#elif defined(OS_POSIX) - std::string unescaped = net::UnescapeURLComponent( - trimmed, - net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS); -#endif - - if (!ValidPathForFile(unescaped, &full_path)) - is_file = false; - } - - // Put back the current directory if we saved it. 
- if (!base_dir.empty()) - base::SetCurrentDirectory(old_cur_directory); - - if (is_file) { - GURL file_url = net::FilePathToFileURL(full_path); - if (file_url.is_valid()) - return GURL(base::UTF16ToUTF8(url_formatter::FormatUrl( - file_url, std::string(), - url_formatter::kFormatUrlOmitUsernamePassword, - net::UnescapeRule::NORMAL, nullptr, nullptr, nullptr))); - // Invalid files fall through to regular processing. - } - -// Fall back on regular fixup for this input. -#if defined(OS_WIN) - std::string text_utf8 = base::WideToUTF8(text.value()); -#elif defined(OS_POSIX) - std::string text_utf8 = text.value(); -#endif - return FixupURL(text_utf8, std::string()); -} - -void OffsetComponent(int offset, url::Component* part) { - DCHECK(part); - - if (part->is_valid()) { - // Offset the location of this component. - part->begin += offset; - - // This part might not have existed in the original text. - if (part->begin < 0) - part->reset(); - } -} - -bool IsEquivalentScheme(const std::string& scheme1, - const std::string& scheme2) { - return scheme1 == scheme2 || - (scheme1 == url::kAboutScheme && scheme2 == kChromeUIScheme) || - (scheme1 == kChromeUIScheme && scheme2 == url::kAboutScheme); -} - -} // namespace url_formatter diff --git a/components/url_formatter/url_fixer.h b/components/url_formatter/url_fixer.h deleted file mode 100644 index b7c592d..0000000 --- a/components/url_formatter/url_fixer.h +++ /dev/null @@ -1,87 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef COMPONENTS_URL_FORMATTER_URL_FIXER_H_ -#define COMPONENTS_URL_FORMATTER_URL_FIXER_H_ - -#include <string> - -#include "base/strings/string16.h" -#include "url/gurl.h" - -namespace base { -class FilePath; -} - -namespace url { -struct Component; -struct Parsed; -} - -// This object is designed to convert various types of input into URLs that we -// know are valid. 
For example, user typing in the URL bar or command line -// options. This is NOT the place for converting between different types of URLs -// or parsing them, see net_util.h for that. -namespace url_formatter { - -// Segments the given text string into parts of a URL. This is most useful for -// schemes such as http, https, and ftp where |SegmentURL| will find many -// segments. Currently does not segment "file" schemes. -// Returns the canonicalized scheme, or the empty string when |text| is only -// whitespace. -std::string SegmentURL(const std::string& text, url::Parsed* parts); -base::string16 SegmentURL(const base::string16& text, url::Parsed* parts); - -// Converts |text| to a fixed-up URL and returns it. Attempts to make some -// "smart" adjustments to obviously-invalid input where possible. -// |text| may be an absolute path to a file, which will get converted to a -// "file:" URL. -// -// The result will be a "more" valid URL than the input. It may still not be -// valid, so check the return value's validity or use possibly_invalid_spec(). -// -// Schemes "about" and "chrome" are normalized to "chrome://", with slashes. -// "about:blank" is unaltered, as Webkit allows frames to access about:blank. -// Additionally, if a chrome URL does not have a valid host, as in "about:", the -// returned URL will have the host "version", as in "chrome://version". -// -// If |desired_tld| is non-empty, it represents the TLD the user wishes to -// append in the case of an incomplete domain. We check that this is not a file -// path and there does not appear to be a valid TLD already, then append -// |desired_tld| to the domain and prepend "www." (unless it, or a scheme, are -// already present.) This TLD should not have a leading '.' (use "com" instead -// of ".com"). -GURL FixupURL(const std::string& text, const std::string& desired_tld); - -// Converts |text| to a fixed-up URL, allowing it to be a relative path on the -// local filesystem. 
Begin searching in |base_dir|; if empty, use the current -// working directory. If this resolves to a file on disk, convert it to a -// "file:" URL in |fixed_up_url|; otherwise, fall back to the behavior of -// FixupURL(). -// -// For "regular" input, even if it is possibly a file with a full path, you -// should use FixupURL() directly. This function should only be used when -// relative path handling is desired, as for command line processing. -GURL FixupRelativeFile(const base::FilePath& base_dir, - const base::FilePath& text); - -// Offsets the beginning index of |part| by |offset|, which is allowed to be -// negative. In some cases, the desired component does not exist at the given -// offset. For example, when converting from "http://foo" to "foo", the scheme -// component no longer exists. In such a case, the beginning index is set to 0. -// Does nothing if |part| is invalid. -void OffsetComponent(int offset, url::Component* part); - -// Returns true if |scheme1| is equivalent to |scheme2|. -// Generally this is true if the two schemes are actually identical, but it's -// also true when one scheme is "about" and the other "chrome". -bool IsEquivalentScheme(const std::string& scheme1, const std::string& scheme2); - -// For paths like ~, we use $HOME for the current user's home directory. -// For tests, we allow our idea of $HOME to be overriden by this variable. -extern const char* home_directory_override; - -} // namespace url_formatter - -#endif // COMPONENTS_URL_FORMATTER_URL_FIXER_H_ diff --git a/components/url_formatter/url_fixer_unittest.cc b/components/url_formatter/url_fixer_unittest.cc deleted file mode 100644 index 900b553..0000000 --- a/components/url_formatter/url_fixer_unittest.cc +++ /dev/null @@ -1,537 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#include <stdlib.h> - -#include "base/base_paths.h" -#include "base/basictypes.h" -#include "base/files/file_path.h" -#include "base/files/file_util.h" -#include "base/files/scoped_temp_dir.h" -#include "base/strings/string_util.h" -#include "base/strings/utf_string_conversions.h" -#include "components/url_formatter/url_fixer.h" -#include "net/base/filename_util.h" -#include "testing/gtest/include/gtest/gtest.h" -#include "url/gurl.h" -#include "url/third_party/mozilla/url_parse.h" - -namespace url { - -std::ostream& operator<<(std::ostream& os, const Component& part) { - return os << "(begin=" << part.begin << ", len=" << part.len << ")"; -} - -} // namespace url - -struct SegmentCase { - const std::string input; - const std::string result; - const url::Component scheme; - const url::Component username; - const url::Component password; - const url::Component host; - const url::Component port; - const url::Component path; - const url::Component query; - const url::Component ref; -}; - -static const SegmentCase segment_cases[] = { - { "http://www.google.com/", "http", - url::Component(0, 4), // scheme - url::Component(), // username - url::Component(), // password - url::Component(7, 14), // host - url::Component(), // port - url::Component(21, 1), // path - url::Component(), // query - url::Component(), // ref - }, - { "aBoUt:vErSiOn", "about", - url::Component(0, 5), // scheme - url::Component(), // username - url::Component(), // password - url::Component(6, 7), // host - url::Component(), // port - url::Component(), // path - url::Component(), // query - url::Component(), // ref - }, - { "about:host/path?query#ref", "about", - url::Component(0, 5), // scheme - url::Component(), // username - url::Component(), // password - url::Component(6, 4), // host - url::Component(), // port - url::Component(10, 5), // path - url::Component(16, 5), // query - url::Component(22, 3), // ref - }, - { "about://host/path?query#ref", "about", - url::Component(0, 5), // scheme 
- url::Component(), // username - url::Component(), // password - url::Component(8, 4), // host - url::Component(), // port - url::Component(12, 5), // path - url::Component(18, 5), // query - url::Component(24, 3), // ref - }, - { "chrome:host/path?query#ref", "chrome", - url::Component(0, 6), // scheme - url::Component(), // username - url::Component(), // password - url::Component(7, 4), // host - url::Component(), // port - url::Component(11, 5), // path - url::Component(17, 5), // query - url::Component(23, 3), // ref - }, - { "chrome://host/path?query#ref", "chrome", - url::Component(0, 6), // scheme - url::Component(), // username - url::Component(), // password - url::Component(9, 4), // host - url::Component(), // port - url::Component(13, 5), // path - url::Component(19, 5), // query - url::Component(25, 3), // ref - }, - { " www.google.com:124?foo#", "http", - url::Component(), // scheme - url::Component(), // username - url::Component(), // password - url::Component(4, 14), // host - url::Component(19, 3), // port - url::Component(), // path - url::Component(23, 3), // query - url::Component(27, 0), // ref - }, - { "user@www.google.com", "http", - url::Component(), // scheme - url::Component(0, 4), // username - url::Component(), // password - url::Component(5, 14), // host - url::Component(), // port - url::Component(), // path - url::Component(), // query - url::Component(), // ref - }, - { "ftp:/user:P:a$$Wd@..ftp.google.com...::23///pub?foo#bar", "ftp", - url::Component(0, 3), // scheme - url::Component(5, 4), // username - url::Component(10, 7), // password - url::Component(18, 20), // host - url::Component(39, 2), // port - url::Component(41, 6), // path - url::Component(48, 3), // query - url::Component(52, 3), // ref - }, - { "[2001:db8::1]/path", "http", - url::Component(), // scheme - url::Component(), // username - url::Component(), // password - url::Component(0, 13), // host - url::Component(), // port - url::Component(13, 5), // path - 
url::Component(), // query - url::Component(), // ref - }, - { "[::1]", "http", - url::Component(), // scheme - url::Component(), // username - url::Component(), // password - url::Component(0, 5), // host - url::Component(), // port - url::Component(), // path - url::Component(), // query - url::Component(), // ref - }, - // Incomplete IPv6 addresses (will not canonicalize). - { "[2001:4860:", "http", - url::Component(), // scheme - url::Component(), // username - url::Component(), // password - url::Component(0, 11), // host - url::Component(), // port - url::Component(), // path - url::Component(), // query - url::Component(), // ref - }, - { "[2001:4860:/foo", "http", - url::Component(), // scheme - url::Component(), // username - url::Component(), // password - url::Component(0, 11), // host - url::Component(), // port - url::Component(11, 4), // path - url::Component(), // query - url::Component(), // ref - }, - { "http://:b005::68]", "http", - url::Component(0, 4), // scheme - url::Component(), // username - url::Component(), // password - url::Component(7, 10), // host - url::Component(), // port - url::Component(), // path - url::Component(), // query - url::Component(), // ref - }, - // Can't do anything useful with this. 
- { ":b005::68]", "", - url::Component(0, 0), // scheme - url::Component(), // username - url::Component(), // password - url::Component(), // host - url::Component(), // port - url::Component(), // path - url::Component(), // query - url::Component(), // ref - }, -}; - -typedef testing::Test URLFixerTest; - -TEST(URLFixerTest, SegmentURL) { - std::string result; - url::Parsed parts; - - for (size_t i = 0; i < arraysize(segment_cases); ++i) { - SegmentCase value = segment_cases[i]; - result = url_formatter::SegmentURL(value.input, &parts); - EXPECT_EQ(value.result, result); - EXPECT_EQ(value.scheme, parts.scheme); - EXPECT_EQ(value.username, parts.username); - EXPECT_EQ(value.password, parts.password); - EXPECT_EQ(value.host, parts.host); - EXPECT_EQ(value.port, parts.port); - EXPECT_EQ(value.path, parts.path); - EXPECT_EQ(value.query, parts.query); - EXPECT_EQ(value.ref, parts.ref); - } -} - -// Creates a file and returns its full name as well as the decomposed -// version. Example: -// full_path = "c:\foo\bar.txt" -// dir = "c:\foo" -// file_name = "bar.txt" -static bool MakeTempFile(const base::FilePath& dir, - const base::FilePath& file_name, - base::FilePath* full_path) { - *full_path = dir.Append(file_name); - return base::WriteFile(*full_path, "", 0) == 0; -} - -// Returns true if the given URL is a file: URL that matches the given file -static bool IsMatchingFileURL(const std::string& url, - const base::FilePath& full_file_path) { - if (url.length() <= 8) - return false; - if (std::string("file:///") != url.substr(0, 8)) - return false; // no file:/// prefix - if (url.find('\\') != std::string::npos) - return false; // contains backslashes - - base::FilePath derived_path; - net::FileURLToFilePath(GURL(url), &derived_path); - - return base::FilePath::CompareEqualIgnoreCase(derived_path.value(), - full_file_path.value()); -} - -struct FixupCase { - const std::string input; - const std::string output; -} fixup_cases[] = { - {"www.google.com", 
"http://www.google.com/"}, - {" www.google.com ", "http://www.google.com/"}, - {" foo.com/asdf bar", "http://foo.com/asdf%20%20bar"}, - {"..www.google.com..", "http://www.google.com./"}, - {"http://......", "http://....../"}, - {"http://host.com:ninety-two/", "http://host.com:ninety-two/"}, - {"http://host.com:ninety-two?foo", "http://host.com:ninety-two/?foo"}, - {"google.com:123", "http://google.com:123/"}, - {"about:", "chrome://version/"}, - {"about:foo", "chrome://foo/"}, - {"about:version", "chrome://version/"}, - {"about:blank", "about:blank"}, - {"about:usr:pwd@hst/pth?qry#ref", "chrome://usr:pwd@hst/pth?qry#ref"}, - {"about://usr:pwd@hst/pth?qry#ref", "chrome://usr:pwd@hst/pth?qry#ref"}, - {"chrome:usr:pwd@hst/pth?qry#ref", "chrome://usr:pwd@hst/pth?qry#ref"}, - {"chrome://usr:pwd@hst/pth?qry#ref", "chrome://usr:pwd@hst/pth?qry#ref"}, - {"www:123", "http://www:123/"}, - {" www:123", "http://www:123/"}, - {"www.google.com?foo", "http://www.google.com/?foo"}, - {"www.google.com#foo", "http://www.google.com/#foo"}, - {"www.google.com?", "http://www.google.com/?"}, - {"www.google.com#", "http://www.google.com/#"}, - {"www.google.com:123?foo#bar", "http://www.google.com:123/?foo#bar"}, - {"user@www.google.com", "http://user@www.google.com/"}, - {"\xE6\xB0\xB4.com", "http://xn--1rw.com/"}, - // It would be better if this next case got treated as http, but I don't see - // a clean way to guess this isn't the new-and-exciting "user" scheme. - {"user:passwd@www.google.com:8080/", "user:passwd@www.google.com:8080/"}, - // {"file:///c:/foo/bar%20baz.txt", "file:///C:/foo/bar%20baz.txt"}, - {"ftp.google.com", "ftp://ftp.google.com/"}, - {" ftp.google.com", "ftp://ftp.google.com/"}, - {"FTP.GooGle.com", "ftp://ftp.google.com/"}, - {"ftpblah.google.com", "http://ftpblah.google.com/"}, - {"ftp", "http://ftp/"}, - {"google.ftp.com", "http://google.ftp.com/"}, - // URLs which end with 0x85 (NEL in ISO-8859). 
- {"http://foo.com/s?q=\xd0\x85", "http://foo.com/s?q=%D0%85"}, - {"http://foo.com/s?q=\xec\x97\x85", "http://foo.com/s?q=%EC%97%85"}, - {"http://foo.com/s?q=\xf0\x90\x80\x85", "http://foo.com/s?q=%F0%90%80%85"}, - // URLs which end with 0xA0 (non-break space in ISO-8859). - {"http://foo.com/s?q=\xd0\xa0", "http://foo.com/s?q=%D0%A0"}, - {"http://foo.com/s?q=\xec\x97\xa0", "http://foo.com/s?q=%EC%97%A0"}, - {"http://foo.com/s?q=\xf0\x90\x80\xa0", "http://foo.com/s?q=%F0%90%80%A0"}, - // URLs containing IPv6 literals. - {"[2001:db8::2]", "http://[2001:db8::2]/"}, - {"[::]:80", "http://[::]/"}, - {"[::]:80/path", "http://[::]/path"}, - {"[::]:180/path", "http://[::]:180/path"}, - // TODO(pmarks): Maybe we should parse bare IPv6 literals someday. - {"::1", "::1"}, - // Semicolon as scheme separator for standard schemes. - {"http;//www.google.com/", "http://www.google.com/"}, - {"about;chrome", "chrome://chrome/"}, - // Semicolon left as-is for non-standard schemes. - {"whatsup;//fool", "whatsup://fool"}, - // Semicolon left as-is in URL itself. - {"http://host/port?query;moar", "http://host/port?query;moar"}, - // Fewer slashes than expected. - {"http;www.google.com/", "http://www.google.com/"}, - {"http;/www.google.com/", "http://www.google.com/"}, - // Semicolon at start. - {";http://www.google.com/", "http://%3Bhttp//www.google.com/"}, -}; - -TEST(URLFixerTest, FixupURL) { - for (size_t i = 0; i < arraysize(fixup_cases); ++i) { - FixupCase value = fixup_cases[i]; - EXPECT_EQ(value.output, - url_formatter::FixupURL(value.input, "").possibly_invalid_spec()) - << "input: " << value.input; - } - - // Check the TLD-appending functionality. 
- FixupCase tld_cases[] = { - {"somedomainthatwillnotbeagtld", - "http://www.somedomainthatwillnotbeagtld.com/"}, - {"somedomainthatwillnotbeagtld.", - "http://www.somedomainthatwillnotbeagtld.com/"}, - {"somedomainthatwillnotbeagtld..", - "http://www.somedomainthatwillnotbeagtld.com/"}, - {".somedomainthatwillnotbeagtld", - "http://www.somedomainthatwillnotbeagtld.com/"}, - {"www.somedomainthatwillnotbeagtld", - "http://www.somedomainthatwillnotbeagtld.com/"}, - {"somedomainthatwillnotbeagtld.com", - "http://somedomainthatwillnotbeagtld.com/"}, - {"http://somedomainthatwillnotbeagtld", - "http://www.somedomainthatwillnotbeagtld.com/"}, - {"..somedomainthatwillnotbeagtld..", - "http://www.somedomainthatwillnotbeagtld.com/"}, - {"http://www.somedomainthatwillnotbeagtld", - "http://www.somedomainthatwillnotbeagtld.com/"}, - {"9999999999999999", "http://www.9999999999999999.com/"}, - {"somedomainthatwillnotbeagtld/foo", - "http://www.somedomainthatwillnotbeagtld.com/foo"}, - {"somedomainthatwillnotbeagtld.com/foo", - "http://somedomainthatwillnotbeagtld.com/foo"}, - {"somedomainthatwillnotbeagtld/?foo=.com", - "http://www.somedomainthatwillnotbeagtld.com/?foo=.com"}, - {"www.somedomainthatwillnotbeagtld/?foo=www.", - "http://www.somedomainthatwillnotbeagtld.com/?foo=www."}, - {"somedomainthatwillnotbeagtld.com/?foo=.com", - "http://somedomainthatwillnotbeagtld.com/?foo=.com"}, - {"http://www.somedomainthatwillnotbeagtld.com", - "http://www.somedomainthatwillnotbeagtld.com/"}, - {"somedomainthatwillnotbeagtld:123", - "http://www.somedomainthatwillnotbeagtld.com:123/"}, - {"http://somedomainthatwillnotbeagtld:123", - "http://www.somedomainthatwillnotbeagtld.com:123/"}, - }; - for (size_t i = 0; i < arraysize(tld_cases); ++i) { - FixupCase value = tld_cases[i]; - EXPECT_EQ(value.output, url_formatter::FixupURL(value.input, "com") - .possibly_invalid_spec()); - } -} - -// Test different types of file inputs to URIFixerUpper::FixupURL. 
This -// doesn't go into the nice array of fixups above since the file input -// has to exist. -TEST(URLFixerTest, FixupFile) { - // this "original" filename is the one we tweak to get all the variations - base::ScopedTempDir temp_dir_; - ASSERT_TRUE(temp_dir_.CreateUniqueTempDir()); - base::FilePath original; - ASSERT_TRUE(MakeTempFile( - temp_dir_.path(), - base::FilePath(FILE_PATH_LITERAL("url fixer upper existing file.txt")), - &original)); - - // reference path - GURL golden(net::FilePathToFileURL(original)); - - // c:\foo\bar.txt -> file:///c:/foo/bar.txt (basic) - GURL fixedup(url_formatter::FixupURL(original.AsUTF8Unsafe(), std::string())); - EXPECT_EQ(golden, fixedup); - - // TODO(port): Make some equivalent tests for posix. -#if defined(OS_WIN) - // c|/foo\bar.txt -> file:///c:/foo/bar.txt (pipe allowed instead of colon) - std::string cur(base::WideToUTF8(original.value())); - EXPECT_EQ(':', cur[1]); - cur[1] = '|'; - EXPECT_EQ(golden, url_formatter::FixupURL(cur, std::string())); - - FixupCase cases[] = { - {"c:\\Non-existent%20file.txt", "file:///C:/Non-existent%2520file.txt"}, - - // \\foo\bar.txt -> file://foo/bar.txt - // UNC paths, this file won't exist, but since there are no escapes, it - // should be returned just converted to a file: URL. - {"\\\\NonexistentHost\\foo\\bar.txt", "file://nonexistenthost/foo/bar.txt"}, - // We do this strictly, like IE8, which only accepts this form using - // backslashes and not forward ones. Turning "//foo" into "http" matches - // Firefox and IE, silly though it may seem (it falls out of adding "http" - // as the default protocol if you haven't entered one). - {"//NonexistentHost\\foo/bar.txt", "http://nonexistenthost/foo/bar.txt"}, - {"file:///C:/foo/bar", "file:///C:/foo/bar"}, - - // Much of the work here comes from GURL's canonicalization stage. 
- {"file://C:/foo/bar", "file:///C:/foo/bar"}, - {"file:c:", "file:///C:/"}, - {"file:c:WINDOWS", "file:///C:/WINDOWS"}, - {"file:c|Program Files", "file:///C:/Program%20Files"}, - {"file:/file", "file://file/"}, - {"file:////////c:\\foo", "file:///C:/foo"}, - {"file://server/folder/file", "file://server/folder/file"}, - - // These are fixups we don't do, but could consider: - // {"file:///foo:/bar", "file://foo/bar"}, - // {"file:/\\/server\\folder/file", "file://server/folder/file"}, - }; -#elif defined(OS_POSIX) - -#if defined(OS_MACOSX) -#define HOME "/Users/" -#else -#define HOME "/home/" -#endif - url_formatter::home_directory_override = "/foo"; - FixupCase cases[] = { - // File URLs go through GURL, which tries to escape intelligently. - {"/A%20non-existent file.txt", "file:///A%2520non-existent%20file.txt"}, - // A plain "/" refers to the root. - {"/", "file:///"}, - - // These rely on the above home_directory_override. - {"~", "file:///foo"}, - {"~/bar", "file:///foo/bar"}, - - // References to other users' homedirs. 
- {"~foo", "file://" HOME "foo"}, - {"~x/blah", "file://" HOME "x/blah"}, - }; -#endif - - for (size_t i = 0; i < arraysize(cases); i++) { - EXPECT_EQ(cases[i].output, - url_formatter::FixupURL(cases[i].input, std::string()) - .possibly_invalid_spec()); - } - - EXPECT_TRUE(base::DeleteFile(original, false)); -} - -TEST(URLFixerTest, FixupRelativeFile) { - base::FilePath full_path; - base::FilePath file_part( - FILE_PATH_LITERAL("url_fixer_upper_existing_file.txt")); - base::ScopedTempDir temp_dir_; - ASSERT_TRUE(temp_dir_.CreateUniqueTempDir()); - ASSERT_TRUE(MakeTempFile(temp_dir_.path(), file_part, &full_path)); - full_path = base::MakeAbsoluteFilePath(full_path); - ASSERT_FALSE(full_path.empty()); - - // make sure we pass through good URLs - for (size_t i = 0; i < arraysize(fixup_cases); ++i) { - FixupCase value = fixup_cases[i]; - base::FilePath input = base::FilePath::FromUTF8Unsafe(value.input); - EXPECT_EQ(value.output, - url_formatter::FixupRelativeFile(temp_dir_.path(), - input).possibly_invalid_spec()); - } - - // make sure the existing file got fixed-up to a file URL, and that there - // are no backslashes - EXPECT_TRUE(IsMatchingFileURL( - url_formatter::FixupRelativeFile(temp_dir_.path(), - file_part).possibly_invalid_spec(), full_path)); - EXPECT_TRUE(base::DeleteFile(full_path, false)); - - // create a filename we know doesn't exist and make sure it doesn't get - // fixed up to a file URL - base::FilePath nonexistent_file( - FILE_PATH_LITERAL("url_fixer_upper_nonexistent_file.txt")); - std::string fixedup(url_formatter::FixupRelativeFile( - temp_dir_.path(), nonexistent_file).possibly_invalid_spec()); - EXPECT_NE(std::string("file:///"), fixedup.substr(0, 8)); - EXPECT_FALSE(IsMatchingFileURL(fixedup, nonexistent_file)); - - // make a subdir to make sure relative paths with directories work, also - // test spaces: - // "app_dir\url fixer-upper dir\url fixer-upper existing file.txt" - base::FilePath sub_dir(FILE_PATH_LITERAL("url fixer-upper dir")); - 
base::FilePath sub_file( - FILE_PATH_LITERAL("url fixer-upper existing file.txt")); - base::FilePath new_dir = temp_dir_.path().Append(sub_dir); - base::CreateDirectory(new_dir); - ASSERT_TRUE(MakeTempFile(new_dir, sub_file, &full_path)); - full_path = base::MakeAbsoluteFilePath(full_path); - ASSERT_FALSE(full_path.empty()); - - // test file in the subdir - base::FilePath relative_file = sub_dir.Append(sub_file); - EXPECT_TRUE(IsMatchingFileURL( - url_formatter::FixupRelativeFile(temp_dir_.path(), - relative_file).possibly_invalid_spec(), full_path)); - - // test file in the subdir with different slashes and escaping. - base::FilePath::StringType relative_file_str = sub_dir.value() + - FILE_PATH_LITERAL("/") + sub_file.value(); - base::ReplaceSubstringsAfterOffset(&relative_file_str, 0, - FILE_PATH_LITERAL(" "), FILE_PATH_LITERAL("%20")); - EXPECT_TRUE(IsMatchingFileURL( - url_formatter::FixupRelativeFile(temp_dir_.path(), - base::FilePath(relative_file_str)).possibly_invalid_spec(), - full_path)); - - // test relative directories and duplicate slashes - // (should resolve to the same file as above) - relative_file_str = sub_dir.value() + FILE_PATH_LITERAL("/../") + - sub_dir.value() + FILE_PATH_LITERAL("///./") + sub_file.value(); - EXPECT_TRUE(IsMatchingFileURL( - url_formatter::FixupRelativeFile(temp_dir_.path(), - base::FilePath(relative_file_str)).possibly_invalid_spec(), - full_path)); - - // done with the subdir - EXPECT_TRUE(base::DeleteFile(full_path, false)); - EXPECT_TRUE(base::DeleteFile(new_dir, true)); - - // Test that an obvious HTTP URL isn't accidentally treated as an absolute - // file path (on account of system-specific craziness). 
- base::FilePath empty_path; - base::FilePath http_url_path(FILE_PATH_LITERAL("http://../")); - EXPECT_TRUE(url_formatter::FixupRelativeFile(empty_path, http_url_path) - .SchemeIs("http")); -} diff --git a/components/url_formatter/url_formatter.cc b/components/url_formatter/url_formatter.cc deleted file mode 100644 index cc209d8..0000000 --- a/components/url_formatter/url_formatter.cc +++ /dev/null @@ -1,807 +0,0 @@ -// Copyright 2015 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "components/url_formatter/url_formatter.h" - -#include <algorithm> -#include <map> -#include <utility> - -#include "base/lazy_instance.h" -#include "base/logging.h" -#include "base/macros.h" -#include "base/memory/singleton.h" -#include "base/stl_util.h" -#include "base/strings/string_tokenizer.h" -#include "base/strings/string_util.h" -#include "base/strings/utf_offset_string_conversions.h" -#include "base/strings/utf_string_conversions.h" -#include "base/synchronization/lock.h" -#include "third_party/icu/source/common/unicode/uidna.h" -#include "third_party/icu/source/common/unicode/uniset.h" -#include "third_party/icu/source/common/unicode/uscript.h" -#include "third_party/icu/source/i18n/unicode/regex.h" -#include "third_party/icu/source/i18n/unicode/ulocdata.h" -#include "url/gurl.h" -#include "url/third_party/mozilla/url_parse.h" - -namespace url_formatter { - -namespace { - -base::string16 IDNToUnicodeWithAdjustments( - const std::string& host, - const std::string& languages, - base::OffsetAdjuster::Adjustments* adjustments); -bool IDNToUnicodeOneComponent(const base::char16* comp, - size_t comp_len, - const std::string& languages, - base::string16* out); - -class AppendComponentTransform { - public: - AppendComponentTransform() {} - virtual ~AppendComponentTransform() {} - - virtual base::string16 Execute( - const std::string& component_text, - 
base::OffsetAdjuster::Adjustments* adjustments) const = 0; - - // NOTE: No DISALLOW_COPY_AND_ASSIGN here, since gcc < 4.3.0 requires an - // accessible copy constructor in order to call AppendFormattedComponent() - // with an inline temporary (see http://gcc.gnu.org/bugs/#cxx%5Frvalbind ). -}; - -class HostComponentTransform : public AppendComponentTransform { - public: - explicit HostComponentTransform(const std::string& languages) - : languages_(languages) {} - - private: - base::string16 Execute( - const std::string& component_text, - base::OffsetAdjuster::Adjustments* adjustments) const override { - return IDNToUnicodeWithAdjustments(component_text, languages_, adjustments); - } - - const std::string& languages_; -}; - -class NonHostComponentTransform : public AppendComponentTransform { - public: - explicit NonHostComponentTransform(net::UnescapeRule::Type unescape_rules) - : unescape_rules_(unescape_rules) {} - - private: - base::string16 Execute( - const std::string& component_text, - base::OffsetAdjuster::Adjustments* adjustments) const override { - return (unescape_rules_ == net::UnescapeRule::NONE) - ? base::UTF8ToUTF16WithAdjustments(component_text, adjustments) - : net::UnescapeAndDecodeUTF8URLComponentWithAdjustments( - component_text, unescape_rules_, adjustments); - } - - const net::UnescapeRule::Type unescape_rules_; -}; - -// Transforms the portion of |spec| covered by |original_component| according to -// |transform|. Appends the result to |output|. If |output_component| is -// non-NULL, its start and length are set to the transformed component's new -// start and length. If |adjustments| is non-NULL, appends adjustments (if -// any) that reflect the transformation the original component underwent to -// become the transformed value appended to |output|. 
-void AppendFormattedComponent(const std::string& spec, - const url::Component& original_component, - const AppendComponentTransform& transform, - base::string16* output, - url::Component* output_component, - base::OffsetAdjuster::Adjustments* adjustments) { - DCHECK(output); - if (original_component.is_nonempty()) { - size_t original_component_begin = - static_cast<size_t>(original_component.begin); - size_t output_component_begin = output->length(); - std::string component_str(spec, original_component_begin, - static_cast<size_t>(original_component.len)); - - // Transform |component_str| and modify |adjustments| appropriately. - base::OffsetAdjuster::Adjustments component_transform_adjustments; - output->append( - transform.Execute(component_str, &component_transform_adjustments)); - - // Shift all the adjustments made for this component so the offsets are - // valid for the original string and add them to |adjustments|. - for (base::OffsetAdjuster::Adjustments::iterator comp_iter = - component_transform_adjustments.begin(); - comp_iter != component_transform_adjustments.end(); ++comp_iter) - comp_iter->original_offset += original_component_begin; - if (adjustments) { - adjustments->insert(adjustments->end(), - component_transform_adjustments.begin(), - component_transform_adjustments.end()); - } - - // Set positions of the parsed component. - if (output_component) { - output_component->begin = static_cast<int>(output_component_begin); - output_component->len = - static_cast<int>(output->length() - output_component_begin); - } - } else if (output_component) { - output_component->reset(); - } -} - -// If |component| is valid, its begin is incremented by |delta|. -void AdjustComponent(int delta, url::Component* component) { - if (!component->is_valid()) - return; - - DCHECK(delta >= 0 || component->begin >= -delta); - component->begin += delta; -} - -// Adjusts all the components of |parsed| by |delta|, except for the scheme. 
-void AdjustAllComponentsButScheme(int delta, url::Parsed* parsed) { - AdjustComponent(delta, &(parsed->username)); - AdjustComponent(delta, &(parsed->password)); - AdjustComponent(delta, &(parsed->host)); - AdjustComponent(delta, &(parsed->port)); - AdjustComponent(delta, &(parsed->path)); - AdjustComponent(delta, &(parsed->query)); - AdjustComponent(delta, &(parsed->ref)); -} - -// Helper for FormatUrlWithOffsets(). -base::string16 FormatViewSourceUrl( - const GURL& url, - const std::string& languages, - FormatUrlTypes format_types, - net::UnescapeRule::Type unescape_rules, - url::Parsed* new_parsed, - size_t* prefix_end, - base::OffsetAdjuster::Adjustments* adjustments) { - DCHECK(new_parsed); - const char kViewSource[] = "view-source:"; - const size_t kViewSourceLength = arraysize(kViewSource) - 1; - - // Format the underlying URL and record adjustments. - const std::string& url_str(url.possibly_invalid_spec()); - adjustments->clear(); - base::string16 result( - base::ASCIIToUTF16(kViewSource) + - FormatUrlWithAdjustments(GURL(url_str.substr(kViewSourceLength)), - languages, format_types, unescape_rules, - new_parsed, prefix_end, adjustments)); - // Revise |adjustments| by shifting to the offsets to prefix that the above - // call to FormatUrl didn't get to see. - for (base::OffsetAdjuster::Adjustments::iterator it = adjustments->begin(); - it != adjustments->end(); ++it) - it->original_offset += kViewSourceLength; - - // Adjust positions of the parsed components. - if (new_parsed->scheme.is_nonempty()) { - // Assume "view-source:real-scheme" as a scheme. - new_parsed->scheme.len += kViewSourceLength; - } else { - new_parsed->scheme.begin = 0; - new_parsed->scheme.len = kViewSourceLength - 1; - } - AdjustAllComponentsButScheme(kViewSourceLength, new_parsed); - - if (prefix_end) - *prefix_end += kViewSourceLength; - - return result; -} - -// TODO(brettw) bug 734373: check the scripts for each host component and -// don't un-IDN-ize if there is more than one. 
Alternatively, only IDN for -// scripts that the user has installed. For now, just put the entire -// path through IDN. Maybe this feature can be implemented in ICU itself? -// -// We may want to skip this step in the case of file URLs to allow unicode -// UNC hostnames regardless of encodings. -base::string16 IDNToUnicodeWithAdjustments( - const std::string& host, - const std::string& languages, - base::OffsetAdjuster::Adjustments* adjustments) { - if (adjustments) - adjustments->clear(); - // Convert the ASCII input to a base::string16 for ICU. - base::string16 input16; - input16.reserve(host.length()); - input16.insert(input16.end(), host.begin(), host.end()); - - // Do each component of the host separately, since we enforce script matching - // on a per-component basis. - base::string16 out16; - for (size_t component_start = 0, component_end; - component_start < input16.length(); - component_start = component_end + 1) { - // Find the end of the component. - component_end = input16.find('.', component_start); - if (component_end == base::string16::npos) - component_end = input16.length(); // For getting the last component. - size_t component_length = component_end - component_start; - size_t new_component_start = out16.length(); - bool converted_idn = false; - if (component_end > component_start) { - // Add the substring that we just found. - converted_idn = - IDNToUnicodeOneComponent(input16.data() + component_start, - component_length, languages, &out16); - } - size_t new_component_length = out16.length() - new_component_start; - - if (converted_idn && adjustments) { - adjustments->push_back(base::OffsetAdjuster::Adjustment( - component_start, component_length, new_component_length)); - } - - // Need to add the dot we just found (if we found one). - if (component_end < input16.length()) - out16.push_back('.'); - } - return out16; -} - -// Does some simple normalization of scripts so we can allow certain scripts -// to exist together. 
-// TODO(brettw) bug 880223: we should allow some other languages to be -// combined such as Chinese and Latin. We will probably need a more -// complicated system of language pairs to have more fine-grained control. -UScriptCode NormalizeScript(UScriptCode code) { - switch (code) { - case USCRIPT_KATAKANA: - case USCRIPT_HIRAGANA: - case USCRIPT_KATAKANA_OR_HIRAGANA: - case USCRIPT_HANGUL: // This one is arguable. - return USCRIPT_HAN; - default: - return code; - } -} - -bool IsIDNComponentInSingleScript(const base::char16* str, int str_len) { - UScriptCode first_script = USCRIPT_INVALID_CODE; - bool is_first = true; - - int i = 0; - while (i < str_len) { - unsigned code_point; - U16_NEXT(str, i, str_len, code_point); - - UErrorCode err = U_ZERO_ERROR; - UScriptCode cur_script = uscript_getScript(code_point, &err); - if (err != U_ZERO_ERROR) - return false; // Report mixed on error. - cur_script = NormalizeScript(cur_script); - - // TODO(brettw) We may have to check for USCRIPT_INHERENT as well. - if (is_first && cur_script != USCRIPT_COMMON) { - first_script = cur_script; - is_first = false; - } else { - if (cur_script != USCRIPT_COMMON && cur_script != first_script) - return false; - } - } - return true; -} - -// Check if the script of a language can be 'safely' mixed with -// Latin letters in the ASCII range. -bool IsCompatibleWithASCIILetters(const std::string& lang) { - // For now, just list Chinese, Japanese and Korean (positive list). - // An alternative is negative-listing (languages using Greek and - // Cyrillic letters), but it can be more dangerous. 
- return !lang.substr(0, 2).compare("zh") || !lang.substr(0, 2).compare("ja") || - !lang.substr(0, 2).compare("ko"); -} - -typedef std::map<std::string, icu::UnicodeSet*> LangToExemplarSetMap; - -class LangToExemplarSet { - public: - static LangToExemplarSet* GetInstance() { - return Singleton<LangToExemplarSet>::get(); - } - - private: - LangToExemplarSetMap map; - LangToExemplarSet() {} - ~LangToExemplarSet() { - STLDeleteContainerPairSecondPointers(map.begin(), map.end()); - } - - friend class Singleton<LangToExemplarSet>; - friend struct DefaultSingletonTraits<LangToExemplarSet>; - friend bool GetExemplarSetForLang(const std::string&, icu::UnicodeSet**); - friend void SetExemplarSetForLang(const std::string&, icu::UnicodeSet*); - - DISALLOW_COPY_AND_ASSIGN(LangToExemplarSet); -}; - -bool GetExemplarSetForLang(const std::string& lang, - icu::UnicodeSet** lang_set) { - const LangToExemplarSetMap& map = LangToExemplarSet::GetInstance()->map; - LangToExemplarSetMap::const_iterator pos = map.find(lang); - if (pos != map.end()) { - *lang_set = pos->second; - return true; - } - return false; -} - -void SetExemplarSetForLang(const std::string& lang, icu::UnicodeSet* lang_set) { - LangToExemplarSetMap& map = LangToExemplarSet::GetInstance()->map; - map.insert(std::make_pair(lang, lang_set)); -} - -static base::LazyInstance<base::Lock>::Leaky g_lang_set_lock = - LAZY_INSTANCE_INITIALIZER; - -// Returns true if all the characters in component_characters are used by -// the language |lang|. -bool IsComponentCoveredByLang(const icu::UnicodeSet& component_characters, - const std::string& lang) { - CR_DEFINE_STATIC_LOCAL(const icu::UnicodeSet, kASCIILetters, ('a', 'z')); - icu::UnicodeSet* lang_set = nullptr; - // We're called from both the UI thread and the history thread. 
- { - base::AutoLock lock(g_lang_set_lock.Get()); - if (!GetExemplarSetForLang(lang, &lang_set)) { - UErrorCode status = U_ZERO_ERROR; - ULocaleData* uld = ulocdata_open(lang.c_str(), &status); - // TODO(jungshik) Turn this check on when the ICU data file is - // rebuilt with the minimal subset of locale data for languages - // to which Chrome is not localized but which we offer in the list - // of languages selectable for Accept-Languages. With the rebuilt ICU - // data, ulocdata_open never should fall back to the default locale. - // (issue 2078) - // DCHECK(U_SUCCESS(status) && status != U_USING_DEFAULT_WARNING); - if (U_SUCCESS(status) && status != U_USING_DEFAULT_WARNING) { - lang_set = reinterpret_cast<icu::UnicodeSet*>(ulocdata_getExemplarSet( - uld, nullptr, 0, ULOCDATA_ES_STANDARD, &status)); - // On success, if |lang| is compatible with ASCII Latin letters, add - // them. - if (lang_set && IsCompatibleWithASCIILetters(lang)) - lang_set->addAll(kASCIILetters); - } - - if (!lang_set) - lang_set = new icu::UnicodeSet(1, 0); - - lang_set->freeze(); - SetExemplarSetForLang(lang, lang_set); - ulocdata_close(uld); - } - } - return !lang_set->isEmpty() && lang_set->containsAll(component_characters); -} - -// Returns true if the given Unicode host component is safe to display to the -// user. -bool IsIDNComponentSafe(const base::char16* str, - int str_len, - const std::string& languages) { - // Most common cases (non-IDN) do not reach here so that we don't - // need a fast return path. - // TODO(jungshik) : Check if there's any character inappropriate - // (although allowed) for domain names. - // See http://www.unicode.org/reports/tr39/#IDN_Security_Profiles and - // http://www.unicode.org/reports/tr39/data/xidmodifications.txt - // For now, we borrow the list from Mozilla and tweaked it slightly. - // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because - // they're gonna be canonicalized to U+0020 and full stop before - // reaching here.) 
- // The original list is available at - // http://kb.mozillazine.org/Network.IDN.blacklist_chars and - // at - // http://mxr.mozilla.org/seamonkey/source/modules/libpref/src/init/all.js#703 - - UErrorCode status = U_ZERO_ERROR; -#ifdef U_WCHAR_IS_UTF16 - icu::UnicodeSet dangerous_characters( - icu::UnicodeString( - L"[[\\ \u00ad\u00bc\u00bd\u01c3\u0337\u0338" - L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]" - L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]" - L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae" - L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014" - L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14" - L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]" - L"[\ufffa-\ufffd]\U0001f50f\U0001f510\U0001f512\U0001f513]"), - status); - DCHECK(U_SUCCESS(status)); - icu::RegexMatcher dangerous_patterns( - icu::UnicodeString( - // Lone katakana no, so, or n - L"[^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]" - // Repeating Japanese accent characters - L"|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c]"), - 0, status); -#else - icu::UnicodeSet dangerous_characters( - icu::UnicodeString( - "[[\\u0020\\u00ad\\u00bc\\u00bd\\u01c3\\u0337\\u0338" - "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]" - "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]" - "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae" - "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014" - "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe" - "14" - "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\uff" - "f9]" - "[\\ufffa-\\ufffd]\\U0001f50f\\U0001f510\\U0001f512\\U0001f513]", - -1, US_INV), - status); - DCHECK(U_SUCCESS(status)); - icu::RegexMatcher dangerous_patterns( - icu::UnicodeString( - // Lone katakana no, so, or n - "[^\\p{Katakana}][\\u30ce\\u30f3\\u30bd][^\\p{Katakana}]" - // Repeating Japanese accent characters - 
"|[\\u3099\\u309a\\u309b\\u309c][\\u3099\\u309a\\u309b\\u309c]"), - 0, status); -#endif - DCHECK(U_SUCCESS(status)); - icu::UnicodeSet component_characters; - icu::UnicodeString component_string(str, str_len); - component_characters.addAll(component_string); - if (dangerous_characters.containsSome(component_characters)) - return false; - - DCHECK(U_SUCCESS(status)); - dangerous_patterns.reset(component_string); - if (dangerous_patterns.find()) - return false; - - // If the language list is empty, the result is completely determined - // by whether a component is a single script or not. This will block - // even "safe" script mixing cases like <Chinese, Latin-ASCII> that are - // allowed with |languages| (while it blocks Chinese + Latin letters with - // an accent as should be the case), but we want to err on the safe side - // when |languages| is empty. - if (languages.empty()) - return IsIDNComponentInSingleScript(str, str_len); - - // |common_characters| is made up of ASCII numbers, hyphen, plus and - // underscore that are used across scripts and allowed in domain names. - // (sync'd with characters allowed in url_canon_host with square - // brackets excluded.) See kHostCharLookup[] array in url_canon_host.cc. - icu::UnicodeSet common_characters(UNICODE_STRING_SIMPLE("[[0-9]\\-_+\\ ]"), - status); - DCHECK(U_SUCCESS(status)); - // Subtract common characters because they're always allowed so that - // we just have to check if a language-specific set contains - // the remainder. - component_characters.removeAll(common_characters); - - base::StringTokenizer t(languages, ","); - while (t.GetNext()) { - if (IsComponentCoveredByLang(component_characters, t.token())) - return true; - } - return false; -} - -// A wrapper to use LazyInstance<>::Leaky with ICU's UIDNA, a C pointer to -// a UTS46/IDNA 2008 handling object opened with uidna_openUTS46(). -// -// We use UTS46 with BiDiCheck to migrate from IDNA 2003 to IDNA 2008 with -// the backward compatibility in mind. 
What it does: -// -// 1. Use the up-to-date Unicode data. -// 2. Define a case folding/mapping with the up-to-date Unicode data as -// in IDNA 2003. -// 3. Use transitional mechanism for 4 deviation characters (sharp-s, -// final sigma, ZWJ and ZWNJ) for now. -// 4. Continue to allow symbols and punctuations. -// 5. Apply new BiDi check rules more permissive than the IDNA 2003 BiDI rules. -// 6. Do not apply STD3 rules -// 7. Do not allow unassigned code points. -// -// It also closely matches what IE 10 does except for the BiDi check ( -// http://goo.gl/3XBhqw ). -// See http://http://unicode.org/reports/tr46/ and references therein -// for more details. -struct UIDNAWrapper { - UIDNAWrapper() { - UErrorCode err = U_ZERO_ERROR; - // TODO(jungshik): Change options as different parties (browsers, - // registrars, search engines) converge toward a consensus. - value = uidna_openUTS46(UIDNA_CHECK_BIDI, &err); - if (U_FAILURE(err)) - value = NULL; - } - - UIDNA* value; -}; - -static base::LazyInstance<UIDNAWrapper>::Leaky g_uidna = - LAZY_INSTANCE_INITIALIZER; - -// Converts one component of a host (between dots) to IDN if safe. The result -// will be APPENDED to the given output string and will be the same as the input -// if it is not IDN or the IDN is unsafe to display. Returns whether any -// conversion was performed. -bool IDNToUnicodeOneComponent(const base::char16* comp, - size_t comp_len, - const std::string& languages, - base::string16* out) { - DCHECK(out); - if (comp_len == 0) - return false; - - // Only transform if the input can be an IDN component. 
- static const base::char16 kIdnPrefix[] = {'x', 'n', '-', '-'}; - if ((comp_len > arraysize(kIdnPrefix)) && - !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(base::char16))) { - UIDNA* uidna = g_uidna.Get().value; - DCHECK(uidna != NULL); - size_t original_length = out->length(); - int output_length = 64; - UIDNAInfo info = UIDNA_INFO_INITIALIZER; - UErrorCode status; - do { - out->resize(original_length + output_length); - status = U_ZERO_ERROR; - // This returns the actual length required. If this is more than 64 - // code units, |status| will be U_BUFFER_OVERFLOW_ERROR and we'll try - // the conversion again, but with a sufficiently large buffer. - output_length = uidna_labelToUnicode( - uidna, comp, static_cast<int32_t>(comp_len), &(*out)[original_length], - output_length, &info, &status); - } while ((status == U_BUFFER_OVERFLOW_ERROR && info.errors == 0)); - - if (U_SUCCESS(status) && info.errors == 0) { - // Converted successfully. Ensure that the converted component - // can be safely displayed to the user. - out->resize(original_length + output_length); - if (IsIDNComponentSafe(out->data() + original_length, output_length, - languages)) - return true; - } - - // Something went wrong. Revert to original string. - out->resize(original_length); - } - - // We get here with no IDN or on error, in which case we just append the - // literal input. 
- out->append(comp, comp_len); - return false; -} - -} // namespace - -const FormatUrlType kFormatUrlOmitNothing = 0; -const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0; -const FormatUrlType kFormatUrlOmitHTTP = 1 << 1; -const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2; -const FormatUrlType kFormatUrlOmitAll = - kFormatUrlOmitUsernamePassword | kFormatUrlOmitHTTP | - kFormatUrlOmitTrailingSlashOnBareHostname; - -base::string16 FormatUrl(const GURL& url, - const std::string& languages, - FormatUrlTypes format_types, - net::UnescapeRule::Type unescape_rules, - url::Parsed* new_parsed, - size_t* prefix_end, - size_t* offset_for_adjustment) { - std::vector<size_t> offsets; - if (offset_for_adjustment) - offsets.push_back(*offset_for_adjustment); - base::string16 result = - FormatUrlWithOffsets(url, languages, format_types, unescape_rules, - new_parsed, prefix_end, &offsets); - if (offset_for_adjustment) - *offset_for_adjustment = offsets[0]; - return result; -} - -base::string16 FormatUrlWithOffsets( - const GURL& url, - const std::string& languages, - FormatUrlTypes format_types, - net::UnescapeRule::Type unescape_rules, - url::Parsed* new_parsed, - size_t* prefix_end, - std::vector<size_t>* offsets_for_adjustment) { - base::OffsetAdjuster::Adjustments adjustments; - const base::string16& format_url_return_value = - FormatUrlWithAdjustments(url, languages, format_types, unescape_rules, - new_parsed, prefix_end, &adjustments); - base::OffsetAdjuster::AdjustOffsets(adjustments, offsets_for_adjustment); - if (offsets_for_adjustment) { - std::for_each( - offsets_for_adjustment->begin(), offsets_for_adjustment->end(), - base::LimitOffset<std::string>(format_url_return_value.length())); - } - return format_url_return_value; -} - -base::string16 FormatUrlWithAdjustments( - const GURL& url, - const std::string& languages, - FormatUrlTypes format_types, - net::UnescapeRule::Type unescape_rules, - url::Parsed* new_parsed, - size_t* prefix_end, - 
base::OffsetAdjuster::Adjustments* adjustments) { - DCHECK(adjustments != NULL); - adjustments->clear(); - url::Parsed parsed_temp; - if (!new_parsed) - new_parsed = &parsed_temp; - else - *new_parsed = url::Parsed(); - - // Special handling for view-source:. Don't use content::kViewSourceScheme - // because this library shouldn't depend on chrome. - const char kViewSource[] = "view-source"; - // Reject "view-source:view-source:..." to avoid deep recursion. - const char kViewSourceTwice[] = "view-source:view-source:"; - if (url.SchemeIs(kViewSource) && - !base::StartsWith(url.possibly_invalid_spec(), kViewSourceTwice, - base::CompareCase::INSENSITIVE_ASCII)) { - return FormatViewSourceUrl(url, languages, format_types, unescape_rules, - new_parsed, prefix_end, adjustments); - } - - // We handle both valid and invalid URLs (this will give us the spec - // regardless of validity). - const std::string& spec = url.possibly_invalid_spec(); - const url::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); - - // Scheme & separators. These are ASCII. - base::string16 url_string; - url_string.insert( - url_string.end(), spec.begin(), - spec.begin() + parsed.CountCharactersBefore(url::Parsed::USERNAME, true)); - const char kHTTP[] = "http://"; - const char kFTP[] = "ftp."; - // url_formatter::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com. This - // means that if we trim "http://" off a URL whose host starts with "ftp." and - // the user inputs this into any field subject to fixup (which is basically - // all input fields), the meaning would be changed. (In fact, often the - // formatted URL is directly pre-filled into an input field.) For this reason - // we avoid stripping "http://" in this case. - bool omit_http = - (format_types & kFormatUrlOmitHTTP) && - base::EqualsASCII(url_string, kHTTP) && - !base::StartsWith(url.host(), kFTP, base::CompareCase::SENSITIVE); - new_parsed->scheme = parsed.scheme; - - // Username & password. 
- if ((format_types & kFormatUrlOmitUsernamePassword) != 0) { - // Remove the username and password fields. We don't want to display those - // to the user since they can be used for attacks, - // e.g. "http://google.com:search@evil.ru/" - new_parsed->username.reset(); - new_parsed->password.reset(); - // Update the adjustments based on removed username and/or password. - if (parsed.username.is_nonempty() || parsed.password.is_nonempty()) { - if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) { - // The seeming off-by-two is to account for the ':' after the username - // and '@' after the password. - adjustments->push_back(base::OffsetAdjuster::Adjustment( - static_cast<size_t>(parsed.username.begin), - static_cast<size_t>(parsed.username.len + parsed.password.len + 2), - 0)); - } else { - const url::Component* nonempty_component = - parsed.username.is_nonempty() ? &parsed.username : &parsed.password; - // The seeming off-by-one is to account for the '@' after the - // username/password. - adjustments->push_back(base::OffsetAdjuster::Adjustment( - static_cast<size_t>(nonempty_component->begin), - static_cast<size_t>(nonempty_component->len + 1), 0)); - } - } - } else { - AppendFormattedComponent(spec, parsed.username, - NonHostComponentTransform(unescape_rules), - &url_string, &new_parsed->username, adjustments); - if (parsed.password.is_valid()) - url_string.push_back(':'); - AppendFormattedComponent(spec, parsed.password, - NonHostComponentTransform(unescape_rules), - &url_string, &new_parsed->password, adjustments); - if (parsed.username.is_valid() || parsed.password.is_valid()) - url_string.push_back('@'); - } - if (prefix_end) - *prefix_end = static_cast<size_t>(url_string.length()); - - // Host. - AppendFormattedComponent(spec, parsed.host, HostComponentTransform(languages), - &url_string, &new_parsed->host, adjustments); - - // Port. 
- if (parsed.port.is_nonempty()) { - url_string.push_back(':'); - new_parsed->port.begin = url_string.length(); - url_string.insert(url_string.end(), spec.begin() + parsed.port.begin, - spec.begin() + parsed.port.end()); - new_parsed->port.len = url_string.length() - new_parsed->port.begin; - } else { - new_parsed->port.reset(); - } - - // Path & query. Both get the same general unescape & convert treatment. - if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) || - !CanStripTrailingSlash(url)) { - AppendFormattedComponent(spec, parsed.path, - NonHostComponentTransform(unescape_rules), - &url_string, &new_parsed->path, adjustments); - } else { - if (parsed.path.len > 0) { - adjustments->push_back(base::OffsetAdjuster::Adjustment( - parsed.path.begin, parsed.path.len, 0)); - } - } - if (parsed.query.is_valid()) - url_string.push_back('?'); - AppendFormattedComponent(spec, parsed.query, - NonHostComponentTransform(unescape_rules), - &url_string, &new_parsed->query, adjustments); - - // Ref. This is valid, unescaped UTF-8, so we can just convert. - if (parsed.ref.is_valid()) - url_string.push_back('#'); - AppendFormattedComponent(spec, parsed.ref, - NonHostComponentTransform(net::UnescapeRule::NONE), - &url_string, &new_parsed->ref, adjustments); - - // If we need to strip out http do it after the fact. - if (omit_http && base::StartsWith(url_string, base::ASCIIToUTF16(kHTTP), - base::CompareCase::SENSITIVE)) { - const size_t kHTTPSize = arraysize(kHTTP) - 1; - url_string = url_string.substr(kHTTPSize); - // Because offsets in the |adjustments| are already calculated with respect - // to the string with the http:// prefix in it, those offsets remain correct - // after stripping the prefix. The only thing necessary is to add an - // adjustment to reflect the stripped prefix. - adjustments->insert(adjustments->begin(), - base::OffsetAdjuster::Adjustment(0, kHTTPSize, 0)); - - if (prefix_end) - *prefix_end -= kHTTPSize; - - // Adjust new_parsed. 
- DCHECK(new_parsed->scheme.is_valid()); - int delta = -(new_parsed->scheme.len + 3); // +3 for ://. - new_parsed->scheme.reset(); - AdjustAllComponentsButScheme(delta, new_parsed); - } - - return url_string; -} - -bool CanStripTrailingSlash(const GURL& url) { - // Omit the path only for standard, non-file URLs with nothing but "/" after - // the hostname. - return url.IsStandard() && !url.SchemeIsFile() && !url.SchemeIsFileSystem() && - !url.has_query() && !url.has_ref() && url.path() == "/"; -} - -void AppendFormattedHost(const GURL& url, - const std::string& languages, - base::string16* output) { - AppendFormattedComponent( - url.possibly_invalid_spec(), url.parsed_for_possibly_invalid_spec().host, - HostComponentTransform(languages), output, NULL, NULL); -} - -base::string16 IDNToUnicode(const std::string& host, - const std::string& languages) { - return IDNToUnicodeWithAdjustments(host, languages, NULL); -} - -} // url_formatter diff --git a/components/url_formatter/url_formatter.gyp b/components/url_formatter/url_formatter.gyp deleted file mode 100644 index 9375e96..0000000 --- a/components/url_formatter/url_formatter.gyp +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright 2015 The Chromium Authors. All rights reserved. -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. - -{ - 'targets': [ - { - # GN version: //components/url_formatter - 'target_name': 'url_formatter', - 'type': 'static_library', - 'dependencies': [ - '../../base/base.gyp:base', - '../../net/net.gyp:net', - '../../third_party/icu/icu.gyp:icui18n', - '../../third_party/icu/icu.gyp:icuuc', - '../../url/url.gyp:url_lib', - ], - 'sources': [ - # Note: sources list duplicated in GN build. - 'elide_url.cc', - 'elide_url.h', - 'url_fixer.cc', - 'url_fixer.h', - 'url_formatter.cc', - 'url_formatter.h', - ], - # TODO(jschuh): crbug.com/167187 fix size_t to int truncations. 
- 'msvs_disabled_warnings': [4267, ], - - 'conditions': [ - ['OS != "android"', { - 'dependencies': [ - '../../ui/gfx/gfx.gyp:gfx', - ] - }], - ], - }, - ], -} diff --git a/components/url_formatter/url_formatter.h b/components/url_formatter/url_formatter.h deleted file mode 100644 index 01c8795..0000000 --- a/components/url_formatter/url_formatter.h +++ /dev/null @@ -1,155 +0,0 @@ -// Copyright 2015 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// url_formatter contains routines for formatting URLs in a way that can be -// safely and securely displayed to users. For example, it is responsible -// for determining when to convert an IDN A-Label (e.g. "xn--[something]") -// into the IDN U-Label. -// -// Note that this formatting is only intended for display purposes; it would -// be insecure and insufficient to make comparisons solely on formatted URLs -// (that is, it should not be used for normalizing URLs for comparison for -// security decisions). - -#ifndef COMPONENTS_URL_FORMATTER_URL_FORMATTER_H_ -#define COMPONENTS_URL_FORMATTER_URL_FORMATTER_H_ - -#include <stdint.h> - -#include <string> -#include <vector> - -#include "base/strings/string16.h" -#include "base/strings/utf_offset_string_conversions.h" -#include "net/base/escape.h" - -class GURL; - -namespace url { -struct Parsed; -} // url - -namespace url_formatter { - -// Used by FormatUrl to specify handling of certain parts of the url. -typedef uint32_t FormatUrlType; -typedef uint32_t FormatUrlTypes; - -// Nothing is ommitted. -extern const FormatUrlType kFormatUrlOmitNothing; - -// If set, any username and password are removed. -extern const FormatUrlType kFormatUrlOmitUsernamePassword; - -// If the scheme is 'http://', it's removed. -extern const FormatUrlType kFormatUrlOmitHTTP; - -// Omits the path if it is just a slash and there is no query or ref. 
This is -// meaningful for non-file "standard" URLs. -extern const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname; - -// Convenience for omitting all unecessary types. -extern const FormatUrlType kFormatUrlOmitAll; - -// Creates a string representation of |url|. The IDN host name may be in Unicode -// if |languages| accepts the Unicode representation. |format_type| is a bitmask -// of FormatUrlTypes, see it for details. |unescape_rules| defines how to clean -// the URL for human readability. You will generally want |UnescapeRule::SPACES| -// for display to the user if you can handle spaces, or |UnescapeRule::NORMAL| -// if not. If the path part and the query part seem to be encoded in %-encoded -// UTF-8, decodes %-encoding and UTF-8. -// -// The last three parameters may be NULL. -// -// |new_parsed| will be set to the parsing parameters of the resultant URL. -// -// |prefix_end| will be the length before the hostname of the resultant URL. -// -// |offset[s]_for_adjustment| specifies one or more offsets into the original -// URL, representing insertion or selection points between characters: if the -// input is "http://foo.com/", offset 0 is before the entire URL, offset 7 is -// between the scheme and the host, and offset 15 is after the end of the URL. -// Valid input offsets range from 0 to the length of the input URL string. On -// exit, each offset will have been modified to reflect any changes made to the -// output string. For example, if |url| is "http://a:b@c.com/", -// |omit_username_password| is true, and an offset is 12 (pointing between 'c' -// and '.'), then on return the output string will be "http://c.com/" and the -// offset will be 8. If an offset cannot be successfully adjusted (e.g. because -// it points into the middle of a component that was entirely removed or into -// the middle of an encoding sequence), it will be set to base::string16::npos. 
-// For consistency, if an input offset points between the scheme and the -// username/password, and both are removed, on output this offset will be 0 -// rather than npos; this means that offsets at the starts and ends of removed -// components are always transformed the same way regardless of what other -// components are adjacent. -base::string16 FormatUrl(const GURL& url, - const std::string& languages, - FormatUrlTypes format_types, - net::UnescapeRule::Type unescape_rules, - url::Parsed* new_parsed, - size_t* prefix_end, - size_t* offset_for_adjustment); - -base::string16 FormatUrlWithOffsets( - const GURL& url, - const std::string& languages, - FormatUrlTypes format_types, - net::UnescapeRule::Type unescape_rules, - url::Parsed* new_parsed, - size_t* prefix_end, - std::vector<size_t>* offsets_for_adjustment); - -// This function is like those above except it takes |adjustments| rather -// than |offset[s]_for_adjustment|. |adjustments| will be set to reflect all -// the transformations that happened to |url| to convert it into the returned -// value. -base::string16 FormatUrlWithAdjustments( - const GURL& url, - const std::string& languages, - FormatUrlTypes format_types, - net::UnescapeRule::Type unescape_rules, - url::Parsed* new_parsed, - size_t* prefix_end, - base::OffsetAdjuster::Adjustments* adjustments); - -// This is a convenience function for FormatUrl() with -// format_types = kFormatUrlOmitAll and unescape = SPACES. This is the typical -// set of flags for "URLs to display to the user". You should be cautious about -// using this for URLs which will be parsed or sent to other applications. -inline base::string16 FormatUrl(const GURL& url, const std::string& languages) { - return FormatUrl(url, languages, kFormatUrlOmitAll, net::UnescapeRule::SPACES, - nullptr, nullptr, nullptr); -} - -// Returns whether FormatUrl() would strip a trailing slash from |url|, given a -// format flag including kFormatUrlOmitTrailingSlashOnBareHostname. 
-bool CanStripTrailingSlash(const GURL& url); - -// Formats the host in |url| and appends it to |output|. The host formatter -// takes the same accept languages component as ElideURL(). -void AppendFormattedHost(const GURL& url, - const std::string& languages, - base::string16* output); - -// Converts the given host name to unicode characters. This can be called for -// any host name, if the input is not IDN or is invalid in some way, we'll just -// return the ASCII source so it is still usable. -// -// The input should be the canonicalized ASCII host name from GURL. This -// function does NOT accept UTF-8! -// -// |languages| is a comma separated list of ISO 639 language codes. It -// is used to determine whether a hostname is 'comprehensible' to a user -// who understands languages listed. |host| will be converted to a -// human-readable form (Unicode) ONLY when each component of |host| is -// regarded as 'comprehensible'. Scipt-mixing is not allowed except that -// Latin letters in the ASCII range can be mixed with a limited set of -// script-language pairs (currently Han, Kana and Hangul for zh,ja and ko). -// When |languages| is empty, even that mixing is not allowed. -base::string16 IDNToUnicode(const std::string& host, - const std::string& languages); - -} // url_formatter - -#endif // COMPONENTS_URL_FORMATTER_URL_FORMATTER_H_ diff --git a/components/url_formatter/url_formatter_unittest.cc b/components/url_formatter/url_formatter_unittest.cc deleted file mode 100644 index 0dd635a..0000000 --- a/components/url_formatter/url_formatter_unittest.cc +++ /dev/null @@ -1,978 +0,0 @@ -// Copyright 2015 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#include "components/url_formatter/url_formatter.h" - -#include <string.h> - -#include <vector> - -#include "base/macros.h" -#include "base/strings/string_number_conversions.h" -#include "base/strings/stringprintf.h" -#include "base/strings/utf_string_conversions.h" -#include "testing/gtest/include/gtest/gtest.h" -#include "url/gurl.h" - - -namespace url_formatter { - -namespace { - -using base::WideToUTF16; -using base::ASCIIToUTF16; - -const size_t kNpos = base::string16::npos; - -const char* const kLanguages[] = { - "", "en", "zh-CN", "ja", "ko", - "he", "ar", "ru", "el", "fr", - "de", "pt", "sv", "th", "hi", - "de,en", "el,en", "zh-TW,en", "ko,ja", "he,ru,en", - "zh,ru,en" -}; - -struct IDNTestCase { - const char* const input; - const wchar_t* unicode_output; - const bool unicode_allowed[arraysize(kLanguages)]; -}; - -// TODO(jungshik) This is just a random sample of languages and is far -// from exhaustive. We may have to generate all the combinations -// of languages (powerset of a set of all the languages). 
-const IDNTestCase idn_cases[] = { - // No IDN - {"www.google.com", L"www.google.com", - {true, true, true, true, true, - true, true, true, true, true, - true, true, true, true, true, - true, true, true, true, true, - true}}, - {"www.google.com.", L"www.google.com.", - {true, true, true, true, true, - true, true, true, true, true, - true, true, true, true, true, - true, true, true, true, true, - true}}, - {".", L".", - {true, true, true, true, true, - true, true, true, true, true, - true, true, true, true, true, - true, true, true, true, true, - true}}, - {"", L"", - {true, true, true, true, true, - true, true, true, true, true, - true, true, true, true, true, - true, true, true, true, true, - true}}, - // IDN - // Hanzi (Traditional Chinese) - {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn", - {true, false, true, true, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, true, true, false, - true}}, - // Hanzi ('video' in Simplified Chinese : will pass only in zh-CN,zh) - {"xn--cy2a840a.com", L"\x89c6\x9891.com", - {true, false, true, false, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - true}}, - // Hanzi + '123' - {"www.xn--123-p18d.com", L"www.\x4e00" L"123.com", - {true, false, true, true, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, true, true, false, - true}}, - // Hanzi + Latin : U+56FD is simplified and is regarded - // as not supported in zh-TW. 
- {"www.xn--hello-9n1hm04c.com", L"www.hello\x4e2d\x56fd.com", - {false, false, true, true, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, true, false, - true}}, - // Kanji + Kana (Japanese) - {"xn--l8jvb1ey91xtjb.jp", L"\x671d\x65e5\x3042\x3055\x3072.jp", - {true, false, false, true, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, true, false, - false}}, - // Katakana including U+30FC - {"xn--tckm4i2e.jp", L"\x30b3\x30de\x30fc\x30b9.jp", - {true, false, false, true, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, true, false, - }}, - {"xn--3ck7a7g.jp", L"\u30ce\u30f3\u30bd.jp", - {true, false, false, true, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, true, false, - }}, - // Katakana + Latin (Japanese) - // TODO(jungshik): Change 'false' in the first element to 'true' - // after upgrading to ICU 4.2.1 to use new uspoof_* APIs instead - // of our IsIDNComponentInSingleScript(). 
- {"xn--e-efusa1mzf.jp", L"e\x30b3\x30de\x30fc\x30b9.jp", - {false, false, false, true, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, true, false, - }}, - {"xn--3bkxe.jp", L"\x30c8\x309a.jp", - {false, false, false, true, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, true, false, - }}, - // Hangul (Korean) - {"www.xn--or3b17p6jjc.kr", L"www.\xc804\xc790\xc815\xbd80.kr", - {true, false, false, false, true, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, true, false, - false}}, - // b<u-umlaut>cher (German) - {"xn--bcher-kva.de", L"b\x00fc" L"cher.de", - {true, false, false, false, false, - false, false, false, false, true, - true, false, false, false, false, - true, false, false, false, false, - false}}, - // a with diaeresis - {"www.xn--frgbolaget-q5a.se", L"www.f\x00e4rgbolaget.se", - {true, false, false, false, false, - false, false, false, false, false, - true, false, true, false, false, - true, false, false, false, false, - false}}, - // c-cedilla (French) - {"www.xn--alliancefranaise-npb.fr", L"www.alliancefran\x00e7" L"aise.fr", - {true, false, false, false, false, - false, false, false, false, true, - false, true, false, false, false, - false, false, false, false, false, - false}}, - // caf'e with acute accent' (French) - {"xn--caf-dma.fr", L"caf\x00e9.fr", - {true, false, false, false, false, - false, false, false, false, true, - false, true, true, false, false, - false, false, false, false, false, - false}}, - // c-cedillla and a with tilde (Portuguese) - {"xn--poema-9qae5a.com.br", L"p\x00e3oema\x00e7\x00e3.com.br", - {true, false, false, false, false, - false, false, false, false, false, - false, true, false, false, false, - false, false, false, false, false, - false}}, - // s with caron - {"xn--achy-f6a.com", L"\x0161" L"achy.com", - {true, false, false, false, false, - false, false, false, 
false, false, - false, false, false, false, false, - false, false, false, false, false, - false}}, - // TODO(jungshik) : Add examples with Cyrillic letters - // only used in some languages written in Cyrillic. - // Eutopia (Greek) - {"xn--kxae4bafwg.gr", L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1.gr", - {true, false, false, false, false, - false, false, false, true, false, - false, false, false, false, false, - false, true, false, false, false, - false}}, - // Eutopia + 123 (Greek) - {"xn---123-pldm0haj2bk.gr", - L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1-123.gr", - {true, false, false, false, false, - false, false, false, true, false, - false, false, false, false, false, - false, true, false, false, false, - false}}, - // Cyrillic (Russian) - {"xn--n1aeec9b.ru", L"\x0442\x043e\x0440\x0442\x044b.ru", - {true, false, false, false, false, - false, false, true, false, false, - false, false, false, false, false, - false, false, false, false, true, - true}}, - // Cyrillic + 123 (Russian) - {"xn---123-45dmmc5f.ru", L"\x0442\x043e\x0440\x0442\x044b-123.ru", - {true, false, false, false, false, - false, false, true, false, false, - false, false, false, false, false, - false, false, false, false, true, - true}}, - // Arabic - {"xn--mgba1fmg.ar", L"\x0627\x0641\x0644\x0627\x0645.ar", - {true, false, false, false, false, - false, true, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false}}, - // Hebrew - {"xn--4dbib.he", L"\x05d5\x05d0\x05d4.he", - {true, false, false, false, false, - true, false, false, false, false, - false, false, false, false, false, - false, false, false, false, true, - false}}, - // Thai - {"xn--12c2cc4ag3b4ccu.th", - L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th", - {true, false, false, false, false, - false, false, false, false, false, - false, false, false, true, false, - false, false, false, false, false, - false}}, - // Devangari (Hindi) - {"www.xn--l1b6a9e1b7c.in", 
L"www.\x0905\x0915\x094b\x0932\x093e.in", - {true, false, false, false, false, - false, false, false, false, false, - false, false, false, false, true, - false, false, false, false, false, - false}}, - // Invalid IDN - {"xn--hello?world.com", NULL, - {false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false}}, - // Unsafe IDNs - // "payp<alpha>l.com" - {"www.xn--paypl-g9d.com", L"payp\x03b1l.com", - {false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false}}, - // google.gr with Greek omicron and epsilon - {"xn--ggl-6xc1ca.gr", L"g\x03bf\x03bfgl\x03b5.gr", - {false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false}}, - // google.ru with Cyrillic o - {"xn--ggl-tdd6ba.ru", L"g\x043e\x043egl\x0435.ru", - {false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false}}, - // h<e with acute>llo<China in Han>.cn - {"xn--hllo-bpa7979ih5m.cn", L"h\x00e9llo\x4e2d\x56fd.cn", - {false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false}}, - // <Greek rho><Cyrillic a><Cyrillic u>.ru - {"xn--2xa6t2b.ru", L"\x03c1\x0430\x0443.ru", - {false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false}}, - // One that's really long that will force a buffer realloc - {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" - "aaaaaaa", - L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" - L"aaaaaaaa", - {true, true, true, true, true, - true, true, true, true, true, - true, true, 
true, true, true, - true, true, true, true, true, - true}}, - // Test cases for characters we blacklisted although allowed in IDN. - // Embedded spaces will be turned to %20 in the display. - // TODO(jungshik): We need to have more cases. This is a typical - // data-driven trap. The following test cases need to be separated - // and tested only for a couple of languages. - {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr", - {false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false}}, - {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com", - {false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - }}, - {"google.xn--comabc-k8d", L"google.com\x0338" L"abc", - {false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - }}, - {"google.xn--com-oh4ba.evil.jp", L"google.com\x309a\x309a.evil.jp", - {false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - }}, - {"google.xn--comevil-v04f.jp", L"google.com\x30ce" L"evil.jp", - {false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - }}, - // Padlock icon spoof. - {"xn--google-hj64e", L"\U0001f512google.com", - {false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - }}, - // Ensure that blacklisting "\xd83d\xdd12" did not inadvertently blacklist - // all strings with the surrogate '\xdd12'. 
- {"xn--fk9c.com", L"\U00010912.com", - {true, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - }}, -#if 0 - // These two cases are special. We need a separate test. - // U+3000 and U+3002 are normalized to ASCII space and dot. - {"xn-- -kq6ay5z.cn", L"\x4e2d\x56fd\x3000.cn", - {false, false, true, false, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, true, false, false, - true}}, - {"xn--fiqs8s.cn", L"\x4e2d\x56fd\x3002" L"cn", - {false, false, true, false, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, true, false, false, - true}}, -#endif -}; - -struct AdjustOffsetCase { - size_t input_offset; - size_t output_offset; -}; - -struct UrlTestData { - const char* const description; - const char* const input; - const char* const languages; - FormatUrlTypes format_types; - net::UnescapeRule::Type escape_rules; - const wchar_t* output; // Use |wchar_t| to handle Unicode constants easily. - size_t prefix_len; -}; - -// A helper for IDN*{Fast,Slow}. -// Append "::<language list>" to |expected| and |actual| to make it -// easy to tell which sub-case fails without debugging. -void AppendLanguagesToOutputs(const char* languages, - base::string16* expected, - base::string16* actual) { - base::string16 to_append = ASCIIToUTF16("::") + ASCIIToUTF16(languages); - expected->append(to_append); - actual->append(to_append); -} - -// A pair of helpers for the FormatUrlWithOffsets() test. 
-void VerboseExpect(size_t expected, - size_t actual, - const std::string& original_url, - size_t position, - const base::string16& formatted_url) { - EXPECT_EQ(expected, actual) << "Original URL: " << original_url - << " (at char " << position << ")\nFormatted URL: " << formatted_url; -} - -void CheckAdjustedOffsets(const std::string& url_string, - const std::string& languages, - FormatUrlTypes format_types, - net::UnescapeRule::Type unescape_rules, - const size_t* output_offsets) { - GURL url(url_string); - size_t url_length = url_string.length(); - std::vector<size_t> offsets; - for (size_t i = 0; i <= url_length + 1; ++i) - offsets.push_back(i); - offsets.push_back(500000); // Something larger than any input length. - offsets.push_back(std::string::npos); - base::string16 formatted_url = FormatUrlWithOffsets(url, languages, - format_types, unescape_rules, NULL, NULL, &offsets); - for (size_t i = 0; i < url_length; ++i) - VerboseExpect(output_offsets[i], offsets[i], url_string, i, formatted_url); - VerboseExpect(formatted_url.length(), offsets[url_length], url_string, - url_length, formatted_url); - VerboseExpect(base::string16::npos, offsets[url_length + 1], url_string, - 500000, formatted_url); - VerboseExpect(base::string16::npos, offsets[url_length + 2], url_string, - std::string::npos, formatted_url); -} - -TEST(UrlFormatterTest, IDNToUnicodeFast) { - for (size_t i = 0; i < arraysize(idn_cases); i++) { - for (size_t j = 0; j < arraysize(kLanguages); j++) { - // ja || zh-TW,en || ko,ja -> IDNToUnicodeSlow - if (j == 3 || j == 17 || j == 18) - continue; - base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j])); - base::string16 expected(idn_cases[i].unicode_allowed[j] ? 
- WideToUTF16(idn_cases[i].unicode_output) : - ASCIIToUTF16(idn_cases[i].input)); - AppendLanguagesToOutputs(kLanguages[j], &expected, &output); - EXPECT_EQ(expected, output) << "input: \"" << idn_cases[i].input - << "\", languages: \"" << kLanguages[j] - << "\""; - } - } -} - -TEST(UrlFormatterTest, IDNToUnicodeSlow) { - for (size_t i = 0; i < arraysize(idn_cases); i++) { - for (size_t j = 0; j < arraysize(kLanguages); j++) { - // !(ja || zh-TW,en || ko,ja) -> IDNToUnicodeFast - if (!(j == 3 || j == 17 || j == 18)) - continue; - base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j])); - base::string16 expected(idn_cases[i].unicode_allowed[j] ? - WideToUTF16(idn_cases[i].unicode_output) : - ASCIIToUTF16(idn_cases[i].input)); - AppendLanguagesToOutputs(kLanguages[j], &expected, &output); - EXPECT_EQ(expected, output) << "input: \"" << idn_cases[i].input - << "\", languages: \"" << kLanguages[j] - << "\""; - } - } -} - -// ulocdata_getExemplarSet may fail with some locales (currently bn, gu, and -// te), which was causing a crash (See http://crbug.com/510551). This may be an -// icu bug, but regardless, that should not cause a crash. 
-TEST(UrlFormatterTest, IDNToUnicodeNeverCrashes) { - for (char c1 = 'a'; c1 <= 'z'; c1++) { - for (char c2 = 'a'; c2 <= 'z'; c2++) { - std::string lang = base::StringPrintf("%c%c", c1, c2); - base::string16 output(IDNToUnicode("xn--74h", lang)); - } - } -} - -TEST(UrlFormatterTest, FormatUrl) { - FormatUrlTypes default_format_type = kFormatUrlOmitUsernamePassword; - const UrlTestData tests[] = { - {"Empty URL", "", "", default_format_type, net::UnescapeRule::NORMAL, L"", - 0}, - - {"Simple URL", "http://www.google.com/", "", default_format_type, - net::UnescapeRule::NORMAL, L"http://www.google.com/", 7}, - - {"With a port number and a reference", - "http://www.google.com:8080/#\xE3\x82\xB0", "", default_format_type, - net::UnescapeRule::NORMAL, L"http://www.google.com:8080/#\x30B0", 7}, - - // -------- IDN tests -------- - {"Japanese IDN with ja", "http://xn--l8jvb1ey91xtjb.jp", "ja", - default_format_type, net::UnescapeRule::NORMAL, - L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7}, - - {"Japanese IDN with en", "http://xn--l8jvb1ey91xtjb.jp", "en", - default_format_type, net::UnescapeRule::NORMAL, - L"http://xn--l8jvb1ey91xtjb.jp/", 7}, - - {"Japanese IDN without any languages", "http://xn--l8jvb1ey91xtjb.jp", "", - default_format_type, net::UnescapeRule::NORMAL, - // Single script is safe for empty languages. - L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7}, - - {"mailto: with Japanese IDN", "mailto:foo@xn--l8jvb1ey91xtjb.jp", "ja", - default_format_type, net::UnescapeRule::NORMAL, - // GURL doesn't assume an email address's domain part as a host name. 
- L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7}, - - {"file: with Japanese IDN", "file://xn--l8jvb1ey91xtjb.jp/config.sys", - "ja", default_format_type, net::UnescapeRule::NORMAL, - L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7}, - - {"ftp: with Japanese IDN", "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", - default_format_type, net::UnescapeRule::NORMAL, - L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6}, - - // -------- omit_username_password flag tests -------- - {"With username and password, omit_username_password=false", - "http://user:passwd@example.com/foo", "", kFormatUrlOmitNothing, - net::UnescapeRule::NORMAL, L"http://user:passwd@example.com/foo", 19}, - - {"With username and password, omit_username_password=true", - "http://user:passwd@example.com/foo", "", default_format_type, - net::UnescapeRule::NORMAL, L"http://example.com/foo", 7}, - - {"With username and no password", "http://user@example.com/foo", "", - default_format_type, net::UnescapeRule::NORMAL, - L"http://example.com/foo", 7}, - - {"Just '@' without username and password", "http://@example.com/foo", "", - default_format_type, net::UnescapeRule::NORMAL, - L"http://example.com/foo", 7}, - - // GURL doesn't think local-part of an email address is username for URL. - {"mailto:, omit_username_password=true", "mailto:foo@example.com", "", - default_format_type, net::UnescapeRule::NORMAL, - L"mailto:foo@example.com", 7}, - - // -------- unescape flag tests -------- - {"Do not unescape", - "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" - "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" - "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", - "en", default_format_type, net::UnescapeRule::NONE, - // GURL parses %-encoded hostnames into Punycode. 
- L"http://xn--qcka1pmc.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" - L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", - 7}, - - {"Unescape normally", - "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" - "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" - "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", - "en", default_format_type, net::UnescapeRule::NORMAL, - L"http://xn--qcka1pmc.jp/\x30B0\x30FC\x30B0\x30EB" - L"?q=\x30B0\x30FC\x30B0\x30EB", - 7}, - - {"Unescape normally with BiDi control character", - "http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", "en", - default_format_type, net::UnescapeRule::NORMAL, - L"http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", 7}, - - {"Unescape normally including unescape spaces", - "http://www.google.com/search?q=Hello%20World", "en", - default_format_type, net::UnescapeRule::SPACES, - L"http://www.google.com/search?q=Hello World", 7}, - - /* - {"unescape=true with some special characters", - "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", "", - kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, - L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25}, - */ - // Disabled: the resultant URL becomes "...user%253A:%2540passwd...". 
- - // -------- omit http: -------- - {"omit http with user name", "http://user@example.com/foo", "", - kFormatUrlOmitAll, net::UnescapeRule::NORMAL, L"example.com/foo", 0}, - - {"omit http", "http://www.google.com/", "en", kFormatUrlOmitHTTP, - net::UnescapeRule::NORMAL, L"www.google.com/", 0}, - - {"omit http with https", "https://www.google.com/", "en", - kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL, - L"https://www.google.com/", 8}, - - {"omit http starts with ftp.", "http://ftp.google.com/", "en", - kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL, L"http://ftp.google.com/", - 7}, - - // -------- omit trailing slash on bare hostname -------- - {"omit slash when it's the entire path", "http://www.google.com/", "en", - kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, - L"http://www.google.com", 7}, - {"omit slash when there's a ref", "http://www.google.com/#ref", "en", - kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, - L"http://www.google.com/#ref", 7}, - {"omit slash when there's a query", "http://www.google.com/?", "en", - kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, - L"http://www.google.com/?", 7}, - {"omit slash when it's not the entire path", "http://www.google.com/foo", - "en", kFormatUrlOmitTrailingSlashOnBareHostname, - net::UnescapeRule::NORMAL, L"http://www.google.com/foo", 7}, - {"omit slash for nonstandard URLs", "data:/", "en", - kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, - L"data:/", 5}, - {"omit slash for file URLs", "file:///", "en", - kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, - L"file:///", 7}, - - // -------- view-source: -------- - {"view-source", "view-source:http://xn--qcka1pmc.jp/", "ja", - default_format_type, net::UnescapeRule::NORMAL, - L"view-source:http://\x30B0\x30FC\x30B0\x30EB.jp/", 19}, - - {"view-source of view-source", - "view-source:view-source:http://xn--qcka1pmc.jp/", "ja", - default_format_type, 
net::UnescapeRule::NORMAL, - L"view-source:view-source:http://xn--qcka1pmc.jp/", 12}, - - // view-source should omit http and trailing slash where non-view-source - // would. - {"view-source omit http", "view-source:http://a.b/c", "en", - kFormatUrlOmitAll, net::UnescapeRule::NORMAL, L"view-source:a.b/c", 12}, - {"view-source omit http starts with ftp.", "view-source:http://ftp.b/c", - "en", kFormatUrlOmitAll, net::UnescapeRule::NORMAL, - L"view-source:http://ftp.b/c", 19}, - {"view-source omit slash when it's the entire path", - "view-source:http://a.b/", "en", kFormatUrlOmitAll, - net::UnescapeRule::NORMAL, L"view-source:a.b", 12}, - }; - - for (size_t i = 0; i < arraysize(tests); ++i) { - size_t prefix_len; - base::string16 formatted = FormatUrl( - GURL(tests[i].input), tests[i].languages, tests[i].format_types, - tests[i].escape_rules, NULL, &prefix_len, NULL); - EXPECT_EQ(WideToUTF16(tests[i].output), formatted) << tests[i].description; - EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description; - } -} - -TEST(UrlFormatterTest, FormatUrlParsed) { - // No unescape case. 
- url::Parsed parsed; - base::string16 formatted = - FormatUrl(GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" - "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), - "ja", kFormatUrlOmitNothing, net::UnescapeRule::NONE, &parsed, - NULL, NULL); - EXPECT_EQ(WideToUTF16( - L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080" - L"/%E3%82%B0/?q=%E3%82%B0#\x30B0"), formatted); - EXPECT_EQ(WideToUTF16(L"%E3%82%B0"), - formatted.substr(parsed.username.begin, parsed.username.len)); - EXPECT_EQ(WideToUTF16(L"%E3%83%BC"), - formatted.substr(parsed.password.begin, parsed.password.len)); - EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"), - formatted.substr(parsed.host.begin, parsed.host.len)); - EXPECT_EQ(WideToUTF16(L"8080"), - formatted.substr(parsed.port.begin, parsed.port.len)); - EXPECT_EQ(WideToUTF16(L"/%E3%82%B0/"), - formatted.substr(parsed.path.begin, parsed.path.len)); - EXPECT_EQ(WideToUTF16(L"q=%E3%82%B0"), - formatted.substr(parsed.query.begin, parsed.query.len)); - EXPECT_EQ(WideToUTF16(L"\x30B0"), - formatted.substr(parsed.ref.begin, parsed.ref.len)); - - // Unescape case. 
- formatted = - FormatUrl(GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" - "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), - "ja", kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, &parsed, - NULL, NULL); - EXPECT_EQ(WideToUTF16(L"http://\x30B0:\x30FC@\x30B0\x30FC\x30B0\x30EB.jp:8080" - L"/\x30B0/?q=\x30B0#\x30B0"), formatted); - EXPECT_EQ(WideToUTF16(L"\x30B0"), - formatted.substr(parsed.username.begin, parsed.username.len)); - EXPECT_EQ(WideToUTF16(L"\x30FC"), - formatted.substr(parsed.password.begin, parsed.password.len)); - EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"), - formatted.substr(parsed.host.begin, parsed.host.len)); - EXPECT_EQ(WideToUTF16(L"8080"), - formatted.substr(parsed.port.begin, parsed.port.len)); - EXPECT_EQ(WideToUTF16(L"/\x30B0/"), - formatted.substr(parsed.path.begin, parsed.path.len)); - EXPECT_EQ(WideToUTF16(L"q=\x30B0"), - formatted.substr(parsed.query.begin, parsed.query.len)); - EXPECT_EQ(WideToUTF16(L"\x30B0"), - formatted.substr(parsed.ref.begin, parsed.ref.len)); - - // Omit_username_password + unescape case. 
- formatted = - FormatUrl(GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" - "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), - "ja", kFormatUrlOmitUsernamePassword, net::UnescapeRule::NORMAL, - &parsed, NULL, NULL); - EXPECT_EQ(WideToUTF16(L"http://\x30B0\x30FC\x30B0\x30EB.jp:8080" - L"/\x30B0/?q=\x30B0#\x30B0"), formatted); - EXPECT_FALSE(parsed.username.is_valid()); - EXPECT_FALSE(parsed.password.is_valid()); - EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"), - formatted.substr(parsed.host.begin, parsed.host.len)); - EXPECT_EQ(WideToUTF16(L"8080"), - formatted.substr(parsed.port.begin, parsed.port.len)); - EXPECT_EQ(WideToUTF16(L"/\x30B0/"), - formatted.substr(parsed.path.begin, parsed.path.len)); - EXPECT_EQ(WideToUTF16(L"q=\x30B0"), - formatted.substr(parsed.query.begin, parsed.query.len)); - EXPECT_EQ(WideToUTF16(L"\x30B0"), - formatted.substr(parsed.ref.begin, parsed.ref.len)); - - // View-source case. - formatted = - FormatUrl(GURL("view-source:http://user:passwd@host:81/path?query#ref"), - std::string(), kFormatUrlOmitUsernamePassword, - net::UnescapeRule::NORMAL, &parsed, NULL, NULL); - EXPECT_EQ(WideToUTF16(L"view-source:http://host:81/path?query#ref"), - formatted); - EXPECT_EQ(WideToUTF16(L"view-source:http"), - formatted.substr(parsed.scheme.begin, parsed.scheme.len)); - EXPECT_FALSE(parsed.username.is_valid()); - EXPECT_FALSE(parsed.password.is_valid()); - EXPECT_EQ(WideToUTF16(L"host"), - formatted.substr(parsed.host.begin, parsed.host.len)); - EXPECT_EQ(WideToUTF16(L"81"), - formatted.substr(parsed.port.begin, parsed.port.len)); - EXPECT_EQ(WideToUTF16(L"/path"), - formatted.substr(parsed.path.begin, parsed.path.len)); - EXPECT_EQ(WideToUTF16(L"query"), - formatted.substr(parsed.query.begin, parsed.query.len)); - EXPECT_EQ(WideToUTF16(L"ref"), - formatted.substr(parsed.ref.begin, parsed.ref.len)); - - // omit http case. 
- formatted = FormatUrl(GURL("http://host:8000/a?b=c#d"), std::string(), - kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL, &parsed, - NULL, NULL); - EXPECT_EQ(WideToUTF16(L"host:8000/a?b=c#d"), formatted); - EXPECT_FALSE(parsed.scheme.is_valid()); - EXPECT_FALSE(parsed.username.is_valid()); - EXPECT_FALSE(parsed.password.is_valid()); - EXPECT_EQ(WideToUTF16(L"host"), - formatted.substr(parsed.host.begin, parsed.host.len)); - EXPECT_EQ(WideToUTF16(L"8000"), - formatted.substr(parsed.port.begin, parsed.port.len)); - EXPECT_EQ(WideToUTF16(L"/a"), - formatted.substr(parsed.path.begin, parsed.path.len)); - EXPECT_EQ(WideToUTF16(L"b=c"), - formatted.substr(parsed.query.begin, parsed.query.len)); - EXPECT_EQ(WideToUTF16(L"d"), - formatted.substr(parsed.ref.begin, parsed.ref.len)); - - // omit http starts with ftp case. - formatted = FormatUrl(GURL("http://ftp.host:8000/a?b=c#d"), std::string(), - kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL, &parsed, - NULL, NULL); - EXPECT_EQ(WideToUTF16(L"http://ftp.host:8000/a?b=c#d"), formatted); - EXPECT_TRUE(parsed.scheme.is_valid()); - EXPECT_FALSE(parsed.username.is_valid()); - EXPECT_FALSE(parsed.password.is_valid()); - EXPECT_EQ(WideToUTF16(L"http"), - formatted.substr(parsed.scheme.begin, parsed.scheme.len)); - EXPECT_EQ(WideToUTF16(L"ftp.host"), - formatted.substr(parsed.host.begin, parsed.host.len)); - EXPECT_EQ(WideToUTF16(L"8000"), - formatted.substr(parsed.port.begin, parsed.port.len)); - EXPECT_EQ(WideToUTF16(L"/a"), - formatted.substr(parsed.path.begin, parsed.path.len)); - EXPECT_EQ(WideToUTF16(L"b=c"), - formatted.substr(parsed.query.begin, parsed.query.len)); - EXPECT_EQ(WideToUTF16(L"d"), - formatted.substr(parsed.ref.begin, parsed.ref.len)); - - // omit http starts with 'f' case. 
- formatted = FormatUrl(GURL("http://f/"), std::string(), kFormatUrlOmitHTTP, - net::UnescapeRule::NORMAL, &parsed, NULL, NULL); - EXPECT_EQ(WideToUTF16(L"f/"), formatted); - EXPECT_FALSE(parsed.scheme.is_valid()); - EXPECT_FALSE(parsed.username.is_valid()); - EXPECT_FALSE(parsed.password.is_valid()); - EXPECT_FALSE(parsed.port.is_valid()); - EXPECT_TRUE(parsed.path.is_valid()); - EXPECT_FALSE(parsed.query.is_valid()); - EXPECT_FALSE(parsed.ref.is_valid()); - EXPECT_EQ(WideToUTF16(L"f"), - formatted.substr(parsed.host.begin, parsed.host.len)); - EXPECT_EQ(WideToUTF16(L"/"), - formatted.substr(parsed.path.begin, parsed.path.len)); -} - -// Make sure that calling FormatUrl on a GURL and then converting back to a GURL -// results in the original GURL, for each ASCII character in the path. -TEST(UrlFormatterTest, FormatUrlRoundTripPathASCII) { - for (unsigned char test_char = 32; test_char < 128; ++test_char) { - GURL url(std::string("http://www.google.com/") + - static_cast<char>(test_char)); - size_t prefix_len; - base::string16 formatted = - FormatUrl(url, std::string(), kFormatUrlOmitUsernamePassword, - net::UnescapeRule::NORMAL, NULL, &prefix_len, NULL); - EXPECT_EQ(url.spec(), GURL(formatted).spec()); - } -} - -// Make sure that calling FormatUrl on a GURL and then converting back to a GURL -// results in the original GURL, for each escaped ASCII character in the path. 
-TEST(UrlFormatterTest, FormatUrlRoundTripPathEscaped) { - for (unsigned char test_char = 32; test_char < 128; ++test_char) { - std::string original_url("http://www.google.com/"); - original_url.push_back('%'); - original_url.append(base::HexEncode(&test_char, 1)); - - GURL url(original_url); - size_t prefix_len; - base::string16 formatted = - FormatUrl(url, std::string(), kFormatUrlOmitUsernamePassword, - net::UnescapeRule::NORMAL, NULL, &prefix_len, NULL); - EXPECT_EQ(url.spec(), GURL(formatted).spec()); - } -} - -// Make sure that calling FormatUrl on a GURL and then converting back to a GURL -// results in the original GURL, for each ASCII character in the query. -TEST(UrlFormatterTest, FormatUrlRoundTripQueryASCII) { - for (unsigned char test_char = 32; test_char < 128; ++test_char) { - GURL url(std::string("http://www.google.com/?") + - static_cast<char>(test_char)); - size_t prefix_len; - base::string16 formatted = - FormatUrl(url, std::string(), kFormatUrlOmitUsernamePassword, - net::UnescapeRule::NORMAL, NULL, &prefix_len, NULL); - EXPECT_EQ(url.spec(), GURL(formatted).spec()); - } -} - -// Make sure that calling FormatUrl on a GURL and then converting back to a GURL -// only results in a different GURL for certain characters. -TEST(UrlFormatterTest, FormatUrlRoundTripQueryEscaped) { - // A full list of characters which FormatURL should unescape and GURL should - // not escape again, when they appear in a query string. 
- const char kUnescapedCharacters[] = - "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_~"; - for (unsigned char test_char = 0; test_char < 128; ++test_char) { - std::string original_url("http://www.google.com/?"); - original_url.push_back('%'); - original_url.append(base::HexEncode(&test_char, 1)); - - GURL url(original_url); - size_t prefix_len; - base::string16 formatted = - FormatUrl(url, std::string(), kFormatUrlOmitUsernamePassword, - net::UnescapeRule::NORMAL, NULL, &prefix_len, NULL); - - if (test_char && - strchr(kUnescapedCharacters, static_cast<char>(test_char))) { - EXPECT_NE(url.spec(), GURL(formatted).spec()); - } else { - EXPECT_EQ(url.spec(), GURL(formatted).spec()); - } - } -} - -TEST(UrlFormatterTest, FormatUrlWithOffsets) { - CheckAdjustedOffsets(std::string(), "en", kFormatUrlOmitNothing, - net::UnescapeRule::NORMAL, NULL); - - const size_t basic_offsets[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, - 21, 22, 23, 24, 25 - }; - CheckAdjustedOffsets("http://www.google.com/foo/", "en", - kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, - basic_offsets); - - const size_t omit_auth_offsets_1[] = { - 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 7, - 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 - }; - CheckAdjustedOffsets("http://foo:bar@www.google.com/", "en", - kFormatUrlOmitUsernamePassword, - net::UnescapeRule::NORMAL, omit_auth_offsets_1); - - const size_t omit_auth_offsets_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21 - }; - CheckAdjustedOffsets("http://foo@www.google.com/", "en", - kFormatUrlOmitUsernamePassword, - net::UnescapeRule::NORMAL, omit_auth_offsets_2); - - const size_t dont_omit_auth_offsets[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, - kNpos, kNpos, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, - kNpos, 13, 14, 15, 16, 17, 18, 
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - 30, 31 - }; - // Unescape to "http://foo\x30B0:\x30B0bar@www.google.com". - CheckAdjustedOffsets("http://foo%E3%82%B0:%E3%82%B0bar@www.google.com/", "en", - kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, - dont_omit_auth_offsets); - - const size_t view_source_offsets[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, kNpos, - kNpos, kNpos, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33 - }; - CheckAdjustedOffsets("view-source:http://foo@www.google.com/", "en", - kFormatUrlOmitUsernamePassword, - net::UnescapeRule::NORMAL, view_source_offsets); - - const size_t idn_hostname_offsets_1[] = { - 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, - kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 12, - 13, 14, 15, 16, 17, 18, 19 - }; - // Convert punycode to "http://\x671d\x65e5\x3042\x3055\x3072.jp/foo/". - CheckAdjustedOffsets("http://xn--l8jvb1ey91xtjb.jp/foo/", "ja", - kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, - idn_hostname_offsets_1); - - const size_t idn_hostname_offsets_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, - kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 14, 15, kNpos, kNpos, kNpos, - kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, - kNpos, 19, 20, 21, 22, 23, 24 - }; - // Convert punycode to - // "http://test.\x89c6\x9891.\x5317\x4eac\x5927\x5b78.test/". 
- CheckAdjustedOffsets("http://test.xn--cy2a840a.xn--1lq90ic7f1rc.test/", - "zh-CN", kFormatUrlOmitNothing, - net::UnescapeRule::NORMAL, idn_hostname_offsets_2); - - const size_t unescape_offsets[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, - 21, 22, 23, 24, 25, kNpos, kNpos, 26, 27, 28, 29, 30, kNpos, kNpos, kNpos, - kNpos, kNpos, kNpos, kNpos, kNpos, 31, kNpos, kNpos, kNpos, kNpos, kNpos, - kNpos, kNpos, kNpos, 32, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, - kNpos, 33, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos - }; - // Unescape to "http://www.google.com/foo bar/\x30B0\x30FC\x30B0\x30EB". - CheckAdjustedOffsets( - "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", - "en", kFormatUrlOmitNothing, net::UnescapeRule::SPACES, unescape_offsets); - - const size_t ref_offsets[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, - 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, kNpos, kNpos, 32, kNpos, kNpos, - 33 - }; - // Unescape to "http://www.google.com/foo.html#\x30B0\x30B0z". 
- CheckAdjustedOffsets( - "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z", "en", - kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, ref_offsets); - - const size_t omit_http_offsets[] = { - 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14 - }; - CheckAdjustedOffsets("http://www.google.com/", "en", kFormatUrlOmitHTTP, - net::UnescapeRule::NORMAL, omit_http_offsets); - - const size_t omit_http_start_with_ftp_offsets[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 - }; - CheckAdjustedOffsets("http://ftp.google.com/", "en", kFormatUrlOmitHTTP, - net::UnescapeRule::NORMAL, - omit_http_start_with_ftp_offsets); - - const size_t omit_all_offsets[] = { - 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos, - 0, 1, 2, 3, 4, 5, 6, 7 - }; - CheckAdjustedOffsets("http://user@foo.com/", "en", kFormatUrlOmitAll, - net::UnescapeRule::NORMAL, omit_all_offsets); -} - -} // namespace - -} // namespace url_formatter |