diff options
Diffstat (limited to 'components/url_fixer')
-rw-r--r-- | components/url_fixer/BUILD.gn | 18 | ||||
-rw-r--r-- | components/url_fixer/DEPS | 3 | ||||
-rw-r--r-- | components/url_fixer/OWNERS | 1 | ||||
-rw-r--r-- | components/url_fixer/url_fixer.cc | 681 | ||||
-rw-r--r-- | components/url_fixer/url_fixer.h | 87 | ||||
-rw-r--r-- | components/url_fixer/url_fixer_unittest.cc | 536 |
6 files changed, 1326 insertions, 0 deletions
diff --git a/components/url_fixer/BUILD.gn b/components/url_fixer/BUILD.gn new file mode 100644 index 0000000..93eac1f3 --- /dev/null +++ b/components/url_fixer/BUILD.gn @@ -0,0 +1,18 @@ +# Copyright 2014 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +static_library("url_fixer") { + sources = [ + "url_fixer.cc", + "url_fixer.h", + ] + + # TODO(jschuh): crbug.com/167187 fix size_t to int truncations. + configs += [ "//build/config/compiler:no_size_t_to_int_warning" ] + + deps = [ + "//base", + "//net", + ] +} diff --git a/components/url_fixer/DEPS b/components/url_fixer/DEPS new file mode 100644 index 0000000..8fa9d48 --- /dev/null +++ b/components/url_fixer/DEPS @@ -0,0 +1,3 @@ +include_rules = [ + "+net", +] diff --git a/components/url_fixer/OWNERS b/components/url_fixer/OWNERS new file mode 100644 index 0000000..bf426d6 --- /dev/null +++ b/components/url_fixer/OWNERS @@ -0,0 +1 @@ +pkasting@chromium.org diff --git a/components/url_fixer/url_fixer.cc b/components/url_fixer/url_fixer.cc new file mode 100644 index 0000000..c34ac90 --- /dev/null +++ b/components/url_fixer/url_fixer.cc @@ -0,0 +1,681 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/url_fixer/url_fixer.h" + +#include <algorithm> + +#include "base/files/file_path.h" +#include "base/files/file_util.h" +#include "base/logging.h" +#if defined(OS_POSIX) +#include "base/path_service.h" +#endif +#include "base/strings/string_util.h" +#include "base/strings/utf_string_conversions.h" +#include "net/base/escape.h" +#include "net/base/filename_util.h" +#include "net/base/net_util.h" +#include "net/base/registry_controlled_domains/registry_controlled_domain.h" +#include "url/third_party/mozilla/url_parse.h" +#include "url/url_file.h" +#include "url/url_util.h" + +const char* url_fixer::home_directory_override = NULL; + +namespace { + +// Hardcode these constants to avoid dependences on //chrome and //content. +const char kChromeUIScheme[] = "chrome"; +const char kChromeUIDefaultHost[] = "version"; +const char kViewSourceScheme[] = "view-source"; + +// TODO(estade): Remove these ugly, ugly functions. They are only used in +// SegmentURL. A url::Parsed object keeps track of a bunch of indices into +// a url string, and these need to be updated when the URL is converted from +// UTF8 to UTF16. Instead of this after-the-fact adjustment, we should parse it +// in the correct string format to begin with. 
// Converts |component_utf8|, a [begin, begin+len) range measured against the
// UTF-8 string |text_utf8|, into the equivalent range measured against the
// UTF-16 conversion of that string. Returns an invalid component when the
// input component is invalid (len == -1).
url::Component UTF8ComponentToUTF16Component(
    const std::string& text_utf8,
    const url::Component& component_utf8) {
  if (component_utf8.len == -1)
    return url::Component();

  // Convert the prefix before the component and the component itself
  // separately; their UTF-16 lengths give the new begin offset and length.
  std::string before_component_string =
      text_utf8.substr(0, component_utf8.begin);
  std::string component_string =
      text_utf8.substr(component_utf8.begin, component_utf8.len);
  base::string16 before_component_string_16 =
      base::UTF8ToUTF16(before_component_string);
  base::string16 component_string_16 = base::UTF8ToUTF16(component_string);
  url::Component component_16(before_component_string_16.length(),
                              component_string_16.length());
  return component_16;
}

// Rewrites every component of |parts_utf8| (offsets into |text_utf8|) into
// |parts| so that the offsets index into the UTF-16 conversion of the text.
// ASCII input needs no adjustment: UTF-8 and UTF-16 offsets coincide.
void UTF8PartsToUTF16Parts(const std::string& text_utf8,
                           const url::Parsed& parts_utf8,
                           url::Parsed* parts) {
  if (base::IsStringASCII(text_utf8)) {
    *parts = parts_utf8;
    return;
  }

  parts->scheme = UTF8ComponentToUTF16Component(text_utf8, parts_utf8.scheme);
  parts->username =
      UTF8ComponentToUTF16Component(text_utf8, parts_utf8.username);
  parts->password =
      UTF8ComponentToUTF16Component(text_utf8, parts_utf8.password);
  parts->host = UTF8ComponentToUTF16Component(text_utf8, parts_utf8.host);
  parts->port = UTF8ComponentToUTF16Component(text_utf8, parts_utf8.port);
  parts->path = UTF8ComponentToUTF16Component(text_utf8, parts_utf8.path);
  parts->query = UTF8ComponentToUTF16Component(text_utf8, parts_utf8.query);
  parts->ref = UTF8ComponentToUTF16Component(text_utf8, parts_utf8.ref);
}

// Trims whitespace from |input| (treated as UTF-8) at the given |positions|,
// writing the result to |output| and returning where trimming occurred.
base::TrimPositions TrimWhitespaceUTF8(const std::string& input,
                                       base::TrimPositions positions,
                                       std::string* output) {
  // This implementation is not so fast since it converts the text encoding
  // twice. Please feel free to file a bug if this function hurts the
  // performance of Chrome.
  DCHECK(base::IsStringUTF8(input));
  base::string16 input16 = base::UTF8ToUTF16(input);
  base::string16 output16;
  base::TrimPositions result =
      base::TrimWhitespace(input16, positions, &output16);
  *output = base::UTF16ToUTF8(output16);
  return result;
}

// Does some basic fixes for input that we want to test for file-ness:
// trims whitespace, and on Windows normalizes '/' to '\\'.
void PrepareStringForFileOps(const base::FilePath& text,
                             base::FilePath::StringType* output) {
#if defined(OS_WIN)
  base::TrimWhitespace(text.value(), base::TRIM_ALL, output);
  replace(output->begin(), output->end(), '/', '\\');
#else
  TrimWhitespaceUTF8(text.value(), base::TRIM_ALL, output);
#endif
}

// Tries to create a full path from |text|. If the result is valid and the
// file exists, returns true and sets |full_path| to the result. Otherwise,
// returns false and leaves |full_path| unchanged.
bool ValidPathForFile(const base::FilePath::StringType& text,
                      base::FilePath* full_path) {
  base::FilePath file_path = base::MakeAbsoluteFilePath(base::FilePath(text));
  if (file_path.empty())
    return false;

  if (!base::PathExists(file_path))
    return false;

  *full_path = file_path;
  return true;
}

#if defined(OS_POSIX)
// Given a path that starts with ~, return a path that starts with an
// expanded-out /user/foobar directory.
std::string FixupHomedir(const std::string& text) {
  DCHECK(text.length() > 0 && text[0] == '~');

  if (text.length() == 1 || text[1] == '/') {
    // "~" or "~/...": expand to the current user's home directory (or the
    // test override, when set).
    base::FilePath file_path;
    if (url_fixer::home_directory_override)
      file_path = base::FilePath(url_fixer::home_directory_override);
    else
      PathService::Get(base::DIR_HOME, &file_path);

    // We'll probably break elsewhere if $HOME is undefined, but check here
    // just in case.
    if (file_path.value().empty())
      return text;
    // Append requires a relative path, so we have to cut all preceding
    // '/' characters.
    size_t i = 1;
    while (i < text.length() && text[i] == '/')
      ++i;
    return file_path.Append(text.substr(i)).value();
  }

  // Otherwise, this is a path like ~foobar/baz, where we must expand to
  // user foobar's home directory. Officially, we should use getpwent(),
  // but that is a nasty blocking call.

#if defined(OS_MACOSX)
  static const char kHome[] = "/Users/";
#else
  static const char kHome[] = "/home/";
#endif
  return kHome + text.substr(1);
}
#endif

// Tries to create a file: URL from |text| if it looks like a filename, even if
// it doesn't resolve as a valid path or to an existing file. Returns a
// (possibly invalid) file: URL in |fixed_up_url| for input beginning
// with a drive specifier or "\\". Returns the unchanged input in other cases
// (including file: URLs: these don't look like filenames).
std::string FixupPath(const std::string& text) {
  DCHECK(!text.empty());

  base::FilePath::StringType filename;
#if defined(OS_WIN)
  base::FilePath input_path(base::UTF8ToWide(text));
  PrepareStringForFileOps(input_path, &filename);

  // Fixup Windows-style drive letters: "C|" (old-style URL drive spec) gets
  // rewritten back to "C:". (Note: the conversion here is '|' -> ':'.)
  if (filename.length() > 1 && filename[1] == '|')
    filename[1] = ':';
#elif defined(OS_POSIX)
  base::FilePath input_path(text);
  PrepareStringForFileOps(input_path, &filename);
  if (filename.length() > 0 && filename[0] == '~')
    filename = FixupHomedir(filename);
#endif

  // Here, we know the input looks like a file.
  GURL file_url = net::FilePathToFileURL(base::FilePath(filename));
  if (file_url.is_valid()) {
    return base::UTF16ToUTF8(net::FormatUrl(file_url,
                                            std::string(),
                                            net::kFormatUrlOmitUsernamePassword,
                                            net::UnescapeRule::NORMAL,
                                            NULL,
                                            NULL,
                                            NULL));
  }

  // Invalid file URL, just return the input.
  return text;
}

// Checks |domain| to see if a valid TLD is already present. If not, appends
// |desired_tld| to the domain, and prepends "www." unless it's already present.
void AddDesiredTLD(const std::string& desired_tld, std::string* domain) {
  if (desired_tld.empty() || domain->empty())
    return;

  // Check the TLD. If the return value is positive, we already have a TLD, so
  // abort. If the return value is std::string::npos, there's no valid host,
  // but we can try to append a TLD anyway, since the host may become valid once
  // the TLD is attached -- for example, "999999999999" is detected as a broken
  // IP address and marked invalid, but attaching ".com" makes it legal. When
  // the return value is 0, there's a valid host with no known TLD, so we can
  // definitely append the user's TLD. We disallow unknown registries here so
  // users can input "mail.yahoo" and hit ctrl-enter to get
  // "www.mail.yahoo.com".
  const size_t registry_length =
      net::registry_controlled_domains::GetRegistryLength(
          *domain,
          net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
          net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
  if ((registry_length != 0) && (registry_length != std::string::npos))
    return;

  // Add the suffix at the end of the domain.
  const size_t domain_length(domain->length());
  DCHECK_GT(domain_length, 0U);
  DCHECK_NE(desired_tld[0], '.');
  if ((*domain)[domain_length - 1] != '.')
    domain->push_back('.');
  domain->append(desired_tld);

  // Now, if the domain begins with "www.", stop.
  const std::string prefix("www.");
  if (domain->compare(0, prefix.length(), prefix) != 0) {
    // Otherwise, add www. to the beginning of the URL.
    domain->insert(0, prefix);
  }
}

// Appends the username component of |text| to |url| verbatim.
inline void FixupUsername(const std::string& text,
                          const url::Component& part,
                          std::string* url) {
  if (!part.is_valid())
    return;

  // We don't fix up the username at the moment.
  url->append(text, part.begin, part.len);
  // Do not append the trailing '@' because we might need to include the user's
  // password. FixupURL itself will append the '@' for us.
}

// Appends ":<password>" from |text| to |url| verbatim.
inline void FixupPassword(const std::string& text,
                          const url::Component& part,
                          std::string* url) {
  if (!part.is_valid())
    return;

  // We don't fix up the password at the moment.
  url->append(":");
  url->append(text, part.begin, part.len);
}

// Appends a cleaned-up host from |text| to |url|: strips stray dots and
// attaches |desired_tld| (see AddDesiredTLD) when requested.
// NOTE(review): |has_scheme| is currently unused by the body.
void FixupHost(const std::string& text,
               const url::Component& part,
               bool has_scheme,
               const std::string& desired_tld,
               std::string* url) {
  if (!part.is_valid())
    return;

  // Make domain valid.
  // Strip all leading dots and all but one trailing dot, unless the user only
  // typed dots, in which case their input is totally invalid and we should just
  // leave it unchanged.
  std::string domain(text, part.begin, part.len);
  const size_t first_nondot(domain.find_first_not_of('.'));
  if (first_nondot != std::string::npos) {
    domain.erase(0, first_nondot);
    size_t last_nondot(domain.find_last_not_of('.'));
    DCHECK(last_nondot != std::string::npos);
    last_nondot += 2;  // Point at second period in ending string
    if (last_nondot < domain.length())
      domain.erase(last_nondot);
  }

  // Add any user-specified TLD, if applicable.
  AddDesiredTLD(desired_tld, &domain);

  url->append(domain);
}

// Appends ":<port>" from |text| to |url| verbatim.
void FixupPort(const std::string& text,
               const url::Component& part,
               std::string* url) {
  if (!part.is_valid())
    return;

  // We don't fix up the port at the moment.
  url->append(":");
  url->append(text, part.begin, part.len);
}

// Appends the path from |text| to |url|, defaulting to "/" when absent.
inline void FixupPath(const std::string& text,
                      const url::Component& part,
                      std::string* url) {
  if (!part.is_valid() || part.len == 0) {
    // We should always have a path.
    url->append("/");
    return;
  }

  // Append the path as is.
  url->append(text, part.begin, part.len);
}

// Appends "?<query>" from |text| to |url| verbatim.
inline void FixupQuery(const std::string& text,
                       const url::Component& part,
                       std::string* url) {
  if (!part.is_valid())
    return;

  // We don't fix up the query at the moment.
  url->append("?");
  url->append(text, part.begin, part.len);
}

// Appends "#<ref>" from |text| to |url| verbatim.
inline void FixupRef(const std::string& text,
                     const url::Component& part,
                     std::string* url) {
  if (!part.is_valid())
    return;

  // We don't fix up the ref at the moment.
  url->append("#");
  url->append(text, part.begin, part.len);
}

// Returns true if the characters after |scheme_component|'s trailing ':' in
// |original_text| form a non-empty run of ASCII digits up to the authority
// terminator -- i.e. the "scheme" is really a host followed by a port, as in
// "www:123/".
bool HasPort(const std::string& original_text,
             const url::Component& scheme_component) {
  // Find the range between the ":" and the "/".
  size_t port_start = scheme_component.end() + 1;
  size_t port_end = port_start;
  while ((port_end < original_text.length()) &&
         !url::IsAuthorityTerminator(original_text[port_end]))
    ++port_end;
  if (port_end == port_start)
    return false;

  // Scan the range to see if it is entirely digits.
  for (size_t i = port_start; i < port_end; ++i) {
    if (!base::IsAsciiDigit(original_text[i]))
      return false;
  }

  return true;
}

// Try to extract a valid scheme from the beginning of |text|.
// If successful, set |scheme_component| to the text range where the scheme
// was located, and fill |canon_scheme| with its canonicalized form.
// Otherwise, return false and leave the outputs in an indeterminate state.
bool GetValidScheme(const std::string& text,
                    url::Component* scheme_component,
                    std::string* canon_scheme) {
  canon_scheme->clear();

  // Locate everything up to (but not including) the first ':'
  if (!url::ExtractScheme(
          text.data(), static_cast<int>(text.length()), scheme_component)) {
    return false;
  }

  // Make sure the scheme contains only valid characters, and convert
  // to lowercase. This also catches IPv6 literals like [::1], because
  // brackets are not in the whitelist.
  url::StdStringCanonOutput canon_scheme_output(canon_scheme);
  url::Component canon_scheme_component;
  if (!url::CanonicalizeScheme(text.data(),
                               *scheme_component,
                               &canon_scheme_output,
                               &canon_scheme_component)) {
    return false;
  }

  // Strip the ':', and any trailing buffer space.
  DCHECK_EQ(0, canon_scheme_component.begin);
  canon_scheme->erase(canon_scheme_component.len);

  // We need to fix up the segmentation for "www.example.com:/". For this
  // case, we guess that schemes with a "." are not actually schemes.
  if (canon_scheme->find('.') != std::string::npos)
    return false;

  // We need to fix up the segmentation for "www:123/". For this case, we
  // will add an HTTP scheme later and make the URL parser happy.
  // TODO(pkasting): Maybe we should try to use GURL's parser for this?
  if (HasPort(text, *scheme_component))
    return false;

  // Everything checks out.
  return true;
}

// Performs the work for url_fixer::SegmentURL. |text| may be modified on
// output on success: a semicolon following a valid scheme is replaced with a
// colon. Returns the (guessed or canonicalized) scheme.
std::string SegmentURLInternal(std::string* text, url::Parsed* parts) {
  // Initialize the result.
  *parts = url::Parsed();

  std::string trimmed;
  TrimWhitespaceUTF8(*text, base::TRIM_ALL, &trimmed);
  if (trimmed.empty())
    return std::string();  // Nothing to segment.

#if defined(OS_WIN)
  int trimmed_length = static_cast<int>(trimmed.length());
  if (url::DoesBeginWindowsDriveSpec(trimmed.data(), 0, trimmed_length) ||
      url::DoesBeginUNCPath(trimmed.data(), 0, trimmed_length, true))
    return "file";
#elif defined(OS_POSIX)
  if (base::FilePath::IsSeparator(trimmed.data()[0]) ||
      trimmed.data()[0] == '~')
    return "file";
#endif

  // Otherwise, we need to look at things carefully.
  // NOTE(review): scheme extraction below runs on the untrimmed |*text| while
  // the file checks above used |trimmed| -- presumably intentional, since the
  // returned |parts| must index into |*text|; confirm against callers.
  std::string scheme;
  if (!GetValidScheme(*text, &parts->scheme, &scheme)) {
    // Try again if there is a ';' in the text. If changing it to a ':' results
    // in a scheme being found, continue processing with the modified text.
    bool found_scheme = false;
    size_t semicolon = text->find(';');
    if (semicolon != 0 && semicolon != std::string::npos) {
      (*text)[semicolon] = ':';
      if (GetValidScheme(*text, &parts->scheme, &scheme))
        found_scheme = true;
      else
        (*text)[semicolon] = ';';
    }
    if (!found_scheme) {
      // Couldn't determine the scheme, so just pick one.
      parts->scheme.reset();
      scheme = base::StartsWith(*text, "ftp.",
                                base::CompareCase::INSENSITIVE_ASCII) ?
          url::kFtpScheme : url::kHttpScheme;
    }
  }

  // Proceed with about and chrome schemes, but not file or nonstandard schemes.
  if ((scheme != url::kAboutScheme) && (scheme != kChromeUIScheme) &&
      ((scheme == url::kFileScheme) ||
       !url::IsStandard(
           scheme.c_str(),
           url::Component(0, static_cast<int>(scheme.length()))))) {
    return scheme;
  }

  if (scheme == url::kFileSystemScheme) {
    // Have the GURL parser do the heavy lifting for us.
    url::ParseFileSystemURL(
        text->data(), static_cast<int>(text->length()), parts);
    return scheme;
  }

  if (parts->scheme.is_valid()) {
    // Have the GURL parser do the heavy lifting for us.
    url::ParseStandardURL(
        text->data(), static_cast<int>(text->length()), parts);
    return scheme;
  }

  // We need to add a scheme in order for ParseStandardURL to be happy.
  // Find the first non-whitespace character.
  std::string::iterator first_nonwhite = text->begin();
  while ((first_nonwhite != text->end()) &&
         base::IsUnicodeWhitespace(*first_nonwhite))
    ++first_nonwhite;

  // Construct the text to parse by inserting the scheme.
  std::string inserted_text(scheme);
  inserted_text.append(url::kStandardSchemeSeparator);
  std::string text_to_parse(text->begin(), first_nonwhite);
  text_to_parse.append(inserted_text);
  text_to_parse.append(first_nonwhite, text->end());

  // Have the GURL parser do the heavy lifting for us.
  url::ParseStandardURL(
      text_to_parse.data(), static_cast<int>(text_to_parse.length()), parts);

  // Offset the results of the parse to match the original text.
  const int offset = -static_cast<int>(inserted_text.length());
  url_fixer::OffsetComponent(offset, &parts->scheme);
  url_fixer::OffsetComponent(offset, &parts->username);
  url_fixer::OffsetComponent(offset, &parts->password);
  url_fixer::OffsetComponent(offset, &parts->host);
  url_fixer::OffsetComponent(offset, &parts->port);
  url_fixer::OffsetComponent(offset, &parts->path);
  url_fixer::OffsetComponent(offset, &parts->query);
  url_fixer::OffsetComponent(offset, &parts->ref);

  return scheme;
}

}  // namespace

// See header for full documentation. Segments |text| into URL parts without
// mutating the caller's string.
std::string url_fixer::SegmentURL(const std::string& text, url::Parsed* parts) {
  std::string mutable_text(text);
  return SegmentURLInternal(&mutable_text, parts);
}

// UTF-16 overload: segments via the UTF-8 path, then maps the resulting
// component offsets back into UTF-16 space.
base::string16 url_fixer::SegmentURL(const base::string16& text,
                                     url::Parsed* parts) {
  std::string text_utf8 = base::UTF16ToUTF8(text);
  url::Parsed parts_utf8;
  std::string scheme_utf8 = SegmentURL(text_utf8, &parts_utf8);
  UTF8PartsToUTF16Parts(text_utf8, parts_utf8, parts);
  return base::UTF8ToUTF16(scheme_utf8);
}

// See header for full documentation. Converts |text| to a fixed-up (but
// possibly still invalid) GURL, optionally completing the domain with
// |desired_tld|.
GURL url_fixer::FixupURL(const std::string& text,
                         const std::string& desired_tld) {
  std::string trimmed;
  TrimWhitespaceUTF8(text, base::TRIM_ALL, &trimmed);
  if (trimmed.empty())
    return GURL();  // Nothing here.

  // Segment the URL.
  url::Parsed parts;
  std::string scheme(SegmentURLInternal(&trimmed, &parts));

  // For view-source: URLs, we strip "view-source:", do fixup, and stick it back
  // on. This allows us to handle things like "view-source:google.com".
  if (scheme == kViewSourceScheme) {
    // Reject "view-source:view-source:..." to avoid deep recursion.
    // NOTE(review): the recursion guard checks the original |text| while the
    // recursive call strips from |trimmed| -- confirm leading whitespace can't
    // bypass the guard.
    std::string view_source(kViewSourceScheme + std::string(":"));
    if (!base::StartsWith(text, view_source + view_source,
                          base::CompareCase::INSENSITIVE_ASCII)) {
      return GURL(kViewSourceScheme + std::string(":") +
                  FixupURL(trimmed.substr(scheme.length() + 1), desired_tld)
                      .possibly_invalid_spec());
    }
  }

  // We handle the file scheme separately.
  if (scheme == url::kFileScheme)
    return GURL(parts.scheme.is_valid() ? text : FixupPath(text));

  // We handle the filesystem scheme separately.
  if (scheme == url::kFileSystemScheme) {
    if (parts.inner_parsed() && parts.inner_parsed()->scheme.is_valid())
      return GURL(text);
    return GURL();
  }

  // Parse and rebuild about: and chrome: URLs, except about:blank.
  bool chrome_url =
      !base::LowerCaseEqualsASCII(trimmed, url::kAboutBlankURL) &&
      ((scheme == url::kAboutScheme) || (scheme == kChromeUIScheme));

  // For some schemes whose layouts we understand, we rebuild it.
  if (chrome_url ||
      url::IsStandard(scheme.c_str(),
                      url::Component(0, static_cast<int>(scheme.length())))) {
    // Replace the about: scheme with the chrome: scheme.
    std::string url(chrome_url ? kChromeUIScheme : scheme);
    url.append(url::kStandardSchemeSeparator);

    // We need to check whether the |username| is valid because it is our
    // responsibility to append the '@' to delineate the user information from
    // the host portion of the URL.
    if (parts.username.is_valid()) {
      FixupUsername(trimmed, parts.username, &url);
      FixupPassword(trimmed, parts.password, &url);
      url.append("@");
    }

    FixupHost(trimmed, parts.host, parts.scheme.is_valid(), desired_tld, &url);
    if (chrome_url && !parts.host.is_valid())
      url.append(kChromeUIDefaultHost);
    FixupPort(trimmed, parts.port, &url);
    FixupPath(trimmed, parts.path, &url);
    FixupQuery(trimmed, parts.query, &url);
    FixupRef(trimmed, parts.ref, &url);

    return GURL(url);
  }

  // In the worst-case, we insert a scheme if the URL lacks one.
  if (!parts.scheme.is_valid()) {
    std::string fixed_scheme(scheme);
    fixed_scheme.append(url::kStandardSchemeSeparator);
    trimmed.insert(0, fixed_scheme);
  }

  return GURL(trimmed);
}

// The rules are different here than for regular fixup, since we need to handle
// input like "hello.html" and know to look in the current directory. Regular
// fixup will look for cues that it is actually a file path before trying to
// figure out what file it is. If our logic doesn't work, we will fall back on
// regular fixup.
GURL url_fixer::FixupRelativeFile(const base::FilePath& base_dir,
                                  const base::FilePath& text) {
  base::FilePath old_cur_directory;
  if (!base_dir.empty()) {
    // Save the old current directory before we move to the new one.
    // NOTE(review): mutating the process-wide CWD is not thread-safe;
    // presumably this is only called from contexts where that is acceptable.
    base::GetCurrentDirectory(&old_cur_directory);
    base::SetCurrentDirectory(base_dir);
  }

  // Allow funny input with extra whitespace and the wrong kind of slashes.
  base::FilePath::StringType trimmed;
  PrepareStringForFileOps(text, &trimmed);

  bool is_file = true;
  // Avoid recognizing definite non-file URLs as file paths.
  GURL gurl(trimmed);
  if (gurl.is_valid() && gurl.IsStandard())
    is_file = false;
  base::FilePath full_path;
  if (is_file && !ValidPathForFile(trimmed, &full_path)) {
// Not a path as entered, try unescaping it in case the user has
// escaped things. We need to go through 8-bit since the escaped values
// only represent 8-bit values.
#if defined(OS_WIN)
    std::wstring unescaped = base::UTF8ToWide(net::UnescapeURLComponent(
        base::WideToUTF8(trimmed),
        net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS));
#elif defined(OS_POSIX)
    std::string unescaped = net::UnescapeURLComponent(
        trimmed,
        net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS);
#endif

    if (!ValidPathForFile(unescaped, &full_path))
      is_file = false;
  }

  // Put back the current directory if we saved it.
  if (!base_dir.empty())
    base::SetCurrentDirectory(old_cur_directory);

  if (is_file) {
    GURL file_url = net::FilePathToFileURL(full_path);
    if (file_url.is_valid())
      return GURL(
          base::UTF16ToUTF8(net::FormatUrl(file_url,
                                           std::string(),
                                           net::kFormatUrlOmitUsernamePassword,
                                           net::UnescapeRule::NORMAL,
                                           NULL,
                                           NULL,
                                           NULL)));
    // Invalid files fall through to regular processing.
  }

// Fall back on regular fixup for this input.
#if defined(OS_WIN)
  std::string text_utf8 = base::WideToUTF8(text.value());
#elif defined(OS_POSIX)
  std::string text_utf8 = text.value();
#endif
  return FixupURL(text_utf8, std::string());
}

// See header for full documentation. Shifts |part|'s begin index by |offset|,
// resetting the component when it falls before the start of the string.
void url_fixer::OffsetComponent(int offset, url::Component* part) {
  DCHECK(part);

  if (part->is_valid()) {
    // Offset the location of this component.
    part->begin += offset;

    // This part might not have existed in the original text.
    if (part->begin < 0)
      part->reset();
  }
}

// See header for full documentation. "about" and "chrome" are treated as the
// same scheme because FixupURL rewrites about: URLs to chrome: URLs.
bool url_fixer::IsEquivalentScheme(const std::string& scheme1,
                                   const std::string& scheme2) {
  return scheme1 == scheme2 ||
         (scheme1 == url::kAboutScheme && scheme2 == kChromeUIScheme) ||
         (scheme1 == kChromeUIScheme && scheme2 == url::kAboutScheme);
}
diff --git a/components/url_fixer/url_fixer.h b/components/url_fixer/url_fixer.h
new file mode 100644
index 0000000..baf14c6
--- /dev/null
+++ b/components/url_fixer/url_fixer.h
@@ -0,0 +1,87 @@
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_URL_FIXER_URL_FIXER_H_
#define COMPONENTS_URL_FIXER_URL_FIXER_H_

#include <string>

#include "base/strings/string16.h"
#include "url/gurl.h"

namespace base {
class FilePath;
}

namespace url {
struct Component;
struct Parsed;
}

// This object is designed to convert various types of input into URLs that we
// know are valid.
For example, user typing in the URL bar or command line +// options. This is NOT the place for converting between different types of URLs +// or parsing them, see net_util.h for that. +namespace url_fixer { + +// Segments the given text string into parts of a URL. This is most useful for +// schemes such as http, https, and ftp where |SegmentURL| will find many +// segments. Currently does not segment "file" schemes. +// Returns the canonicalized scheme, or the empty string when |text| is only +// whitespace. +std::string SegmentURL(const std::string& text, url::Parsed* parts); +base::string16 SegmentURL(const base::string16& text, url::Parsed* parts); + +// Converts |text| to a fixed-up URL and returns it. Attempts to make some +// "smart" adjustments to obviously-invalid input where possible. +// |text| may be an absolute path to a file, which will get converted to a +// "file:" URL. +// +// The result will be a "more" valid URL than the input. It may still not be +// valid, so check the return value's validity or use possibly_invalid_spec(). +// +// Schemes "about" and "chrome" are normalized to "chrome://", with slashes. +// "about:blank" is unaltered, as Webkit allows frames to access about:blank. +// Additionally, if a chrome URL does not have a valid host, as in "about:", the +// returned URL will have the host "version", as in "chrome://version". +// +// If |desired_tld| is non-empty, it represents the TLD the user wishes to +// append in the case of an incomplete domain. We check that this is not a file +// path and there does not appear to be a valid TLD already, then append +// |desired_tld| to the domain and prepend "www." (unless it, or a scheme, are +// already present.) This TLD should not have a leading '.' (use "com" instead +// of ".com"). +GURL FixupURL(const std::string& text, const std::string& desired_tld); + +// Converts |text| to a fixed-up URL, allowing it to be a relative path on the +// local filesystem. 
Begin searching in |base_dir|; if empty, use the current +// working directory. If this resolves to a file on disk, convert it to a +// "file:" URL in |fixed_up_url|; otherwise, fall back to the behavior of +// FixupURL(). +// +// For "regular" input, even if it is possibly a file with a full path, you +// should use FixupURL() directly. This function should only be used when +// relative path handling is desired, as for command line processing. +GURL FixupRelativeFile(const base::FilePath& base_dir, + const base::FilePath& text); + +// Offsets the beginning index of |part| by |offset|, which is allowed to be +// negative. In some cases, the desired component does not exist at the given +// offset. For example, when converting from "http://foo" to "foo", the scheme +// component no longer exists. In such a case, the beginning index is set to 0. +// Does nothing if |part| is invalid. +void OffsetComponent(int offset, url::Component* part); + +// Returns true if |scheme1| is equivalent to |scheme2|. +// Generally this is true if the two schemes are actually identical, but it's +// also true when one scheme is "about" and the other "chrome". +bool IsEquivalentScheme(const std::string& scheme1, const std::string& scheme2); + +// For paths like ~, we use $HOME for the current user's home directory. +// For tests, we allow our idea of $HOME to be overriden by this variable. +extern const char* home_directory_override; + +} // namespace url_fixer + +#endif // COMPONENTS_URL_FIXER_URL_FIXER_H_ diff --git a/components/url_fixer/url_fixer_unittest.cc b/components/url_fixer/url_fixer_unittest.cc new file mode 100644 index 0000000..ee19289 --- /dev/null +++ b/components/url_fixer/url_fixer_unittest.cc @@ -0,0 +1,536 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include <stdlib.h> + +#include "base/base_paths.h" +#include "base/basictypes.h" +#include "base/files/file_path.h" +#include "base/files/file_util.h" +#include "base/files/scoped_temp_dir.h" +#include "base/strings/string_util.h" +#include "base/strings/utf_string_conversions.h" +#include "components/url_fixer/url_fixer.h" +#include "net/base/filename_util.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "url/gurl.h" +#include "url/third_party/mozilla/url_parse.h" + +namespace url { + +std::ostream& operator<<(std::ostream& os, const Component& part) { + return os << "(begin=" << part.begin << ", len=" << part.len << ")"; +} + +} // namespace url + +struct SegmentCase { + const std::string input; + const std::string result; + const url::Component scheme; + const url::Component username; + const url::Component password; + const url::Component host; + const url::Component port; + const url::Component path; + const url::Component query; + const url::Component ref; +}; + +static const SegmentCase segment_cases[] = { + { "http://www.google.com/", "http", + url::Component(0, 4), // scheme + url::Component(), // username + url::Component(), // password + url::Component(7, 14), // host + url::Component(), // port + url::Component(21, 1), // path + url::Component(), // query + url::Component(), // ref + }, + { "aBoUt:vErSiOn", "about", + url::Component(0, 5), // scheme + url::Component(), // username + url::Component(), // password + url::Component(6, 7), // host + url::Component(), // port + url::Component(), // path + url::Component(), // query + url::Component(), // ref + }, + { "about:host/path?query#ref", "about", + url::Component(0, 5), // scheme + url::Component(), // username + url::Component(), // password + url::Component(6, 4), // host + url::Component(), // port + url::Component(10, 5), // path + url::Component(16, 5), // query + url::Component(22, 3), // ref + }, + { "about://host/path?query#ref", "about", + url::Component(0, 5), // scheme + 
url::Component(), // username + url::Component(), // password + url::Component(8, 4), // host + url::Component(), // port + url::Component(12, 5), // path + url::Component(18, 5), // query + url::Component(24, 3), // ref + }, + { "chrome:host/path?query#ref", "chrome", + url::Component(0, 6), // scheme + url::Component(), // username + url::Component(), // password + url::Component(7, 4), // host + url::Component(), // port + url::Component(11, 5), // path + url::Component(17, 5), // query + url::Component(23, 3), // ref + }, + { "chrome://host/path?query#ref", "chrome", + url::Component(0, 6), // scheme + url::Component(), // username + url::Component(), // password + url::Component(9, 4), // host + url::Component(), // port + url::Component(13, 5), // path + url::Component(19, 5), // query + url::Component(25, 3), // ref + }, + { " www.google.com:124?foo#", "http", + url::Component(), // scheme + url::Component(), // username + url::Component(), // password + url::Component(4, 14), // host + url::Component(19, 3), // port + url::Component(), // path + url::Component(23, 3), // query + url::Component(27, 0), // ref + }, + { "user@www.google.com", "http", + url::Component(), // scheme + url::Component(0, 4), // username + url::Component(), // password + url::Component(5, 14), // host + url::Component(), // port + url::Component(), // path + url::Component(), // query + url::Component(), // ref + }, + { "ftp:/user:P:a$$Wd@..ftp.google.com...::23///pub?foo#bar", "ftp", + url::Component(0, 3), // scheme + url::Component(5, 4), // username + url::Component(10, 7), // password + url::Component(18, 20), // host + url::Component(39, 2), // port + url::Component(41, 6), // path + url::Component(48, 3), // query + url::Component(52, 3), // ref + }, + { "[2001:db8::1]/path", "http", + url::Component(), // scheme + url::Component(), // username + url::Component(), // password + url::Component(0, 13), // host + url::Component(), // port + url::Component(13, 5), // path + 
url::Component(), // query + url::Component(), // ref + }, + { "[::1]", "http", + url::Component(), // scheme + url::Component(), // username + url::Component(), // password + url::Component(0, 5), // host + url::Component(), // port + url::Component(), // path + url::Component(), // query + url::Component(), // ref + }, + // Incomplete IPv6 addresses (will not canonicalize). + { "[2001:4860:", "http", + url::Component(), // scheme + url::Component(), // username + url::Component(), // password + url::Component(0, 11), // host + url::Component(), // port + url::Component(), // path + url::Component(), // query + url::Component(), // ref + }, + { "[2001:4860:/foo", "http", + url::Component(), // scheme + url::Component(), // username + url::Component(), // password + url::Component(0, 11), // host + url::Component(), // port + url::Component(11, 4), // path + url::Component(), // query + url::Component(), // ref + }, + { "http://:b005::68]", "http", + url::Component(0, 4), // scheme + url::Component(), // username + url::Component(), // password + url::Component(7, 10), // host + url::Component(), // port + url::Component(), // path + url::Component(), // query + url::Component(), // ref + }, + // Can't do anything useful with this. 
+ { ":b005::68]", "", + url::Component(0, 0), // scheme + url::Component(), // username + url::Component(), // password + url::Component(), // host + url::Component(), // port + url::Component(), // path + url::Component(), // query + url::Component(), // ref + }, +}; + +typedef testing::Test URLFixerTest; + +TEST(URLFixerTest, SegmentURL) { + std::string result; + url::Parsed parts; + + for (size_t i = 0; i < arraysize(segment_cases); ++i) { + SegmentCase value = segment_cases[i]; + result = url_fixer::SegmentURL(value.input, &parts); + EXPECT_EQ(value.result, result); + EXPECT_EQ(value.scheme, parts.scheme); + EXPECT_EQ(value.username, parts.username); + EXPECT_EQ(value.password, parts.password); + EXPECT_EQ(value.host, parts.host); + EXPECT_EQ(value.port, parts.port); + EXPECT_EQ(value.path, parts.path); + EXPECT_EQ(value.query, parts.query); + EXPECT_EQ(value.ref, parts.ref); + } +} + +// Creates a file and returns its full name as well as the decomposed +// version. Example: +// full_path = "c:\foo\bar.txt" +// dir = "c:\foo" +// file_name = "bar.txt" +static bool MakeTempFile(const base::FilePath& dir, + const base::FilePath& file_name, + base::FilePath* full_path) { + *full_path = dir.Append(file_name); + return base::WriteFile(*full_path, "", 0) == 0; +} + +// Returns true if the given URL is a file: URL that matches the given file +static bool IsMatchingFileURL(const std::string& url, + const base::FilePath& full_file_path) { + if (url.length() <= 8) + return false; + if (std::string("file:///") != url.substr(0, 8)) + return false; // no file:/// prefix + if (url.find('\\') != std::string::npos) + return false; // contains backslashes + + base::FilePath derived_path; + net::FileURLToFilePath(GURL(url), &derived_path); + + return base::FilePath::CompareEqualIgnoreCase(derived_path.value(), + full_file_path.value()); +} + +struct FixupCase { + const std::string input; + const std::string output; +} fixup_cases[] = { + {"www.google.com", 
"http://www.google.com/"}, + {" www.google.com ", "http://www.google.com/"}, + {" foo.com/asdf bar", "http://foo.com/asdf%20%20bar"}, + {"..www.google.com..", "http://www.google.com./"}, + {"http://......", "http://....../"}, + {"http://host.com:ninety-two/", "http://host.com:ninety-two/"}, + {"http://host.com:ninety-two?foo", "http://host.com:ninety-two/?foo"}, + {"google.com:123", "http://google.com:123/"}, + {"about:", "chrome://version/"}, + {"about:foo", "chrome://foo/"}, + {"about:version", "chrome://version/"}, + {"about:blank", "about:blank"}, + {"about:usr:pwd@hst/pth?qry#ref", "chrome://usr:pwd@hst/pth?qry#ref"}, + {"about://usr:pwd@hst/pth?qry#ref", "chrome://usr:pwd@hst/pth?qry#ref"}, + {"chrome:usr:pwd@hst/pth?qry#ref", "chrome://usr:pwd@hst/pth?qry#ref"}, + {"chrome://usr:pwd@hst/pth?qry#ref", "chrome://usr:pwd@hst/pth?qry#ref"}, + {"www:123", "http://www:123/"}, + {" www:123", "http://www:123/"}, + {"www.google.com?foo", "http://www.google.com/?foo"}, + {"www.google.com#foo", "http://www.google.com/#foo"}, + {"www.google.com?", "http://www.google.com/?"}, + {"www.google.com#", "http://www.google.com/#"}, + {"www.google.com:123?foo#bar", "http://www.google.com:123/?foo#bar"}, + {"user@www.google.com", "http://user@www.google.com/"}, + {"\xE6\xB0\xB4.com", "http://xn--1rw.com/"}, + // It would be better if this next case got treated as http, but I don't see + // a clean way to guess this isn't the new-and-exciting "user" scheme. + {"user:passwd@www.google.com:8080/", "user:passwd@www.google.com:8080/"}, + // {"file:///c:/foo/bar%20baz.txt", "file:///C:/foo/bar%20baz.txt"}, + {"ftp.google.com", "ftp://ftp.google.com/"}, + {" ftp.google.com", "ftp://ftp.google.com/"}, + {"FTP.GooGle.com", "ftp://ftp.google.com/"}, + {"ftpblah.google.com", "http://ftpblah.google.com/"}, + {"ftp", "http://ftp/"}, + {"google.ftp.com", "http://google.ftp.com/"}, + // URLs which end with 0x85 (NEL in ISO-8859). 
+ {"http://foo.com/s?q=\xd0\x85", "http://foo.com/s?q=%D0%85"}, + {"http://foo.com/s?q=\xec\x97\x85", "http://foo.com/s?q=%EC%97%85"}, + {"http://foo.com/s?q=\xf0\x90\x80\x85", "http://foo.com/s?q=%F0%90%80%85"}, + // URLs which end with 0xA0 (non-break space in ISO-8859). + {"http://foo.com/s?q=\xd0\xa0", "http://foo.com/s?q=%D0%A0"}, + {"http://foo.com/s?q=\xec\x97\xa0", "http://foo.com/s?q=%EC%97%A0"}, + {"http://foo.com/s?q=\xf0\x90\x80\xa0", "http://foo.com/s?q=%F0%90%80%A0"}, + // URLs containing IPv6 literals. + {"[2001:db8::2]", "http://[2001:db8::2]/"}, + {"[::]:80", "http://[::]/"}, + {"[::]:80/path", "http://[::]/path"}, + {"[::]:180/path", "http://[::]:180/path"}, + // TODO(pmarks): Maybe we should parse bare IPv6 literals someday. + {"::1", "::1"}, + // Semicolon as scheme separator for standard schemes. + {"http;//www.google.com/", "http://www.google.com/"}, + {"about;chrome", "chrome://chrome/"}, + // Semicolon left as-is for non-standard schemes. + {"whatsup;//fool", "whatsup://fool"}, + // Semicolon left as-is in URL itself. + {"http://host/port?query;moar", "http://host/port?query;moar"}, + // Fewer slashes than expected. + {"http;www.google.com/", "http://www.google.com/"}, + {"http;/www.google.com/", "http://www.google.com/"}, + // Semicolon at start. + {";http://www.google.com/", "http://%3Bhttp//www.google.com/"}, +}; + +TEST(URLFixerTest, FixupURL) { + for (size_t i = 0; i < arraysize(fixup_cases); ++i) { + FixupCase value = fixup_cases[i]; + EXPECT_EQ(value.output, + url_fixer::FixupURL(value.input, "").possibly_invalid_spec()) + << "input: " << value.input; + } + + // Check the TLD-appending functionality. 
+ FixupCase tld_cases[] = { + {"somedomainthatwillnotbeagtld", + "http://www.somedomainthatwillnotbeagtld.com/"}, + {"somedomainthatwillnotbeagtld.", + "http://www.somedomainthatwillnotbeagtld.com/"}, + {"somedomainthatwillnotbeagtld..", + "http://www.somedomainthatwillnotbeagtld.com/"}, + {".somedomainthatwillnotbeagtld", + "http://www.somedomainthatwillnotbeagtld.com/"}, + {"www.somedomainthatwillnotbeagtld", + "http://www.somedomainthatwillnotbeagtld.com/"}, + {"somedomainthatwillnotbeagtld.com", + "http://somedomainthatwillnotbeagtld.com/"}, + {"http://somedomainthatwillnotbeagtld", + "http://www.somedomainthatwillnotbeagtld.com/"}, + {"..somedomainthatwillnotbeagtld..", + "http://www.somedomainthatwillnotbeagtld.com/"}, + {"http://www.somedomainthatwillnotbeagtld", + "http://www.somedomainthatwillnotbeagtld.com/"}, + {"9999999999999999", "http://www.9999999999999999.com/"}, + {"somedomainthatwillnotbeagtld/foo", + "http://www.somedomainthatwillnotbeagtld.com/foo"}, + {"somedomainthatwillnotbeagtld.com/foo", + "http://somedomainthatwillnotbeagtld.com/foo"}, + {"somedomainthatwillnotbeagtld/?foo=.com", + "http://www.somedomainthatwillnotbeagtld.com/?foo=.com"}, + {"www.somedomainthatwillnotbeagtld/?foo=www.", + "http://www.somedomainthatwillnotbeagtld.com/?foo=www."}, + {"somedomainthatwillnotbeagtld.com/?foo=.com", + "http://somedomainthatwillnotbeagtld.com/?foo=.com"}, + {"http://www.somedomainthatwillnotbeagtld.com", + "http://www.somedomainthatwillnotbeagtld.com/"}, + {"somedomainthatwillnotbeagtld:123", + "http://www.somedomainthatwillnotbeagtld.com:123/"}, + {"http://somedomainthatwillnotbeagtld:123", + "http://www.somedomainthatwillnotbeagtld.com:123/"}, + }; + for (size_t i = 0; i < arraysize(tld_cases); ++i) { + FixupCase value = tld_cases[i]; + EXPECT_EQ(value.output, + url_fixer::FixupURL(value.input, "com").possibly_invalid_spec()); + } +} + +// Test different types of file inputs to URIFixerUpper::FixupURL. 
This +// doesn't go into the nice array of fixups above since the file input +// has to exist. +TEST(URLFixerTest, FixupFile) { + // this "original" filename is the one we tweak to get all the variations + base::ScopedTempDir temp_dir_; + ASSERT_TRUE(temp_dir_.CreateUniqueTempDir()); + base::FilePath original; + ASSERT_TRUE(MakeTempFile( + temp_dir_.path(), + base::FilePath(FILE_PATH_LITERAL("url fixer upper existing file.txt")), + &original)); + + // reference path + GURL golden(net::FilePathToFileURL(original)); + + // c:\foo\bar.txt -> file:///c:/foo/bar.txt (basic) + GURL fixedup(url_fixer::FixupURL(original.AsUTF8Unsafe(), std::string())); + EXPECT_EQ(golden, fixedup); + + // TODO(port): Make some equivalent tests for posix. +#if defined(OS_WIN) + // c|/foo\bar.txt -> file:///c:/foo/bar.txt (pipe allowed instead of colon) + std::string cur(base::WideToUTF8(original.value())); + EXPECT_EQ(':', cur[1]); + cur[1] = '|'; + EXPECT_EQ(golden, url_fixer::FixupURL(cur, std::string())); + + FixupCase cases[] = { + {"c:\\Non-existent%20file.txt", "file:///C:/Non-existent%2520file.txt"}, + + // \\foo\bar.txt -> file://foo/bar.txt + // UNC paths, this file won't exist, but since there are no escapes, it + // should be returned just converted to a file: URL. + {"\\\\NonexistentHost\\foo\\bar.txt", "file://nonexistenthost/foo/bar.txt"}, + // We do this strictly, like IE8, which only accepts this form using + // backslashes and not forward ones. Turning "//foo" into "http" matches + // Firefox and IE, silly though it may seem (it falls out of adding "http" + // as the default protocol if you haven't entered one). + {"//NonexistentHost\\foo/bar.txt", "http://nonexistenthost/foo/bar.txt"}, + {"file:///C:/foo/bar", "file:///C:/foo/bar"}, + + // Much of the work here comes from GURL's canonicalization stage. 
+ {"file://C:/foo/bar", "file:///C:/foo/bar"}, + {"file:c:", "file:///C:/"}, + {"file:c:WINDOWS", "file:///C:/WINDOWS"}, + {"file:c|Program Files", "file:///C:/Program%20Files"}, + {"file:/file", "file://file/"}, + {"file:////////c:\\foo", "file:///C:/foo"}, + {"file://server/folder/file", "file://server/folder/file"}, + + // These are fixups we don't do, but could consider: + // {"file:///foo:/bar", "file://foo/bar"}, + // {"file:/\\/server\\folder/file", "file://server/folder/file"}, + }; +#elif defined(OS_POSIX) + +#if defined(OS_MACOSX) +#define HOME "/Users/" +#else +#define HOME "/home/" +#endif + url_fixer::home_directory_override = "/foo"; + FixupCase cases[] = { + // File URLs go through GURL, which tries to escape intelligently. + {"/A%20non-existent file.txt", "file:///A%2520non-existent%20file.txt"}, + // A plain "/" refers to the root. + {"/", "file:///"}, + + // These rely on the above home_directory_override. + {"~", "file:///foo"}, + {"~/bar", "file:///foo/bar"}, + + // References to other users' homedirs. 
+ {"~foo", "file://" HOME "foo"}, + {"~x/blah", "file://" HOME "x/blah"}, + }; +#endif + + for (size_t i = 0; i < arraysize(cases); i++) { + EXPECT_EQ(cases[i].output, + url_fixer::FixupURL(cases[i].input, "").possibly_invalid_spec()); + } + + EXPECT_TRUE(base::DeleteFile(original, false)); +} + +TEST(URLFixerTest, FixupRelativeFile) { + base::FilePath full_path; + base::FilePath file_part( + FILE_PATH_LITERAL("url_fixer_upper_existing_file.txt")); + base::ScopedTempDir temp_dir_; + ASSERT_TRUE(temp_dir_.CreateUniqueTempDir()); + ASSERT_TRUE(MakeTempFile(temp_dir_.path(), file_part, &full_path)); + full_path = base::MakeAbsoluteFilePath(full_path); + ASSERT_FALSE(full_path.empty()); + + // make sure we pass through good URLs + for (size_t i = 0; i < arraysize(fixup_cases); ++i) { + FixupCase value = fixup_cases[i]; + base::FilePath input = base::FilePath::FromUTF8Unsafe(value.input); + EXPECT_EQ(value.output, + url_fixer::FixupRelativeFile(temp_dir_.path(), + input).possibly_invalid_spec()); + } + + // make sure the existing file got fixed-up to a file URL, and that there + // are no backslashes + EXPECT_TRUE(IsMatchingFileURL( + url_fixer::FixupRelativeFile(temp_dir_.path(), + file_part).possibly_invalid_spec(), full_path)); + EXPECT_TRUE(base::DeleteFile(full_path, false)); + + // create a filename we know doesn't exist and make sure it doesn't get + // fixed up to a file URL + base::FilePath nonexistent_file( + FILE_PATH_LITERAL("url_fixer_upper_nonexistent_file.txt")); + std::string fixedup(url_fixer::FixupRelativeFile( + temp_dir_.path(), nonexistent_file).possibly_invalid_spec()); + EXPECT_NE(std::string("file:///"), fixedup.substr(0, 8)); + EXPECT_FALSE(IsMatchingFileURL(fixedup, nonexistent_file)); + + // make a subdir to make sure relative paths with directories work, also + // test spaces: + // "app_dir\url fixer-upper dir\url fixer-upper existing file.txt" + base::FilePath sub_dir(FILE_PATH_LITERAL("url fixer-upper dir")); + base::FilePath sub_file( + 
FILE_PATH_LITERAL("url fixer-upper existing file.txt")); + base::FilePath new_dir = temp_dir_.path().Append(sub_dir); + base::CreateDirectory(new_dir); + ASSERT_TRUE(MakeTempFile(new_dir, sub_file, &full_path)); + full_path = base::MakeAbsoluteFilePath(full_path); + ASSERT_FALSE(full_path.empty()); + + // test file in the subdir + base::FilePath relative_file = sub_dir.Append(sub_file); + EXPECT_TRUE(IsMatchingFileURL( + url_fixer::FixupRelativeFile(temp_dir_.path(), + relative_file).possibly_invalid_spec(), full_path)); + + // test file in the subdir with different slashes and escaping. + base::FilePath::StringType relative_file_str = sub_dir.value() + + FILE_PATH_LITERAL("/") + sub_file.value(); + base::ReplaceSubstringsAfterOffset(&relative_file_str, 0, + FILE_PATH_LITERAL(" "), FILE_PATH_LITERAL("%20")); + EXPECT_TRUE(IsMatchingFileURL( + url_fixer::FixupRelativeFile(temp_dir_.path(), + base::FilePath(relative_file_str)).possibly_invalid_spec(), + full_path)); + + // test relative directories and duplicate slashes + // (should resolve to the same file as above) + relative_file_str = sub_dir.value() + FILE_PATH_LITERAL("/../") + + sub_dir.value() + FILE_PATH_LITERAL("///./") + sub_file.value(); + EXPECT_TRUE(IsMatchingFileURL( + url_fixer::FixupRelativeFile(temp_dir_.path(), + base::FilePath(relative_file_str)).possibly_invalid_spec(), + full_path)); + + // done with the subdir + EXPECT_TRUE(base::DeleteFile(full_path, false)); + EXPECT_TRUE(base::DeleteFile(new_dir, true)); + + // Test that an obvious HTTP URL isn't accidentally treated as an absolute + // file path (on account of system-specific craziness). + base::FilePath empty_path; + base::FilePath http_url_path(FILE_PATH_LITERAL("http://../")); + EXPECT_TRUE( + url_fixer::FixupRelativeFile(empty_path, http_url_path).SchemeIs("http")); +} |