author     blundell@chromium.org <blundell@chromium.org>  2014-06-12 14:29:02 +0000
committer  blundell@chromium.org <blundell@chromium.org>  2014-06-12 14:29:02 +0000
commit     9b5b1d60209fd342960797723e9ad108d49e6cad (patch)
tree       032759d7e9c9f1ab207d6c28308b1c788416e69b /components/url_fixer
parent     820fb1ed762a001b82ecc227f24d6dbde8ea6b17 (diff)
Componentize URLFixerUpper.
This CL moves URLFixerUpper into a new url_fixer component so that the code
can be shared with the iOS port. At present the component contains some minor
Chrome-specific logic: it rewrites about: URLs to the chrome:// scheme and
uses "version" as the default chrome:// host. We decided not to abstract this
logic at this time, since the only embedders wishing to use this component are
ports of Chrome. If a non-Chrome embedder later wants to use this component,
this behavior could easily be generalized (e.g., by exposing the constants
used for these purposes in the header file so that the embedder can customize
them).
BUG=373229
R=jam@chromium.org
TBR=mmenke
NOTREECHECKS=true
Review URL: https://codereview.chromium.org/320253004
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@276672 0039d316-1c4b-4281-b951-d872f2087c98
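For reference, a minimal, hypothetical usage sketch of the component's public
API as declared in the new components/url_fixer/url_fixer.h (the function
FixupOmniboxInput and the |user_text| parameter are illustrative only and are
not part of this CL):

#include <string>

#include "components/url_fixer/url_fixer.h"
#include "url/gurl.h"
#include "url/url_parse.h"

// Illustrative embedder code: segment raw user input and fix it up into a URL.
void FixupOmniboxInput(const std::string& user_text) {
  // SegmentURL() fills |parts| with the byte ranges of the scheme, host,
  // path, etc., and returns the canonicalized scheme (or the empty string
  // when the input is only whitespace).
  url::Parsed parts;
  std::string scheme = url_fixer::SegmentURL(user_text, &parts);

  // FixupURL() turns the input into a "more valid" GURL. With "com" as the
  // desired TLD, input such as "google" becomes "http://www.google.com/".
  // The result may still be invalid, so check is_valid() or use
  // possibly_invalid_spec().
  GURL url = url_fixer::FixupURL(user_text, "com");
  if (url.is_valid()) {
    // Navigate to url.spec(), hand it to the omnibox model, etc.
  }
}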
Diffstat (limited to 'components/url_fixer')
-rw-r--r--  components/url_fixer/DEPS                   |   3
-rw-r--r--  components/url_fixer/OWNERS                 |   2
-rw-r--r--  components/url_fixer/url_fixer.cc           | 662
-rw-r--r--  components/url_fixer/url_fixer.h            |  85
-rw-r--r--  components/url_fixer/url_fixer_unittest.cc  | 517
5 files changed, 1269 insertions, 0 deletions
diff --git a/components/url_fixer/DEPS b/components/url_fixer/DEPS new file mode 100644 index 0000000..8fa9d48 --- /dev/null +++ b/components/url_fixer/DEPS @@ -0,0 +1,3 @@ +include_rules = [ + "+net", +] diff --git a/components/url_fixer/OWNERS b/components/url_fixer/OWNERS new file mode 100644 index 0000000..863a84a --- /dev/null +++ b/components/url_fixer/OWNERS @@ -0,0 +1,2 @@ +brettw@chromium.org +pkasting@chromium.org diff --git a/components/url_fixer/url_fixer.cc b/components/url_fixer/url_fixer.cc new file mode 100644 index 0000000..5b90cd2 --- /dev/null +++ b/components/url_fixer/url_fixer.cc @@ -0,0 +1,662 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/url_fixer/url_fixer.h" + +#include <algorithm> + +#if defined(OS_POSIX) +#include "base/environment.h" +#endif +#include "base/file_util.h" +#include "base/logging.h" +#include "base/strings/string_util.h" +#include "base/strings/utf_string_conversions.h" +#include "net/base/escape.h" +#include "net/base/filename_util.h" +#include "net/base/net_util.h" +#include "net/base/registry_controlled_domains/registry_controlled_domain.h" +#include "url/url_file.h" +#include "url/url_parse.h" +#include "url/url_util.h" + +const char* url_fixer::home_directory_override = NULL; + +namespace { + +// Hardcode these constants to avoid dependences on //chrome and //content. +const char kChromeUIScheme[] = "chrome"; +const char kChromeUIDefaultHost[] = "version"; +const char kViewSourceScheme[] = "view-source"; + +// TODO(estade): Remove these ugly, ugly functions. They are only used in +// SegmentURL. A url::Parsed object keeps track of a bunch of indices into +// a url string, and these need to be updated when the URL is converted from +// UTF8 to UTF16. Instead of this after-the-fact adjustment, we should parse it +// in the correct string format to begin with. 
+url::Component UTF8ComponentToUTF16Component( + const std::string& text_utf8, + const url::Component& component_utf8) { + if (component_utf8.len == -1) + return url::Component(); + + std::string before_component_string = + text_utf8.substr(0, component_utf8.begin); + std::string component_string = + text_utf8.substr(component_utf8.begin, component_utf8.len); + base::string16 before_component_string_16 = + base::UTF8ToUTF16(before_component_string); + base::string16 component_string_16 = base::UTF8ToUTF16(component_string); + url::Component component_16(before_component_string_16.length(), + component_string_16.length()); + return component_16; +} + +void UTF8PartsToUTF16Parts(const std::string& text_utf8, + const url::Parsed& parts_utf8, + url::Parsed* parts) { + if (base::IsStringASCII(text_utf8)) { + *parts = parts_utf8; + return; + } + + parts->scheme = UTF8ComponentToUTF16Component(text_utf8, parts_utf8.scheme); + parts->username = + UTF8ComponentToUTF16Component(text_utf8, parts_utf8.username); + parts->password = + UTF8ComponentToUTF16Component(text_utf8, parts_utf8.password); + parts->host = UTF8ComponentToUTF16Component(text_utf8, parts_utf8.host); + parts->port = UTF8ComponentToUTF16Component(text_utf8, parts_utf8.port); + parts->path = UTF8ComponentToUTF16Component(text_utf8, parts_utf8.path); + parts->query = UTF8ComponentToUTF16Component(text_utf8, parts_utf8.query); + parts->ref = UTF8ComponentToUTF16Component(text_utf8, parts_utf8.ref); +} + +base::TrimPositions TrimWhitespaceUTF8(const std::string& input, + base::TrimPositions positions, + std::string* output) { + // This implementation is not so fast since it converts the text encoding + // twice. Please feel free to file a bug if this function hurts the + // performance of Chrome. + DCHECK(base::IsStringUTF8(input)); + base::string16 input16 = base::UTF8ToUTF16(input); + base::string16 output16; + base::TrimPositions result = + base::TrimWhitespace(input16, positions, &output16); + *output = base::UTF16ToUTF8(output16); + return result; +} + +// does some basic fixes for input that we want to test for file-ness +void PrepareStringForFileOps(const base::FilePath& text, + base::FilePath::StringType* output) { +#if defined(OS_WIN) + base::TrimWhitespace(text.value(), base::TRIM_ALL, output); + replace(output->begin(), output->end(), '/', '\\'); +#else + TrimWhitespaceUTF8(text.value(), base::TRIM_ALL, output); +#endif +} + +// Tries to create a full path from |text|. If the result is valid and the +// file exists, returns true and sets |full_path| to the result. Otherwise, +// returns false and leaves |full_path| unchanged. +bool ValidPathForFile(const base::FilePath::StringType& text, + base::FilePath* full_path) { + base::FilePath file_path = base::MakeAbsoluteFilePath(base::FilePath(text)); + if (file_path.empty()) + return false; + + if (!base::PathExists(file_path)) + return false; + + *full_path = file_path; + return true; +} + +#if defined(OS_POSIX) +// Given a path that starts with ~, return a path that starts with an +// expanded-out /user/foobar directory. +std::string FixupHomedir(const std::string& text) { + DCHECK(text.length() > 0 && text[0] == '~'); + + if (text.length() == 1 || text[1] == '/') { + const char* home = getenv(base::env_vars::kHome); + if (url_fixer::home_directory_override) + home = url_fixer::home_directory_override; + // We'll probably break elsewhere if $HOME is undefined, but check here + // just in case. 
+ if (!home) + return text; + return home + text.substr(1); + } + +// Otherwise, this is a path like ~foobar/baz, where we must expand to +// user foobar's home directory. Officially, we should use getpwent(), +// but that is a nasty blocking call. + +#if defined(OS_MACOSX) + static const char kHome[] = "/Users/"; +#else + static const char kHome[] = "/home/"; +#endif + return kHome + text.substr(1); +} +#endif + +// Tries to create a file: URL from |text| if it looks like a filename, even if +// it doesn't resolve as a valid path or to an existing file. Returns a +// (possibly invalid) file: URL in |fixed_up_url| for input beginning +// with a drive specifier or "\\". Returns the unchanged input in other cases +// (including file: URLs: these don't look like filenames). +std::string FixupPath(const std::string& text) { + DCHECK(!text.empty()); + + base::FilePath::StringType filename; +#if defined(OS_WIN) + base::FilePath input_path(base::UTF8ToWide(text)); + PrepareStringForFileOps(input_path, &filename); + + // Fixup Windows-style drive letters, where "C:" gets rewritten to "C|". + if (filename.length() > 1 && filename[1] == '|') + filename[1] = ':'; +#elif defined(OS_POSIX) + base::FilePath input_path(text); + PrepareStringForFileOps(input_path, &filename); + if (filename.length() > 0 && filename[0] == '~') + filename = FixupHomedir(filename); +#endif + + // Here, we know the input looks like a file. + GURL file_url = net::FilePathToFileURL(base::FilePath(filename)); + if (file_url.is_valid()) { + return base::UTF16ToUTF8(net::FormatUrl(file_url, + std::string(), + net::kFormatUrlOmitUsernamePassword, + net::UnescapeRule::NORMAL, + NULL, + NULL, + NULL)); + } + + // Invalid file URL, just return the input. + return text; +} + +// Checks |domain| to see if a valid TLD is already present. If not, appends +// |desired_tld| to the domain, and prepends "www." unless it's already present. +void AddDesiredTLD(const std::string& desired_tld, std::string* domain) { + if (desired_tld.empty() || domain->empty()) + return; + + // Check the TLD. If the return value is positive, we already have a TLD, so + // abort. If the return value is std::string::npos, there's no valid host, + // but we can try to append a TLD anyway, since the host may become valid once + // the TLD is attached -- for example, "999999999999" is detected as a broken + // IP address and marked invalid, but attaching ".com" makes it legal. When + // the return value is 0, there's a valid host with no known TLD, so we can + // definitely append the user's TLD. We disallow unknown registries here so + // users can input "mail.yahoo" and hit ctrl-enter to get + // "www.mail.yahoo.com". + const size_t registry_length = + net::registry_controlled_domains::GetRegistryLength( + *domain, + net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, + net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); + if ((registry_length != 0) && (registry_length != std::string::npos)) + return; + + // Add the suffix at the end of the domain. + const size_t domain_length(domain->length()); + DCHECK_GT(domain_length, 0U); + DCHECK_NE(desired_tld[0], '.'); + if ((*domain)[domain_length - 1] != '.') + domain->push_back('.'); + domain->append(desired_tld); + + // Now, if the domain begins with "www.", stop. + const std::string prefix("www."); + if (domain->compare(0, prefix.length(), prefix) != 0) { + // Otherwise, add www. to the beginning of the URL. 
+ domain->insert(0, prefix); + } +} + +inline void FixupUsername(const std::string& text, + const url::Component& part, + std::string* url) { + if (!part.is_valid()) + return; + + // We don't fix up the username at the moment. + url->append(text, part.begin, part.len); + // Do not append the trailing '@' because we might need to include the user's + // password. FixupURL itself will append the '@' for us. +} + +inline void FixupPassword(const std::string& text, + const url::Component& part, + std::string* url) { + if (!part.is_valid()) + return; + + // We don't fix up the password at the moment. + url->append(":"); + url->append(text, part.begin, part.len); +} + +void FixupHost(const std::string& text, + const url::Component& part, + bool has_scheme, + const std::string& desired_tld, + std::string* url) { + if (!part.is_valid()) + return; + + // Make domain valid. + // Strip all leading dots and all but one trailing dot, unless the user only + // typed dots, in which case their input is totally invalid and we should just + // leave it unchanged. + std::string domain(text, part.begin, part.len); + const size_t first_nondot(domain.find_first_not_of('.')); + if (first_nondot != std::string::npos) { + domain.erase(0, first_nondot); + size_t last_nondot(domain.find_last_not_of('.')); + DCHECK(last_nondot != std::string::npos); + last_nondot += 2; // Point at second period in ending string + if (last_nondot < domain.length()) + domain.erase(last_nondot); + } + + // Add any user-specified TLD, if applicable. + AddDesiredTLD(desired_tld, &domain); + + url->append(domain); +} + +void FixupPort(const std::string& text, + const url::Component& part, + std::string* url) { + if (!part.is_valid()) + return; + + // We don't fix up the port at the moment. + url->append(":"); + url->append(text, part.begin, part.len); +} + +inline void FixupPath(const std::string& text, + const url::Component& part, + std::string* url) { + if (!part.is_valid() || part.len == 0) { + // We should always have a path. + url->append("/"); + return; + } + + // Append the path as is. + url->append(text, part.begin, part.len); +} + +inline void FixupQuery(const std::string& text, + const url::Component& part, + std::string* url) { + if (!part.is_valid()) + return; + + // We don't fix up the query at the moment. + url->append("?"); + url->append(text, part.begin, part.len); +} + +inline void FixupRef(const std::string& text, + const url::Component& part, + std::string* url) { + if (!part.is_valid()) + return; + + // We don't fix up the ref at the moment. + url->append("#"); + url->append(text, part.begin, part.len); +} + +bool HasPort(const std::string& original_text, + const url::Component& scheme_component) { + // Find the range between the ":" and the "/". + size_t port_start = scheme_component.end() + 1; + size_t port_end = port_start; + while ((port_end < original_text.length()) && + !url::IsAuthorityTerminator(original_text[port_end])) + ++port_end; + if (port_end == port_start) + return false; + + // Scan the range to see if it is entirely digits. + for (size_t i = port_start; i < port_end; ++i) { + if (!IsAsciiDigit(original_text[i])) + return false; + } + + return true; +} + +// Try to extract a valid scheme from the beginning of |text|. +// If successful, set |scheme_component| to the text range where the scheme +// was located, and fill |canon_scheme| with its canonicalized form. +// Otherwise, return false and leave the outputs in an indeterminate state. 
+bool GetValidScheme(const std::string& text, + url::Component* scheme_component, + std::string* canon_scheme) { + canon_scheme->clear(); + + // Locate everything up to (but not including) the first ':' + if (!url::ExtractScheme( + text.data(), static_cast<int>(text.length()), scheme_component)) { + return false; + } + + // Make sure the scheme contains only valid characters, and convert + // to lowercase. This also catches IPv6 literals like [::1], because + // brackets are not in the whitelist. + url::StdStringCanonOutput canon_scheme_output(canon_scheme); + url::Component canon_scheme_component; + if (!url::CanonicalizeScheme(text.data(), + *scheme_component, + &canon_scheme_output, + &canon_scheme_component)) { + return false; + } + + // Strip the ':', and any trailing buffer space. + DCHECK_EQ(0, canon_scheme_component.begin); + canon_scheme->erase(canon_scheme_component.len); + + // We need to fix up the segmentation for "www.example.com:/". For this + // case, we guess that schemes with a "." are not actually schemes. + if (canon_scheme->find('.') != std::string::npos) + return false; + + // We need to fix up the segmentation for "www:123/". For this case, we + // will add an HTTP scheme later and make the URL parser happy. + // TODO(pkasting): Maybe we should try to use GURL's parser for this? + if (HasPort(text, *scheme_component)) + return false; + + // Everything checks out. + return true; +} + +// Performs the work for url_fixer::SegmentURL. |text| may be modified on +// output on success: a semicolon following a valid scheme is replaced with a +// colon. +std::string SegmentURLInternal(std::string* text, url::Parsed* parts) { + // Initialize the result. + *parts = url::Parsed(); + + std::string trimmed; + TrimWhitespaceUTF8(*text, base::TRIM_ALL, &trimmed); + if (trimmed.empty()) + return std::string(); // Nothing to segment. + +#if defined(OS_WIN) + int trimmed_length = static_cast<int>(trimmed.length()); + if (url::DoesBeginWindowsDriveSpec(trimmed.data(), 0, trimmed_length) || + url::DoesBeginUNCPath(trimmed.data(), 0, trimmed_length, true)) + return "file"; +#elif defined(OS_POSIX) + if (base::FilePath::IsSeparator(trimmed.data()[0]) || + trimmed.data()[0] == '~') + return "file"; +#endif + + // Otherwise, we need to look at things carefully. + std::string scheme; + if (!GetValidScheme(*text, &parts->scheme, &scheme)) { + // Try again if there is a ';' in the text. If changing it to a ':' results + // in a scheme being found, continue processing with the modified text. + bool found_scheme = false; + size_t semicolon = text->find(';'); + if (semicolon != 0 && semicolon != std::string::npos) { + (*text)[semicolon] = ':'; + if (GetValidScheme(*text, &parts->scheme, &scheme)) + found_scheme = true; + else + (*text)[semicolon] = ';'; + } + if (!found_scheme) { + // Couldn't determine the scheme, so just pick one. + parts->scheme.reset(); + scheme = StartsWithASCII(*text, "ftp.", false) ? url::kFtpScheme + : url::kHttpScheme; + } + } + + // Proceed with about and chrome schemes, but not file or nonstandard schemes. + if ((scheme != url::kAboutScheme) && (scheme != kChromeUIScheme) && + ((scheme == url::kFileScheme) || + !url::IsStandard( + scheme.c_str(), + url::Component(0, static_cast<int>(scheme.length()))))) { + return scheme; + } + + if (scheme == url::kFileSystemScheme) { + // Have the GURL parser do the heavy lifting for us. 
+ url::ParseFileSystemURL( + text->data(), static_cast<int>(text->length()), parts); + return scheme; + } + + if (parts->scheme.is_valid()) { + // Have the GURL parser do the heavy lifting for us. + url::ParseStandardURL( + text->data(), static_cast<int>(text->length()), parts); + return scheme; + } + + // We need to add a scheme in order for ParseStandardURL to be happy. + // Find the first non-whitespace character. + std::string::iterator first_nonwhite = text->begin(); + while ((first_nonwhite != text->end()) && IsWhitespace(*first_nonwhite)) + ++first_nonwhite; + + // Construct the text to parse by inserting the scheme. + std::string inserted_text(scheme); + inserted_text.append(url::kStandardSchemeSeparator); + std::string text_to_parse(text->begin(), first_nonwhite); + text_to_parse.append(inserted_text); + text_to_parse.append(first_nonwhite, text->end()); + + // Have the GURL parser do the heavy lifting for us. + url::ParseStandardURL( + text_to_parse.data(), static_cast<int>(text_to_parse.length()), parts); + + // Offset the results of the parse to match the original text. + const int offset = -static_cast<int>(inserted_text.length()); + url_fixer::OffsetComponent(offset, &parts->scheme); + url_fixer::OffsetComponent(offset, &parts->username); + url_fixer::OffsetComponent(offset, &parts->password); + url_fixer::OffsetComponent(offset, &parts->host); + url_fixer::OffsetComponent(offset, &parts->port); + url_fixer::OffsetComponent(offset, &parts->path); + url_fixer::OffsetComponent(offset, &parts->query); + url_fixer::OffsetComponent(offset, &parts->ref); + + return scheme; +} + +} // namespace + +std::string url_fixer::SegmentURL(const std::string& text, url::Parsed* parts) { + std::string mutable_text(text); + return SegmentURLInternal(&mutable_text, parts); +} + +base::string16 url_fixer::SegmentURL(const base::string16& text, + url::Parsed* parts) { + std::string text_utf8 = base::UTF16ToUTF8(text); + url::Parsed parts_utf8; + std::string scheme_utf8 = SegmentURL(text_utf8, &parts_utf8); + UTF8PartsToUTF16Parts(text_utf8, parts_utf8, parts); + return base::UTF8ToUTF16(scheme_utf8); +} + +GURL url_fixer::FixupURL(const std::string& text, + const std::string& desired_tld) { + std::string trimmed; + TrimWhitespaceUTF8(text, base::TRIM_ALL, &trimmed); + if (trimmed.empty()) + return GURL(); // Nothing here. + + // Segment the URL. + url::Parsed parts; + std::string scheme(SegmentURLInternal(&trimmed, &parts)); + + // For view-source: URLs, we strip "view-source:", do fixup, and stick it back + // on. This allows us to handle things like "view-source:google.com". + if (scheme == kViewSourceScheme) { + // Reject "view-source:view-source:..." to avoid deep recursion. + std::string view_source(kViewSourceScheme + std::string(":")); + if (!StartsWithASCII(text, view_source + view_source, false)) { + return GURL(kViewSourceScheme + std::string(":") + + FixupURL(trimmed.substr(scheme.length() + 1), desired_tld) + .possibly_invalid_spec()); + } + } + + // We handle the file scheme separately. + if (scheme == url::kFileScheme) + return GURL(parts.scheme.is_valid() ? text : FixupPath(text)); + + // We handle the filesystem scheme separately. + if (scheme == url::kFileSystemScheme) { + if (parts.inner_parsed() && parts.inner_parsed()->scheme.is_valid()) + return GURL(text); + return GURL(); + } + + // Parse and rebuild about: and chrome: URLs, except about:blank. 
+ bool chrome_url = + !LowerCaseEqualsASCII(trimmed, url::kAboutBlankURL) && + ((scheme == url::kAboutScheme) || (scheme == kChromeUIScheme)); + + // For some schemes whose layouts we understand, we rebuild it. + if (chrome_url || + url::IsStandard(scheme.c_str(), + url::Component(0, static_cast<int>(scheme.length())))) { + // Replace the about: scheme with the chrome: scheme. + std::string url(chrome_url ? kChromeUIScheme : scheme); + url.append(url::kStandardSchemeSeparator); + + // We need to check whether the |username| is valid because it is our + // responsibility to append the '@' to delineate the user information from + // the host portion of the URL. + if (parts.username.is_valid()) { + FixupUsername(trimmed, parts.username, &url); + FixupPassword(trimmed, parts.password, &url); + url.append("@"); + } + + FixupHost(trimmed, parts.host, parts.scheme.is_valid(), desired_tld, &url); + if (chrome_url && !parts.host.is_valid()) + url.append(kChromeUIDefaultHost); + FixupPort(trimmed, parts.port, &url); + FixupPath(trimmed, parts.path, &url); + FixupQuery(trimmed, parts.query, &url); + FixupRef(trimmed, parts.ref, &url); + + return GURL(url); + } + + // In the worst-case, we insert a scheme if the URL lacks one. + if (!parts.scheme.is_valid()) { + std::string fixed_scheme(scheme); + fixed_scheme.append(url::kStandardSchemeSeparator); + trimmed.insert(0, fixed_scheme); + } + + return GURL(trimmed); +} + +// The rules are different here than for regular fixup, since we need to handle +// input like "hello.html" and know to look in the current directory. Regular +// fixup will look for cues that it is actually a file path before trying to +// figure out what file it is. If our logic doesn't work, we will fall back on +// regular fixup. +GURL url_fixer::FixupRelativeFile(const base::FilePath& base_dir, + const base::FilePath& text) { + base::FilePath old_cur_directory; + if (!base_dir.empty()) { + // Save the old current directory before we move to the new one. + base::GetCurrentDirectory(&old_cur_directory); + base::SetCurrentDirectory(base_dir); + } + + // Allow funny input with extra whitespace and the wrong kind of slashes. + base::FilePath::StringType trimmed; + PrepareStringForFileOps(text, &trimmed); + + bool is_file = true; + // Avoid recognizing definite non-file URLs as file paths. + GURL gurl(trimmed); + if (gurl.is_valid() && gurl.IsStandard()) + is_file = false; + base::FilePath full_path; + if (is_file && !ValidPathForFile(trimmed, &full_path)) { +// Not a path as entered, try unescaping it in case the user has +// escaped things. We need to go through 8-bit since the escaped values +// only represent 8-bit values. +#if defined(OS_WIN) + std::wstring unescaped = base::UTF8ToWide(net::UnescapeURLComponent( + base::WideToUTF8(trimmed), + net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS)); +#elif defined(OS_POSIX) + std::string unescaped = net::UnescapeURLComponent( + trimmed, + net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS); +#endif + + if (!ValidPathForFile(unescaped, &full_path)) + is_file = false; + } + + // Put back the current directory if we saved it. 
+ if (!base_dir.empty()) + base::SetCurrentDirectory(old_cur_directory); + + if (is_file) { + GURL file_url = net::FilePathToFileURL(full_path); + if (file_url.is_valid()) + return GURL( + base::UTF16ToUTF8(net::FormatUrl(file_url, + std::string(), + net::kFormatUrlOmitUsernamePassword, + net::UnescapeRule::NORMAL, + NULL, + NULL, + NULL))); + // Invalid files fall through to regular processing. + } + +// Fall back on regular fixup for this input. +#if defined(OS_WIN) + std::string text_utf8 = base::WideToUTF8(text.value()); +#elif defined(OS_POSIX) + std::string text_utf8 = text.value(); +#endif + return FixupURL(text_utf8, std::string()); +} + +void url_fixer::OffsetComponent(int offset, url::Component* part) { + DCHECK(part); + + if (part->is_valid()) { + // Offset the location of this component. + part->begin += offset; + + // This part might not have existed in the original text. + if (part->begin < 0) + part->reset(); + } +} diff --git a/components/url_fixer/url_fixer.h b/components/url_fixer/url_fixer.h new file mode 100644 index 0000000..fa682d7 --- /dev/null +++ b/components/url_fixer/url_fixer.h @@ -0,0 +1,85 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_URL_FIXER_URL_FIXER_H_ +#define COMPONENTS_URL_FIXER_URL_FIXER_H_ + +#include <string> + +#include "base/strings/string16.h" +#include "url/gurl.h" + +namespace base { +class FilePath; +} + +namespace url { +struct Component; +struct Parsed; +} + +// This object is designed to convert various types of input into URLs that we +// know are valid. For example, user typing in the URL bar or command line +// options. This is NOT the place for converting between different types of +// URLs or parsing them, see net_util.h for that. +namespace url_fixer { + + // Segments the given text string into parts of a URL. This is most useful + // for schemes such as http, https, and ftp where |SegmentURL| will find many + // segments. Currently does not segment "file" schemes. + // Returns the canonicalized scheme, or the empty string when |text| is only + // whitespace. +std::string SegmentURL(const std::string& text, url::Parsed* parts); +base::string16 SegmentURL(const base::string16& text, url::Parsed* parts); + + // Converts |text| to a fixed-up URL and returns it. Attempts to make + // some "smart" adjustments to obviously-invalid input where possible. + // |text| may be an absolute path to a file, which will get converted to a + // "file:" URL. + // + // The result will be a "more" valid URL than the input. It may still not + // be valid, so check the return value's validity or use + // possibly_invalid_spec(). + // + // Schemes "about" and "chrome" are normalized to "chrome://", with slashes. + // "about:blank" is unaltered, as Webkit allows frames to access about:blank. + // Additionally, if a chrome URL does not have a valid host, as in "about:", + // the returned URL will have the host "version", as in "chrome://version". + // + // If |desired_tld| is non-empty, it represents the TLD the user wishes to + // append in the case of an incomplete domain. We check that this is not a + // file path and there does not appear to be a valid TLD already, then append + // |desired_tld| to the domain and prepend "www." (unless it, or a scheme, + // are already present.) This TLD should not have a leading '.' (use "com" + // instead of ".com"). 
+ GURL FixupURL(const std::string& text, const std::string& desired_tld); + + // Converts |text| to a fixed-up URL, allowing it to be a relative path on + // the local filesystem. Begin searching in |base_dir|; if empty, use the + // current working directory. If this resolves to a file on disk, convert it + // to a "file:" URL in |fixed_up_url|; otherwise, fall back to the behavior + // of FixupURL(). + // + // For "regular" input, even if it is possibly a file with a full path, you + // should use FixupURL() directly. This function should only be used when + // relative path handling is desired, as for command line processing. + GURL FixupRelativeFile(const base::FilePath& base_dir, + const base::FilePath& text); + + // Offsets the beginning index of |part| by |offset|, which is allowed to be + // negative. In some cases, the desired component does not exist at the given + // offset. For example, when converting from "http://foo" to "foo", the + // scheme component no longer exists. In such a case, the beginning index is + // set to 0. + // Does nothing if |part| is invalid. + void OffsetComponent(int offset, url::Component* part); + + // For paths like ~, we use $HOME for the current user's home + // directory. For tests, we allow our idea of $HOME to be overriden + // by this variable. + extern const char* home_directory_override; + +} // namespace url_fixer + +#endif // COMPONENTS_URL_FIXER_URL_FIXER_H_ diff --git a/components/url_fixer/url_fixer_unittest.cc b/components/url_fixer/url_fixer_unittest.cc new file mode 100644 index 0000000..fa4e03c --- /dev/null +++ b/components/url_fixer/url_fixer_unittest.cc @@ -0,0 +1,517 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include <stdlib.h> + +#include "base/base_paths.h" +#include "base/basictypes.h" +#include "base/file_util.h" +#include "base/files/file_path.h" +#include "base/path_service.h" +#include "base/strings/string_util.h" +#include "base/strings/utf_string_conversions.h" +#include "components/url_fixer/url_fixer.h" +#include "net/base/filename_util.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "url/gurl.h" +#include "url/url_parse.h" + +namespace url { + +std::ostream& operator<<(std::ostream& os, const Component& part) { + return os << "(begin=" << part.begin << ", len=" << part.len << ")"; +} + +} // namespace url + +struct SegmentCase { + const std::string input; + const std::string result; + const url::Component scheme; + const url::Component username; + const url::Component password; + const url::Component host; + const url::Component port; + const url::Component path; + const url::Component query; + const url::Component ref; +}; + +static const SegmentCase segment_cases[] = { + { "http://www.google.com/", "http", + url::Component(0, 4), // scheme + url::Component(), // username + url::Component(), // password + url::Component(7, 14), // host + url::Component(), // port + url::Component(21, 1), // path + url::Component(), // query + url::Component(), // ref + }, + { "aBoUt:vErSiOn", "about", + url::Component(0, 5), // scheme + url::Component(), // username + url::Component(), // password + url::Component(6, 7), // host + url::Component(), // port + url::Component(), // path + url::Component(), // query + url::Component(), // ref + }, + { "about:host/path?query#ref", "about", + url::Component(0, 5), // scheme + url::Component(), // username + url::Component(), // password + url::Component(6, 4), // host + url::Component(), // port + url::Component(10, 5), // path + url::Component(16, 5), // query + url::Component(22, 3), // ref + }, + { "about://host/path?query#ref", "about", + url::Component(0, 5), // scheme + url::Component(), // username + url::Component(), // password + url::Component(8, 4), // host + url::Component(), // port + url::Component(12, 5), // path + url::Component(18, 5), // query + url::Component(24, 3), // ref + }, + { "chrome:host/path?query#ref", "chrome", + url::Component(0, 6), // scheme + url::Component(), // username + url::Component(), // password + url::Component(7, 4), // host + url::Component(), // port + url::Component(11, 5), // path + url::Component(17, 5), // query + url::Component(23, 3), // ref + }, + { "chrome://host/path?query#ref", "chrome", + url::Component(0, 6), // scheme + url::Component(), // username + url::Component(), // password + url::Component(9, 4), // host + url::Component(), // port + url::Component(13, 5), // path + url::Component(19, 5), // query + url::Component(25, 3), // ref + }, + { " www.google.com:124?foo#", "http", + url::Component(), // scheme + url::Component(), // username + url::Component(), // password + url::Component(4, 14), // host + url::Component(19, 3), // port + url::Component(), // path + url::Component(23, 3), // query + url::Component(27, 0), // ref + }, + { "user@www.google.com", "http", + url::Component(), // scheme + url::Component(0, 4), // username + url::Component(), // password + url::Component(5, 14), // host + url::Component(), // port + url::Component(), // path + url::Component(), // query + url::Component(), // ref + }, + { "ftp:/user:P:a$$Wd@..ftp.google.com...::23///pub?foo#bar", "ftp", + url::Component(0, 3), // scheme + url::Component(5, 4), // username + url::Component(10, 7), // 
password + url::Component(18, 20), // host + url::Component(39, 2), // port + url::Component(41, 6), // path + url::Component(48, 3), // query + url::Component(52, 3), // ref + }, + { "[2001:db8::1]/path", "http", + url::Component(), // scheme + url::Component(), // username + url::Component(), // password + url::Component(0, 13), // host + url::Component(), // port + url::Component(13, 5), // path + url::Component(), // query + url::Component(), // ref + }, + { "[::1]", "http", + url::Component(), // scheme + url::Component(), // username + url::Component(), // password + url::Component(0, 5), // host + url::Component(), // port + url::Component(), // path + url::Component(), // query + url::Component(), // ref + }, + // Incomplete IPv6 addresses (will not canonicalize). + { "[2001:4860:", "http", + url::Component(), // scheme + url::Component(), // username + url::Component(), // password + url::Component(0, 11), // host + url::Component(), // port + url::Component(), // path + url::Component(), // query + url::Component(), // ref + }, + { "[2001:4860:/foo", "http", + url::Component(), // scheme + url::Component(), // username + url::Component(), // password + url::Component(0, 11), // host + url::Component(), // port + url::Component(11, 4), // path + url::Component(), // query + url::Component(), // ref + }, + { "http://:b005::68]", "http", + url::Component(0, 4), // scheme + url::Component(), // username + url::Component(), // password + url::Component(7, 10), // host + url::Component(), // port + url::Component(), // path + url::Component(), // query + url::Component(), // ref + }, + // Can't do anything useful with this. + { ":b005::68]", "", + url::Component(0, 0), // scheme + url::Component(), // username + url::Component(), // password + url::Component(), // host + url::Component(), // port + url::Component(), // path + url::Component(), // query + url::Component(), // ref + }, +}; + +typedef testing::Test URLFixerTest; + +TEST(URLFixerTest, SegmentURL) { + std::string result; + url::Parsed parts; + + for (size_t i = 0; i < arraysize(segment_cases); ++i) { + SegmentCase value = segment_cases[i]; + result = url_fixer::SegmentURL(value.input, &parts); + EXPECT_EQ(value.result, result); + EXPECT_EQ(value.scheme, parts.scheme); + EXPECT_EQ(value.username, parts.username); + EXPECT_EQ(value.password, parts.password); + EXPECT_EQ(value.host, parts.host); + EXPECT_EQ(value.port, parts.port); + EXPECT_EQ(value.path, parts.path); + EXPECT_EQ(value.query, parts.query); + EXPECT_EQ(value.ref, parts.ref); + } +} + +// Creates a file and returns its full name as well as the decomposed +// version. 
Example: +// full_path = "c:\foo\bar.txt" +// dir = "c:\foo" +// file_name = "bar.txt" +static bool MakeTempFile(const base::FilePath& dir, + const base::FilePath& file_name, + base::FilePath* full_path) { + *full_path = dir.Append(file_name); + return base::WriteFile(*full_path, "", 0) == 0; +} + +// Returns true if the given URL is a file: URL that matches the given file +static bool IsMatchingFileURL(const std::string& url, + const base::FilePath& full_file_path) { + if (url.length() <= 8) + return false; + if (std::string("file:///") != url.substr(0, 8)) + return false; // no file:/// prefix + if (url.find('\\') != std::string::npos) + return false; // contains backslashes + + base::FilePath derived_path; + net::FileURLToFilePath(GURL(url), &derived_path); + + return base::FilePath::CompareEqualIgnoreCase(derived_path.value(), + full_file_path.value()); +} + +struct FixupCase { + const std::string input; + const std::string output; +} fixup_cases[] = { + {"www.google.com", "http://www.google.com/"}, + {" www.google.com ", "http://www.google.com/"}, + {" foo.com/asdf bar", "http://foo.com/asdf%20%20bar"}, + {"..www.google.com..", "http://www.google.com./"}, + {"http://......", "http://....../"}, + {"http://host.com:ninety-two/", "http://host.com:ninety-two/"}, + {"http://host.com:ninety-two?foo", "http://host.com:ninety-two/?foo"}, + {"google.com:123", "http://google.com:123/"}, + {"about:", "chrome://version/"}, + {"about:foo", "chrome://foo/"}, + {"about:version", "chrome://version/"}, + {"about:blank", "about:blank"}, + {"about:usr:pwd@hst/pth?qry#ref", "chrome://usr:pwd@hst/pth?qry#ref"}, + {"about://usr:pwd@hst/pth?qry#ref", "chrome://usr:pwd@hst/pth?qry#ref"}, + {"chrome:usr:pwd@hst/pth?qry#ref", "chrome://usr:pwd@hst/pth?qry#ref"}, + {"chrome://usr:pwd@hst/pth?qry#ref", "chrome://usr:pwd@hst/pth?qry#ref"}, + {"www:123", "http://www:123/"}, + {" www:123", "http://www:123/"}, + {"www.google.com?foo", "http://www.google.com/?foo"}, + {"www.google.com#foo", "http://www.google.com/#foo"}, + {"www.google.com?", "http://www.google.com/?"}, + {"www.google.com#", "http://www.google.com/#"}, + {"www.google.com:123?foo#bar", "http://www.google.com:123/?foo#bar"}, + {"user@www.google.com", "http://user@www.google.com/"}, + {"\xE6\xB0\xB4.com", "http://xn--1rw.com/"}, + // It would be better if this next case got treated as http, but I don't see + // a clean way to guess this isn't the new-and-exciting "user" scheme. + {"user:passwd@www.google.com:8080/", "user:passwd@www.google.com:8080/"}, + // {"file:///c:/foo/bar%20baz.txt", "file:///C:/foo/bar%20baz.txt"}, + {"ftp.google.com", "ftp://ftp.google.com/"}, + {" ftp.google.com", "ftp://ftp.google.com/"}, + {"FTP.GooGle.com", "ftp://ftp.google.com/"}, + {"ftpblah.google.com", "http://ftpblah.google.com/"}, + {"ftp", "http://ftp/"}, + {"google.ftp.com", "http://google.ftp.com/"}, + // URLs which end with 0x85 (NEL in ISO-8859). + {"http://foo.com/s?q=\xd0\x85", "http://foo.com/s?q=%D0%85"}, + {"http://foo.com/s?q=\xec\x97\x85", "http://foo.com/s?q=%EC%97%85"}, + {"http://foo.com/s?q=\xf0\x90\x80\x85", "http://foo.com/s?q=%F0%90%80%85"}, + // URLs which end with 0xA0 (non-break space in ISO-8859). + {"http://foo.com/s?q=\xd0\xa0", "http://foo.com/s?q=%D0%A0"}, + {"http://foo.com/s?q=\xec\x97\xa0", "http://foo.com/s?q=%EC%97%A0"}, + {"http://foo.com/s?q=\xf0\x90\x80\xa0", "http://foo.com/s?q=%F0%90%80%A0"}, + // URLs containing IPv6 literals. 
+ {"[2001:db8::2]", "http://[2001:db8::2]/"}, + {"[::]:80", "http://[::]/"}, + {"[::]:80/path", "http://[::]/path"}, + {"[::]:180/path", "http://[::]:180/path"}, + // TODO(pmarks): Maybe we should parse bare IPv6 literals someday. + {"::1", "::1"}, + // Semicolon as scheme separator for standard schemes. + {"http;//www.google.com/", "http://www.google.com/"}, + {"about;chrome", "chrome://chrome/"}, + // Semicolon left as-is for non-standard schemes. + {"whatsup;//fool", "whatsup://fool"}, + // Semicolon left as-is in URL itself. + {"http://host/port?query;moar", "http://host/port?query;moar"}, + // Fewer slashes than expected. + {"http;www.google.com/", "http://www.google.com/"}, + {"http;/www.google.com/", "http://www.google.com/"}, + // Semicolon at start. + {";http://www.google.com/", "http://%3Bhttp//www.google.com/"}, +}; + +TEST(URLFixerTest, FixupURL) { + for (size_t i = 0; i < arraysize(fixup_cases); ++i) { + FixupCase value = fixup_cases[i]; + EXPECT_EQ(value.output, + url_fixer::FixupURL(value.input, "").possibly_invalid_spec()) + << "input: " << value.input; + } + + // Check the TLD-appending functionality. + FixupCase tld_cases[] = { + {"google", "http://www.google.com/"}, + {"google.", "http://www.google.com/"}, + {"google..", "http://www.google.com/"}, + {".google", "http://www.google.com/"}, + {"www.google", "http://www.google.com/"}, + {"google.com", "http://google.com/"}, + {"http://google", "http://www.google.com/"}, + {"..google..", "http://www.google.com/"}, + {"http://www.google", "http://www.google.com/"}, + {"9999999999999999", "http://www.9999999999999999.com/"}, + {"google/foo", "http://www.google.com/foo"}, + {"google.com/foo", "http://google.com/foo"}, + {"google/?foo=.com", "http://www.google.com/?foo=.com"}, + {"www.google/?foo=www.", "http://www.google.com/?foo=www."}, + {"google.com/?foo=.com", "http://google.com/?foo=.com"}, + {"http://www.google.com", "http://www.google.com/"}, + {"google:123", "http://www.google.com:123/"}, + {"http://google:123", "http://www.google.com:123/"}, + }; + for (size_t i = 0; i < arraysize(tld_cases); ++i) { + FixupCase value = tld_cases[i]; + EXPECT_EQ(value.output, + url_fixer::FixupURL(value.input, "com").possibly_invalid_spec()); + } +} + +// Test different types of file inputs to URIFixerUpper::FixupURL. This +// doesn't go into the nice array of fixups above since the file input +// has to exist. +TEST(URLFixerTest, FixupFile) { + // this "original" filename is the one we tweak to get all the variations + base::FilePath dir; + base::FilePath original; + ASSERT_TRUE(PathService::Get(base::DIR_MODULE, &dir)); + ASSERT_TRUE(MakeTempFile( + dir, + base::FilePath(FILE_PATH_LITERAL("url fixer upper existing file.txt")), + &original)); + + // reference path + GURL golden(net::FilePathToFileURL(original)); + + // c:\foo\bar.txt -> file:///c:/foo/bar.txt (basic) + GURL fixedup(url_fixer::FixupURL(original.AsUTF8Unsafe(), std::string())); + EXPECT_EQ(golden, fixedup); + + // TODO(port): Make some equivalent tests for posix. 
+#if defined(OS_WIN) + // c|/foo\bar.txt -> file:///c:/foo/bar.txt (pipe allowed instead of colon) + std::string cur(base::WideToUTF8(original.value())); + EXPECT_EQ(':', cur[1]); + cur[1] = '|'; + EXPECT_EQ(golden, url_fixer::FixupURL(cur, std::string())); + + FixupCase cases[] = { + {"c:\\Non-existent%20file.txt", "file:///C:/Non-existent%2520file.txt"}, + + // \\foo\bar.txt -> file://foo/bar.txt + // UNC paths, this file won't exist, but since there are no escapes, it + // should be returned just converted to a file: URL. + {"\\\\NonexistentHost\\foo\\bar.txt", "file://nonexistenthost/foo/bar.txt"}, + // We do this strictly, like IE8, which only accepts this form using + // backslashes and not forward ones. Turning "//foo" into "http" matches + // Firefox and IE, silly though it may seem (it falls out of adding "http" + // as the default protocol if you haven't entered one). + {"//NonexistentHost\\foo/bar.txt", "http://nonexistenthost/foo/bar.txt"}, + {"file:///C:/foo/bar", "file:///C:/foo/bar"}, + + // Much of the work here comes from GURL's canonicalization stage. + {"file://C:/foo/bar", "file:///C:/foo/bar"}, + {"file:c:", "file:///C:/"}, + {"file:c:WINDOWS", "file:///C:/WINDOWS"}, + {"file:c|Program Files", "file:///C:/Program%20Files"}, + {"file:/file", "file://file/"}, + {"file:////////c:\\foo", "file:///C:/foo"}, + {"file://server/folder/file", "file://server/folder/file"}, + + // These are fixups we don't do, but could consider: + // {"file:///foo:/bar", "file://foo/bar"}, + // {"file:/\\/server\\folder/file", "file://server/folder/file"}, + }; +#elif defined(OS_POSIX) + +#if defined(OS_MACOSX) +#define HOME "/Users/" +#else +#define HOME "/home/" +#endif + url_fixer::home_directory_override = "/foo"; + FixupCase cases[] = { + // File URLs go through GURL, which tries to escape intelligently. + {"/A%20non-existent file.txt", "file:///A%2520non-existent%20file.txt"}, + // A plain "/" refers to the root. + {"/", "file:///"}, + + // These rely on the above home_directory_override. + {"~", "file:///foo"}, + {"~/bar", "file:///foo/bar"}, + + // References to other users' homedirs. 
+ {"~foo", "file://" HOME "foo"}, + {"~x/blah", "file://" HOME "x/blah"}, + }; +#endif + + for (size_t i = 0; i < arraysize(cases); i++) { + EXPECT_EQ(cases[i].output, + url_fixer::FixupURL(cases[i].input, "").possibly_invalid_spec()); + } + + EXPECT_TRUE(base::DeleteFile(original, false)); +} + +TEST(URLFixerTest, FixupRelativeFile) { + base::FilePath full_path, dir; + base::FilePath file_part( + FILE_PATH_LITERAL("url_fixer_upper_existing_file.txt")); + ASSERT_TRUE(PathService::Get(base::DIR_MODULE, &dir)); + ASSERT_TRUE(MakeTempFile(dir, file_part, &full_path)); + full_path = base::MakeAbsoluteFilePath(full_path); + ASSERT_FALSE(full_path.empty()); + + // make sure we pass through good URLs + for (size_t i = 0; i < arraysize(fixup_cases); ++i) { + FixupCase value = fixup_cases[i]; + base::FilePath input = base::FilePath::FromUTF8Unsafe(value.input); + EXPECT_EQ(value.output, + url_fixer::FixupRelativeFile(dir, input).possibly_invalid_spec()); + } + + // make sure the existing file got fixed-up to a file URL, and that there + // are no backslashes + EXPECT_TRUE(IsMatchingFileURL( + url_fixer::FixupRelativeFile(dir, file_part).possibly_invalid_spec(), + full_path)); + EXPECT_TRUE(base::DeleteFile(full_path, false)); + + // create a filename we know doesn't exist and make sure it doesn't get + // fixed up to a file URL + base::FilePath nonexistent_file( + FILE_PATH_LITERAL("url_fixer_upper_nonexistent_file.txt")); + std::string fixedup(url_fixer::FixupRelativeFile(dir, nonexistent_file) + .possibly_invalid_spec()); + EXPECT_NE(std::string("file:///"), fixedup.substr(0, 8)); + EXPECT_FALSE(IsMatchingFileURL(fixedup, nonexistent_file)); + + // make a subdir to make sure relative paths with directories work, also + // test spaces: + // "app_dir\url fixer-upper dir\url fixer-upper existing file.txt" + base::FilePath sub_dir(FILE_PATH_LITERAL("url fixer-upper dir")); + base::FilePath sub_file( + FILE_PATH_LITERAL("url fixer-upper existing file.txt")); + base::FilePath new_dir = dir.Append(sub_dir); + base::CreateDirectory(new_dir); + ASSERT_TRUE(MakeTempFile(new_dir, sub_file, &full_path)); + full_path = base::MakeAbsoluteFilePath(full_path); + ASSERT_FALSE(full_path.empty()); + + // test file in the subdir + base::FilePath relative_file = sub_dir.Append(sub_file); + EXPECT_TRUE(IsMatchingFileURL( + url_fixer::FixupRelativeFile(dir, relative_file).possibly_invalid_spec(), + full_path)); + + // test file in the subdir with different slashes and escaping. + base::FilePath::StringType relative_file_str = sub_dir.value() + + FILE_PATH_LITERAL("/") + sub_file.value(); + ReplaceSubstringsAfterOffset(&relative_file_str, 0, + FILE_PATH_LITERAL(" "), FILE_PATH_LITERAL("%20")); + EXPECT_TRUE(IsMatchingFileURL( + url_fixer::FixupRelativeFile(dir, base::FilePath(relative_file_str)) + .possibly_invalid_spec(), + full_path)); + + // test relative directories and duplicate slashes + // (should resolve to the same file as above) + relative_file_str = sub_dir.value() + FILE_PATH_LITERAL("/../") + + sub_dir.value() + FILE_PATH_LITERAL("///./") + sub_file.value(); + EXPECT_TRUE(IsMatchingFileURL( + url_fixer::FixupRelativeFile(dir, base::FilePath(relative_file_str)) + .possibly_invalid_spec(), + full_path)); + + // done with the subdir + EXPECT_TRUE(base::DeleteFile(full_path, false)); + EXPECT_TRUE(base::DeleteFile(new_dir, true)); + + // Test that an obvious HTTP URL isn't accidentally treated as an absolute + // file path (on account of system-specific craziness). 
+ base::FilePath empty_path; + base::FilePath http_url_path(FILE_PATH_LITERAL("http://../")); + EXPECT_TRUE( + url_fixer::FixupRelativeFile(empty_path, http_url_path).SchemeIs("http")); +} |