Merge Chromium src@r53293

Change-Id: Ia79acf8670f385cee48c45b0a75371d8e950af34
author: Ben Murdoch <benm@google.com> 2010-07-29 17:14:53 +0100
committer: Ben Murdoch <benm@google.com> 2010-08-04 14:29:45 +0100
commit: c407dc5cd9bdc5668497f21b26b09d988ab439de (patch)
tree: 7eaf8707c0309516bdb042ad976feedaf72b0bb1 /googleurl
parent: 0998b1cdac5733f299c12d88bc31ef9c8035b8fa (diff)
download: external_chromium-c407dc5cd9bdc5668497f21b26b09d988ab439de.zip
external_chromium-c407dc5cd9bdc5668497f21b26b09d988ab439de.tar.gz
external_chromium-c407dc5cd9bdc5668497f21b26b09d988ab439de.tar.bz2
19 files changed, 908 insertions, 543 deletions
diff --git a/googleurl/src/gurl.cc b/googleurl/src/gurl.cc
index 2dab0b2..a0bfd26 100644
--- a/googleurl/src/gurl.cc
+++ b/googleurl/src/gurl.cc
@@ -304,8 +304,7 @@ GURL GURL::GetWithEmptyPath() const {
 }
 
 bool GURL::IsStandard() const {
-  return url_util::IsStandard(spec_.data(), static_cast<int>(spec_.length()),
-                              parsed_.scheme);
+  return url_util::IsStandard(spec_.data(), parsed_.scheme);
 }
 
 bool GURL::SchemeIs(const char* lower_ascii_scheme) const {
diff --git a/googleurl/src/gurl.h b/googleurl/src/gurl.h
index 36cd14c..29fea81 100644
--- a/googleurl/src/gurl.h
+++ b/googleurl/src/gurl.h
@@ -36,6 +36,7 @@
 #include "base/string16.h"
 #include "googleurl/src/url_canon.h"
 #include "googleurl/src/url_canon_stdstring.h"
+#include "googleurl/src/url_common.h"
 #include "googleurl/src/url_parse.h"
 
 class GURL {
@@ -44,11 +45,11 @@ class GURL {
   typedef url_canon::StdStringReplacements<string16> ReplacementsW;
 
   // Creates an empty, invalid URL.
-  GURL();
+  GURL_API GURL();
 
   // Copy construction is relatively inexpensive, with most of the time going
   // to reallocating the string. It does not re-parse.
-  GURL(const GURL& other);
+  GURL_API GURL(const GURL& other);
 
   // The narrow version requires the input be UTF-8. Invalid UTF-8 input will
   // result in an invalid URL.
@@ -57,14 +58,16 @@ class GURL {
   // encode the query parameters. It is probably sufficient for the narrow
   // version to assume the query parameter encoding should be the same as the
   // input encoding.
-  explicit GURL(const std::string& url_string /*, output_param_encoding*/);
-  explicit GURL(const string16& url_string /*, output_param_encoding*/);
+  GURL_API explicit GURL(const std::string& url_string
+                         /*, output_param_encoding*/);
+  GURL_API explicit GURL(const string16& url_string
+                         /*, output_param_encoding*/);
 
   // Constructor for URLs that have already been parsed and canonicalized. This
   // is used for conversions from KURL, for example. The caller must supply all
   // information associated with the URL, which must be correct and consistent.
-  GURL(const char* canonical_spec, size_t canonical_spec_len,
-       const url_parse::Parsed& parsed, bool is_valid);
+  GURL_API GURL(const char* canonical_spec, size_t canonical_spec_len,
+                const url_parse::Parsed& parsed, bool is_valid);
 
   // Returns true when this object represents a valid parsed URL. When not
   // valid, other functions will still succeed, but you will not get canonical
@@ -96,7 +99,7 @@ class GURL {
   // Use invalid_spec() below to get the unusable spec of an invalid URL. This
   // separation is designed to prevent errors that may cause security problems
   // that could result from the mistaken use of an invalid URL.
-  const std::string& spec() const;
+  GURL_API const std::string& spec() const;
 
   // Returns the potentially invalid spec for the URL. This spec MUST NOT be
   // modified or sent over the network. It is designed to be displayed in error
@@ -148,8 +151,8 @@ class GURL {
   //
   // It is an error to resolve a URL relative to an invalid URL. The result
   // will be the empty URL.
-  GURL Resolve(const std::string& relative) const;
-  GURL Resolve(const string16& relative) const;
+  GURL_API GURL Resolve(const std::string& relative) const;
+  GURL_API GURL Resolve(const string16& relative) const;
 
   // Like Resolve() above but takes a character set encoder which will be used
   // for any query text specified in the input. The charset converter parameter
@@ -158,10 +161,10 @@ class GURL {
   // TODO(brettw): These should be replaced with versions that take something
   // more friendly than a raw CharsetConverter (maybe like an ICU character set
   // name).
-  GURL ResolveWithCharsetConverter(
+  GURL_API GURL ResolveWithCharsetConverter(
       const std::string& relative,
       url_canon::CharsetConverter* charset_converter) const;
-  GURL ResolveWithCharsetConverter(
+  GURL_API GURL ResolveWithCharsetConverter(
       const string16& relative,
       url_canon::CharsetConverter* charset_converter) const;
 
@@ -176,9 +179,9 @@ class GURL {
   //
   // Note that we use the more general url_canon::Replacements type to give
   // callers extra flexibility rather than our override.
-  GURL ReplaceComponents(
+  GURL_API GURL ReplaceComponents(
       const url_canon::Replacements<char>& replacements) const;
-  GURL ReplaceComponents(
+  GURL_API GURL ReplaceComponents(
       const url_canon::Replacements<char16>& replacements) const;
 
   // A helper function that is equivalent to replacing the path with a slash
@@ -190,7 +193,7 @@ class GURL {
   //
   // It is an error to get an empty path on an invalid URL. The result
   // will be the empty URL.
-  GURL GetWithEmptyPath() const;
+  GURL_API GURL GetWithEmptyPath() const;
 
   // A helper function to return a GURL containing just the scheme, host,
   // and port from a URL. Equivalent to clearing any username and password,
@@ -201,19 +204,19 @@ class GURL {
   //
   // It is an error to get the origin of an invalid URL. The result
   // will be the empty URL.
-  GURL GetOrigin() const;
+  GURL_API GURL GetOrigin() const;
 
   // Returns true if the scheme for the current URL is a known "standard"
-  // scheme or there is a "://" after it. Standard schemes have an authority
-  // and a path section. This includes file:, which some callers may want to
-  // filter out explicitly by calling SchemeIsFile.
-  bool IsStandard() const;
+  // scheme. Standard schemes have an authority and a path section. This
+  // includes file:, which some callers may want to filter out explicitly by
+  // calling SchemeIsFile.
+  GURL_API bool IsStandard() const;
 
   // Returns true if the given parameter (should be lower-case ASCII to match
   // the canonicalized scheme) is the scheme for this URL. This call is more
   // efficient than getting the scheme and comparing it because no copies or
   // object constructions are done.
-  bool SchemeIs(const char* lower_ascii_scheme) const;
+  GURL_API bool SchemeIs(const char* lower_ascii_scheme) const;
 
   // We often need to know if this is a file URL. File URLs are "standard", but
   // are often treated separately by some programs.
@@ -229,7 +232,7 @@ class GURL {
   // Returns true if the hostname is an IP address. Note: this function isn't
   // as cheap as a simple getter because it re-parses the hostname to verify.
   // This currently identifies only IPv4 addresses (bug 822685).
-  bool HostIsIPAddress() const;
+  GURL_API bool HostIsIPAddress() const;
 
   // Getters for various components of the URL. The returned string will be
   // empty if the component is empty or is not present.
@@ -295,24 +298,24 @@ class GURL {
 
   // Returns a parsed version of the port. Can also be any of the special
   // values defined in Parsed for ExtractPort.
-  int IntPort() const;
+  GURL_API int IntPort() const;
 
   // Returns the port number of the url, or the default port number.
   // If the scheme has no concept of port (or unknown default) returns
   // PORT_UNSPECIFIED.
-  int EffectiveIntPort() const;
+  GURL_API int EffectiveIntPort() const;
 
   // Extracts the filename portion of the path and returns it. The filename
   // is everything after the last slash in the path. This may be empty.
-  std::string ExtractFileName() const;
+  GURL_API std::string ExtractFileName() const;
 
   // Returns the path that should be sent to the server. This is the path,
   // parameter, and query portions of the URL. It is guaranteed to be ASCII.
-  std::string PathForRequest() const;
+  GURL_API std::string PathForRequest() const;
 
   // Returns the host, excluding the square brackets surrounding IPv6 address
   // literals.  This can be useful for passing to getaddrinfo().
-  std::string HostNoBrackets() const;
+  GURL_API std::string HostNoBrackets() const;
 
   // Returns true if this URL's host matches or is in the same domain as
   // the given input string. For example if this URL was "www.google.com",
@@ -324,7 +327,7 @@ class GURL {
   //
   // When DomainIs is given the domain_len parameter, lower_ascii_domain is not
   // guaranteed to be terminated with a NULL character.
-  bool DomainIs(const char* lower_ascii_domain, int domain_len) const;
+  GURL_API bool DomainIs(const char* lower_ascii_domain, int domain_len) const;
 
   // When DomainIs is given only lower_ascii_domain, the domain string must be
   // terminated with a NULL character.
@@ -335,12 +338,12 @@ class GURL {
 
   // Swaps the contents of this GURL object with the argument without doing
   // any memory allocations.
-  void Swap(GURL* other);
+  GURL_API void Swap(GURL* other);
 
   // Returns a reference to a singleton empty GURL. This object is for callers
   // who return references but don't have anything to return in some cases.
   // This function may be called from any thread.
-  static const GURL& EmptyGURL();
+  GURL_API static const GURL& EmptyGURL();
 
  private:
   // Returns the substring of the input identified by the given component.
diff --git a/googleurl/src/gurl_unittest.cc b/googleurl/src/gurl_unittest.cc
index 4e81de6..079e1ea 100644
--- a/googleurl/src/gurl_unittest.cc
+++ b/googleurl/src/gurl_unittest.cc
@@ -31,35 +31,36 @@ void SetupReplacement(void (url_canon::Replacements<CHAR>::*func)(const CHAR*,
   }
 }
 
+// Returns the canonicalized string for the given URL string for the
+// GURLTest.Types test.
+std::string TypesTestCase(const char* src) {
+  GURL gurl(src);
+  return gurl.possibly_invalid_spec();
+}
+
 }  // namespace
 
 // Different types of URLs should be handled differently by url_util, and
 // handed off to different canonicalizers.
 TEST(GURLTest, Types) {
-  struct TypeTest {
-    const char* src;
-    const char* expected;
-  } type_cases[] = {
-      // URLs with "://" should be treated as standard and have a hostname, even
-      // when the scheme is unknown.
-    {"something:///HOSTNAME.com/", "something://hostname.com/"},
-      // In the reverse, lacking a "://" means a path URL so no canonicalization
-      // should happen.
-    {"something:HOSTNAME.com/", "something:HOSTNAME.com/"},
-    {"something:/HOSTNAME.com/", "something:/HOSTNAME.com/"},
+  // URLs with unknown schemes should be treated as path URLs, even when they
+  // have things like "://".
+  EXPECT_EQ("something:///HOSTNAME.com/",
+            TypesTestCase("something:///HOSTNAME.com/"));
+
+  // In the reverse, known schemes should always trigger standard URL handling.
+  EXPECT_EQ("http://hostname.com/", TypesTestCase("http:HOSTNAME.com"));
+  EXPECT_EQ("http://hostname.com/", TypesTestCase("http:/HOSTNAME.com"));
+  EXPECT_EQ("http://hostname.com/", TypesTestCase("http://HOSTNAME.com"));
+  EXPECT_EQ("http://hostname.com/", TypesTestCase("http:///HOSTNAME.com"));
+
 #ifdef WIN32
-      // URLs that look like absolute Windows drive specs.
-    {"c:\\foo.txt", "file:///C:/foo.txt"},
-    {"Z|foo.txt", "file:///Z:/foo.txt"},
-    {"\\\\server\\foo.txt", "file://server/foo.txt"},
-    {"//server/foo.txt", "file://server/foo.txt"},
+  // URLs that look like absolute Windows drive specs.
+  EXPECT_EQ("file:///C:/foo.txt", TypesTestCase("c:\\foo.txt"));
+  EXPECT_EQ("file:///Z:/foo.txt", TypesTestCase("Z|foo.txt"));
+  EXPECT_EQ("file://server/foo.txt", TypesTestCase("\\\\server\\foo.txt"));
+  EXPECT_EQ("file://server/foo.txt", TypesTestCase("//server/foo.txt"));
 #endif
-  };
-
-  for (size_t i = 0; i < ARRAYSIZE(type_cases); i++) {
-    GURL gurl(type_cases[i].src);
-    EXPECT_STREQ(type_cases[i].expected, gurl.spec().c_str());
-  }
 }
 
 // Test the basic creation and querying of components in a GURL. We assume
@@ -166,9 +167,7 @@ TEST(GURLTest, Resolve) {
     {"http://www.google.com/blah/bloo?c#d", "../../../hello/./world.html?a#b", true, "http://www.google.com/hello/world.html?a#b"},
     {"http://www.google.com/foo#bar", "#com", true, "http://www.google.com/foo#com"},
     {"http://www.google.com/", "Https:images.google.com", true, "https://images.google.com/"},
-      // Unknown schemes with a "://" should be treated as standard.
-    {"somescheme://foo/", "bar", true, "somescheme://foo/bar"},
-      // Unknown schemes with no "://" are not standard.
+      // Unknown schemes are not standard.
     {"data:blahblah", "http://google.com/", true, "http://google.com/"},
     {"data:blahblah", "http:google.com", true, "http://google.com/"},
     {"data:/blahblah", "file.html", false, ""},
@@ -178,15 +177,15 @@ TEST(GURLTest, Resolve) {
     // 8-bit code path.
     GURL input(resolve_cases[i].base);
     GURL output = input.Resolve(resolve_cases[i].relative);
-    EXPECT_EQ(resolve_cases[i].expected_valid, output.is_valid());
-    EXPECT_EQ(resolve_cases[i].expected, output.spec());
+    EXPECT_EQ(resolve_cases[i].expected_valid, output.is_valid()) << i;
+    EXPECT_EQ(resolve_cases[i].expected, output.spec()) << i;
 
     // Wide code path.
     GURL inputw(ConvertUTF8ToUTF16(resolve_cases[i].base));
     GURL outputw =
         input.Resolve(ConvertUTF8ToUTF16(resolve_cases[i].relative));
-    EXPECT_EQ(resolve_cases[i].expected_valid, outputw.is_valid());
-    EXPECT_EQ(resolve_cases[i].expected, outputw.spec());
+    EXPECT_EQ(resolve_cases[i].expected_valid, outputw.is_valid()) << i;
+    EXPECT_EQ(resolve_cases[i].expected, outputw.spec()) << i;
   }
 }
 
@@ -429,5 +428,5 @@ TEST(GURLTest, IsStandard) {
   EXPECT_FALSE(b.IsStandard());
 
   GURL c("foo://bar/baz");
-  EXPECT_TRUE(c.IsStandard());
+  EXPECT_FALSE(c.IsStandard());
 }
diff --git a/googleurl/src/url_canon.h b/googleurl/src/url_canon.h
index 143574d..e2cfb55 100644
--- a/googleurl/src/url_canon.h
+++ b/googleurl/src/url_canon.h
@@ -33,6 +33,7 @@
 #include <stdlib.h>
 
 #include "base/string16.h"
+#include "googleurl/src/url_common.h"
 #include "googleurl/src/url_parse.h"
 
 namespace url_canon {
@@ -248,12 +249,12 @@ class CharsetConverter {
 //
 // Therefore, callers should not use the buffer, since it may actually be empty,
 // use the computed pointer and |*output_len| instead.
-const char* RemoveURLWhitespace(const char* input, int input_len,
-                                CanonOutputT<char>* buffer,
-                                int* output_len);
-const char16* RemoveURLWhitespace(const char16* input, int input_len,
-                                  CanonOutputT<char16>* buffer,
-                                  int* output_len);
+GURL_API const char* RemoveURLWhitespace(const char* input, int input_len,
+                                         CanonOutputT<char>* buffer,
+                                         int* output_len);
+GURL_API const char16* RemoveURLWhitespace(const char16* input, int input_len,
+                                           CanonOutputT<char16>* buffer,
+                                           int* output_len);
 
 // IDN ------------------------------------------------------------------------
 
@@ -266,7 +267,7 @@ const char16* RemoveURLWhitespace(const char16* input, int input_len,
 // the length of the output will be set to the length of the new host name.
 //
 // On error, returns false. The output in this case is undefined.
-bool IDNToASCII(const char16* src, int src_len, CanonOutputW* output);
+GURL_API bool IDNToASCII(const char16* src, int src_len, CanonOutputW* output);
 
 // Piece-by-piece canonicalizers ----------------------------------------------
 //
@@ -292,14 +293,14 @@ bool IDNToASCII(const char16* src, int src_len, CanonOutputW* output);
 // URLs.
 //
 // The 8-bit version requires UTF-8 encoding.
-bool CanonicalizeScheme(const char* spec,
-                        const url_parse::Component& scheme,
-                        CanonOutput* output,
-                        url_parse::Component* out_scheme);
-bool CanonicalizeScheme(const char16* spec,
-                        const url_parse::Component& scheme,
-                        CanonOutput* output,
-                        url_parse::Component* out_scheme);
+GURL_API bool CanonicalizeScheme(const char* spec,
+                                 const url_parse::Component& scheme,
+                                 CanonOutput* output,
+                                 url_parse::Component* out_scheme);
+GURL_API bool CanonicalizeScheme(const char16* spec,
+                                 const url_parse::Component& scheme,
+                                 CanonOutput* output,
+                                 url_parse::Component* out_scheme);
 
 // User info: username/password. If present, this will add the delimiters so
 // the output will be "<username>:<password>@" or "<username>@". Empty
@@ -311,20 +312,20 @@ bool CanonicalizeScheme(const char16* spec,
 // is legal as long as the two components don't overlap.
 //
 // The 8-bit version requires UTF-8 encoding.
-bool CanonicalizeUserInfo(const char* username_source,
-                          const url_parse::Component& username,
-                          const char* password_source,
-                          const url_parse::Component& password,
-                          CanonOutput* output,
-                          url_parse::Component* out_username,
-                          url_parse::Component* out_password);
-bool CanonicalizeUserInfo(const char16* username_source,
-                          const url_parse::Component& username,
-                          const char16* password_source,
-                          const url_parse::Component& password,
-                          CanonOutput* output,
-                          url_parse::Component* out_username,
-                          url_parse::Component* out_password);
+GURL_API bool CanonicalizeUserInfo(const char* username_source,
+                                   const url_parse::Component& username,
+                                   const char* password_source,
+                                   const url_parse::Component& password,
+                                   CanonOutput* output,
+                                   url_parse::Component* out_username,
+                                   url_parse::Component* out_password);
+GURL_API bool CanonicalizeUserInfo(const char16* username_source,
+                                   const url_parse::Component& username,
+                                   const char16* password_source,
+                                   const url_parse::Component& password,
+                                   CanonOutput* output,
+                                   url_parse::Component* out_username,
+                                   url_parse::Component* out_password);
 
 
 // This structure holds detailed state exported from the IP/Host canonicalizers.
@@ -366,27 +367,27 @@ struct CanonHostInfo {
 //
 // The 8-bit version requires UTF-8 encoding.  Use this version when you only
 // need to know whether canonicalization succeeded.
-bool CanonicalizeHost(const char* spec,
-                      const url_parse::Component& host,
-                      CanonOutput* output,
-                      url_parse::Component* out_host);
-bool CanonicalizeHost(const char16* spec,
-                      const url_parse::Component& host,
-                      CanonOutput* output,
-                      url_parse::Component* out_host);
+GURL_API bool CanonicalizeHost(const char* spec,
+                               const url_parse::Component& host,
+                               CanonOutput* output,
+                               url_parse::Component* out_host);
+GURL_API bool CanonicalizeHost(const char16* spec,
+                               const url_parse::Component& host,
+                               CanonOutput* output,
+                               url_parse::Component* out_host);
 
 // Extended version of CanonicalizeHost, which returns additional information.
 // Use this when you need to know whether the hostname was an IP address.
 // A successful return is indicated by host_info->family != BROKEN.  See the
 // definition of CanonHostInfo above for details.
-void CanonicalizeHostVerbose(const char* spec,
-                             const url_parse::Component& host,
-                             CanonOutput* output,
-                             CanonHostInfo* host_info);
-void CanonicalizeHostVerbose(const char16* spec,
-                             const url_parse::Component& host,
-                             CanonOutput* output,
-                             CanonHostInfo* host_info);
+GURL_API void CanonicalizeHostVerbose(const char* spec,
+                                      const url_parse::Component& host,
+                                      CanonOutput* output,
+                                      CanonHostInfo* host_info);
+GURL_API void CanonicalizeHostVerbose(const char16* spec,
+                                      const url_parse::Component& host,
+                                      CanonOutput* output,
+                                      CanonHostInfo* host_info);
 
 
 // IP addresses.
@@ -399,34 +400,34 @@ void CanonicalizeHostVerbose(const char16* spec,
 // This is called AUTOMATICALLY from the host canonicalizer, which ensures that
 // the input is unescaped and name-prepped, etc. It should not normally be
 // necessary or wise to call this directly.
-void CanonicalizeIPAddress(const char* spec,
-                           const url_parse::Component& host,
-                           CanonOutput* output,
-                           CanonHostInfo* host_info);
-void CanonicalizeIPAddress(const char16* spec,
-                           const url_parse::Component& host,
-                           CanonOutput* output,
-                           CanonHostInfo* host_info);
+GURL_API void CanonicalizeIPAddress(const char* spec,
+                                    const url_parse::Component& host,
+                                    CanonOutput* output,
+                                    CanonHostInfo* host_info);
+GURL_API void CanonicalizeIPAddress(const char16* spec,
+                                    const url_parse::Component& host,
+                                    CanonOutput* output,
+                                    CanonHostInfo* host_info);
 
 // Port: this function will add the colon for the port if a port is present.
 // The caller can pass url_parse::PORT_UNSPECIFIED as the
 // default_port_for_scheme argument if there is no default port.
 //
 // The 8-bit version requires UTF-8 encoding.
-bool CanonicalizePort(const char* spec,
-                      const url_parse::Component& port,
-                      int default_port_for_scheme,
-                      CanonOutput* output,
-                      url_parse::Component* out_port);
-bool CanonicalizePort(const char16* spec,
-                      const url_parse::Component& port,
-                      int default_port_for_scheme,
-                      CanonOutput* output,
-                      url_parse::Component* out_port);
+GURL_API bool CanonicalizePort(const char* spec,
+                               const url_parse::Component& port,
+                               int default_port_for_scheme,
+                               CanonOutput* output,
+                               url_parse::Component* out_port);
+GURL_API bool CanonicalizePort(const char16* spec,
+                               const url_parse::Component& port,
+                               int default_port_for_scheme,
+                               CanonOutput* output,
+                               url_parse::Component* out_port);
 
 // Returns the default port for the given canonical scheme, or PORT_UNSPECIFIED
 // if the scheme is unknown.
-int DefaultPortForScheme(const char* scheme, int scheme_len);
+GURL_API int DefaultPortForScheme(const char* scheme, int scheme_len);
 
 // Path. If the input does not begin in a slash (including if the input is
 // empty), we'll prepend a slash to the path to make it canonical.
@@ -437,14 +438,14 @@ int DefaultPortForScheme(const char* scheme, int scheme_len);
 // an issue. Somebody giving us an 8-bit path is responsible for generating
 // the path that the server expects (we'll escape high-bit characters), so
 // if something is invalid, it's their problem.
-bool CanonicalizePath(const char* spec,
-                      const url_parse::Component& path,
-                      CanonOutput* output,
-                      url_parse::Component* out_path);
-bool CanonicalizePath(const char16* spec,
-                      const url_parse::Component& path,
-                      CanonOutput* output,
-                      url_parse::Component* out_path);
+GURL_API bool CanonicalizePath(const char* spec,
+                               const url_parse::Component& path,
+                               CanonOutput* output,
+                               url_parse::Component* out_path);
+GURL_API bool CanonicalizePath(const char16* spec,
+                               const url_parse::Component& path,
+                               CanonOutput* output,
+                               url_parse::Component* out_path);
 
 // Canonicalizes the input as a file path. This is like CanonicalizePath except
 // that it also handles Windows drive specs. For example, the path can begin
@@ -452,14 +453,14 @@ bool CanonicalizePath(const char16* spec,
 // The string will be appended to |*output| and |*out_path| will be updated.
 //
 // The 8-bit version requires UTF-8 encoding.
-bool FileCanonicalizePath(const char* spec,
-                          const url_parse::Component& path,
-                          CanonOutput* output,
-                          url_parse::Component* out_path);
-bool FileCanonicalizePath(const char16* spec,
-                          const url_parse::Component& path,
-                          CanonOutput* output,
-                          url_parse::Component* out_path);
+GURL_API bool FileCanonicalizePath(const char* spec,
+                                   const url_parse::Component& path,
+                                   CanonOutput* output,
+                                   url_parse::Component* out_path);
+GURL_API bool FileCanonicalizePath(const char16* spec,
+                                   const url_parse::Component& path,
+                                   CanonOutput* output,
+                                   url_parse::Component* out_path);
 
 // Query: Prepends the ? if needed.
 //
@@ -473,16 +474,16 @@ bool FileCanonicalizePath(const char16* spec,
 // if necessary, for ASCII input, no conversions are necessary.
 //
 // The converter can be NULL. In this case, the output encoding will be UTF-8.
-void CanonicalizeQuery(const char* spec,
-                       const url_parse::Component& query,
-                       CharsetConverter* converter,
-                       CanonOutput* output,
-                       url_parse::Component* out_query);
-void CanonicalizeQuery(const char16* spec,
-                       const url_parse::Component& query,
-                       CharsetConverter* converter,
-                       CanonOutput* output,
-                       url_parse::Component* out_query);
+GURL_API void CanonicalizeQuery(const char* spec,
+                                const url_parse::Component& query,
+                                CharsetConverter* converter,
+                                CanonOutput* output,
+                                url_parse::Component* out_query);
+GURL_API void CanonicalizeQuery(const char16* spec,
+                                const url_parse::Component& query,
+                                CharsetConverter* converter,
+                                CanonOutput* output,
+                                url_parse::Component* out_query);
 
 // Ref: Prepends the # if needed. The output will be UTF-8 (this is the only
 // canonicalizer that does not produce ASCII output). The output is
@@ -490,14 +491,14 @@ void CanonicalizeQuery(const char16* spec,
 //
 // This function will not fail. If the input is invalid UTF-8/UTF-16, we'll use
 // the "Unicode replacement character" for the confusing bits and copy the rest.
-void CanonicalizeRef(const char* spec,
-                     const url_parse::Component& path,
-                     CanonOutput* output,
-                     url_parse::Component* out_path);
-void CanonicalizeRef(const char16* spec,
-                     const url_parse::Component& path,
-                     CanonOutput* output,
-                     url_parse::Component* out_path);
+GURL_API void CanonicalizeRef(const char* spec,
+                              const url_parse::Component& path,
+                              CanonOutput* output,
+                              url_parse::Component* out_path);
+GURL_API void CanonicalizeRef(const char16* spec,
+                              const url_parse::Component& path,
+                              CanonOutput* output,
+                              url_parse::Component* out_path);
 
 // Full canonicalizer ---------------------------------------------------------
 //
@@ -510,61 +511,61 @@ void CanonicalizeRef(const char16* spec,
 // The 8-bit versions require UTF-8 encoding.
 
 // Use for standard URLs with authorities and paths.
-bool CanonicalizeStandardURL(const char* spec,
-                             int spec_len,
-                             const url_parse::Parsed& parsed,
-                             CharsetConverter* query_converter,
-                             CanonOutput* output,
-                             url_parse::Parsed* new_parsed);
-bool CanonicalizeStandardURL(const char16* spec,
-                             int spec_len,
-                             const url_parse::Parsed& parsed,
-                             CharsetConverter* query_converter,
-                             CanonOutput* output,
-                             url_parse::Parsed* new_parsed);
+GURL_API bool CanonicalizeStandardURL(const char* spec,
+                                      int spec_len,
+                                      const url_parse::Parsed& parsed,
+                                      CharsetConverter* query_converter,
+                                      CanonOutput* output,
+                                      url_parse::Parsed* new_parsed);
+GURL_API bool CanonicalizeStandardURL(const char16* spec,
+                                      int spec_len,
+                                      const url_parse::Parsed& parsed,
+                                      CharsetConverter* query_converter,
+                                      CanonOutput* output,
+                                      url_parse::Parsed* new_parsed);
 
 // Use for file URLs.
-bool CanonicalizeFileURL(const char* spec,
-                         int spec_len,
-                         const url_parse::Parsed& parsed,
-                         CharsetConverter* query_converter,
-                         CanonOutput* output,
-                         url_parse::Parsed* new_parsed);
-bool CanonicalizeFileURL(const char16* spec,
-                         int spec_len,
-                         const url_parse::Parsed& parsed,
-                         CharsetConverter* query_converter,
-                         CanonOutput* output,
-                         url_parse::Parsed* new_parsed);
+GURL_API bool CanonicalizeFileURL(const char* spec,
+                                  int spec_len,
+                                  const url_parse::Parsed& parsed,
+                                  CharsetConverter* query_converter,
+                                  CanonOutput* output,
+                                  url_parse::Parsed* new_parsed);
+GURL_API bool CanonicalizeFileURL(const char16* spec,
+                                  int spec_len,
+                                  const url_parse::Parsed& parsed,
+                                  CharsetConverter* query_converter,
+                                  CanonOutput* output,
+                                  url_parse::Parsed* new_parsed);
 
 // Use for path URLs such as javascript. This does not modify the path in any
 // way, for example, by escaping it.
-bool CanonicalizePathURL(const char* spec,
-                         int spec_len,
-                         const url_parse::Parsed& parsed,
-                         CanonOutput* output,
-                         url_parse::Parsed* new_parsed);
-bool CanonicalizePathURL(const char16* spec,
-                         int spec_len,
-                         const url_parse::Parsed& parsed,
-                         CanonOutput* output,
-                         url_parse::Parsed* new_parsed);
+GURL_API bool CanonicalizePathURL(const char* spec,
+                                  int spec_len,
+                                  const url_parse::Parsed& parsed,
+                                  CanonOutput* output,
+                                  url_parse::Parsed* new_parsed);
+GURL_API bool CanonicalizePathURL(const char16* spec,
+                                  int spec_len,
+                                  const url_parse::Parsed& parsed,
+                                  CanonOutput* output,
+                                  url_parse::Parsed* new_parsed);
 
 // Use for mailto URLs. This "canonicalizes" the url into a path and query
 // component. It does not attempt to merge "to" fields. It uses UTF-8 for
 // the query encoding if there is a query. This is because a mailto URL is
 // really intended for an external mail program, and the encoding of a page,
 // etc. which would influence a query encoding normally are irrelevant.
-bool CanonicalizeMailtoURL(const char* spec,
-                           int spec_len,
-                           const url_parse::Parsed& parsed,
-                           CanonOutput* output,
-                           url_parse::Parsed* new_parsed);
-bool CanonicalizeMailtoURL(const char16* spec,
-                           int spec_len,
-                           const url_parse::Parsed& parsed,
-                           CanonOutput* output,
-                           url_parse::Parsed* new_parsed);
+GURL_API bool CanonicalizeMailtoURL(const char* spec,
+                                    int spec_len,
+                                    const url_parse::Parsed& parsed,
+                                    CanonOutput* output,
+                                    url_parse::Parsed* new_parsed);
+GURL_API bool CanonicalizeMailtoURL(const char16* spec,
+                                    int spec_len,
+                                    const url_parse::Parsed& parsed,
+                                    CanonOutput* output,
+                                    url_parse::Parsed* new_parsed);
 
 // Part replacer --------------------------------------------------------------
 
@@ -585,7 +586,7 @@ template<typename CHAR>
 struct URLComponentSource {
   // Constructor normally used by callers wishing to replace components. This
   // will make them all NULL, which is no replacement. The caller would then
-  // override the compoents they want to replace.
+  // override the components they want to replace.
   URLComponentSource()
       : scheme(NULL),
         username(NULL),
@@ -749,59 +750,59 @@ class Replacements {
 };
 
 // The base must be an 8-bit canonical URL.
-bool ReplaceStandardURL(const char* base,
-                        const url_parse::Parsed& base_parsed,
-                        const Replacements<char>& replacements,
-                        CharsetConverter* query_converter,
-                        CanonOutput* output,
-                        url_parse::Parsed* new_parsed);
-bool ReplaceStandardURL(const char* base,
-                        const url_parse::Parsed& base_parsed,
-                        const Replacements<char16>& replacements,
-                        CharsetConverter* query_converter,
-                        CanonOutput* output,
-                        url_parse::Parsed* new_parsed);
+GURL_API bool ReplaceStandardURL(const char* base,
+                                 const url_parse::Parsed& base_parsed,
+                                 const Replacements<char>& replacements,
+                                 CharsetConverter* query_converter,
+                                 CanonOutput* output,
+                                 url_parse::Parsed* new_parsed);
+GURL_API bool ReplaceStandardURL(const char* base,
+                                 const url_parse::Parsed& base_parsed,
+                                 const Replacements<char16>& replacements,
+                                 CharsetConverter* query_converter,
+                                 CanonOutput* output,
+                                 url_parse::Parsed* new_parsed);
 
 // Replacing some parts of a file URL is not permitted. Everything except
 // the host, path, query, and ref will be ignored.
-bool ReplaceFileURL(const char* base,
-                    const url_parse::Parsed& base_parsed,
-                    const Replacements<char>& replacements,
-                    CharsetConverter* query_converter,
-                    CanonOutput* output,
-                    url_parse::Parsed* new_parsed);
-bool ReplaceFileURL(const char* base,
-                    const url_parse::Parsed& base_parsed,
-                    const Replacements<char16>& replacements,
-                    CharsetConverter* query_converter,
-                    CanonOutput* output,
-                    url_parse::Parsed* new_parsed);
+GURL_API bool ReplaceFileURL(const char* base,
+                             const url_parse::Parsed& base_parsed,
+                             const Replacements<char>& replacements,
+                             CharsetConverter* query_converter,
+                             CanonOutput* output,
+                             url_parse::Parsed* new_parsed);
+GURL_API bool ReplaceFileURL(const char* base,
+                             const url_parse::Parsed& base_parsed,
+                             const Replacements<char16>& replacements,
+                             CharsetConverter* query_converter,
+                             CanonOutput* output,
+                             url_parse::Parsed* new_parsed);
 
 // Path URLs can only have the scheme and path replaced. All other components
 // will be ignored.
-bool ReplacePathURL(const char* base,
-                    const url_parse::Parsed& base_parsed,
-                    const Replacements<char>& replacements,
-                    CanonOutput* output,
-                    url_parse::Parsed* new_parsed);
-bool ReplacePathURL(const char* base,
-                    const url_parse::Parsed& base_parsed,
-                    const Replacements<char16>& replacements,
-                    CanonOutput* output,
-                    url_parse::Parsed* new_parsed);
+GURL_API bool ReplacePathURL(const char* base,
+                             const url_parse::Parsed& base_parsed,
+                             const Replacements<char>& replacements,
+                             CanonOutput* output,
+                             url_parse::Parsed* new_parsed);
+GURL_API bool ReplacePathURL(const char* base,
+                             const url_parse::Parsed& base_parsed,
+                             const Replacements<char16>& replacements,
+                             CanonOutput* output,
+                             url_parse::Parsed* new_parsed);
 
 // Mailto URLs can only have the scheme, path, and query replaced.
 // All other components will be ignored.
-bool ReplaceMailtoURL(const char* base,
-                      const url_parse::Parsed& base_parsed,
-                      const Replacements<char>& replacements,
-                      CanonOutput* output,
-                      url_parse::Parsed* new_parsed);
-bool ReplaceMailtoURL(const char* base,
-                      const url_parse::Parsed& base_parsed,
-                      const Replacements<char16>& replacements,
-                      CanonOutput* output,
-                      url_parse::Parsed* new_parsed);
+GURL_API bool ReplaceMailtoURL(const char* base,
+                               const url_parse::Parsed& base_parsed,
+                               const Replacements<char>& replacements,
+                               CanonOutput* output,
+                               url_parse::Parsed* new_parsed);
+GURL_API bool ReplaceMailtoURL(const char* base,
+                               const url_parse::Parsed& base_parsed,
+                               const Replacements<char16>& replacements,
+                               CanonOutput* output,
+                               url_parse::Parsed* new_parsed);
 
 // Relative URL ---------------------------------------------------------------
 
@@ -816,20 +817,20 @@ bool ReplaceMailtoURL(const char* base,
 // not). Failure means that the combination of URLs doesn't make any sense.
 //
 // The base URL should always be canonical, therefore is ASCII.
-bool IsRelativeURL(const char* base,
-                   const url_parse::Parsed& base_parsed,
-                   const char* fragment,
-                   int fragment_len,
-                   bool is_base_hierarchical,
-                   bool* is_relative,
-                   url_parse::Component* relative_component);
-bool IsRelativeURL(const char* base,
-                   const url_parse::Parsed& base_parsed,
-                   const char16* fragment,
-                   int fragment_len,
-                   bool is_base_hierarchical,
-                   bool* is_relative,
-                   url_parse::Component* relative_component);
+GURL_API bool IsRelativeURL(const char* base,
+                            const url_parse::Parsed& base_parsed,
+                            const char* fragment,
+                            int fragment_len,
+                            bool is_base_hierarchical,
+                            bool* is_relative,
+                            url_parse::Component* relative_component);
+GURL_API bool IsRelativeURL(const char* base,
+                            const url_parse::Parsed& base_parsed,
+                            const char16* fragment,
+                            int fragment_len,
+                            bool is_base_hierarchical,
+                            bool* is_relative,
+                            url_parse::Component* relative_component);
 
 // Given a canonical parsed source URL, a URL fragment known to be relative,
 // and the identified relevant portion of the relative URL (computed by
@@ -849,22 +850,22 @@ bool IsRelativeURL(const char* base,
 // Returns true on success. On failure, the output will be "something
 // reasonable" that will be consistent and valid, just probably not what
 // was intended by the web page author or caller.
-bool ResolveRelativeURL(const char* base_url,
-                        const url_parse::Parsed& base_parsed,
-                        bool base_is_file,
-                        const char* relative_url,
-                        const url_parse::Component& relative_component,
-                        CharsetConverter* query_converter,
-                        CanonOutput* output,
-                        url_parse::Parsed* out_parsed);
-bool ResolveRelativeURL(const char* base_url,
-                        const url_parse::Parsed& base_parsed,
-                        bool base_is_file,
-                        const char16* relative_url,
-                        const url_parse::Component& relative_component,
-                        CharsetConverter* query_converter,
-                        CanonOutput* output,
-                        url_parse::Parsed* out_parsed);
+GURL_API bool ResolveRelativeURL(const char* base_url,
+                                 const url_parse::Parsed& base_parsed,
+                                 bool base_is_file,
+                                 const char* relative_url,
+                                 const url_parse::Component& relative_component,
+                                 CharsetConverter* query_converter,
+                                 CanonOutput* output,
+                                 url_parse::Parsed* out_parsed);
+GURL_API bool ResolveRelativeURL(const char* base_url,
+                                 const url_parse::Parsed& base_parsed,
+                                 bool base_is_file,
+                                 const char16* relative_url,
+                                 const url_parse::Component& relative_component,
+                                 CharsetConverter* query_converter,
+                                 CanonOutput* output,
+                                 url_parse::Parsed* out_parsed);
 
 }  // namespace url_canon
 
diff --git a/googleurl/src/url_canon_etc.cc b/googleurl/src/url_canon_etc.cc
index 672b187..aea181a 100644
--- a/googleurl/src/url_canon_etc.cc
+++ b/googleurl/src/url_canon_etc.cc
@@ -120,6 +120,11 @@ bool DoScheme(const CHAR* spec,
   // The output scheme starts from the current position.
   out_scheme->begin = output->length();
 
+  // Danger: it's important that this code does not strip any characters: it
+  // only emits the canonical version (be it valid or escaped) of each of
+  // the input characters. Stripping would put it out of sync with
+  // url_util::FindAndCompareScheme, which could cause some security checks on
+  // schemes to be incorrect.
   bool success = true;
   int end = scheme.end();
   for (int i = scheme.begin; i < end; i++) {
diff --git a/googleurl/src/url_canon_icu.h b/googleurl/src/url_canon_icu.h
index 3980663..6bc52c3 100644
--- a/googleurl/src/url_canon_icu.h
+++ b/googleurl/src/url_canon_icu.h
@@ -45,13 +45,13 @@ class ICUCharsetConverter : public CharsetConverter {
   // Constructs a converter using an already-existing ICU character set
   // converter. This converter is NOT owned by this object; the lifetime must
   // be managed by the creator such that it is alive as long as this is.
-  ICUCharsetConverter(UConverter* converter);
+  GURL_API ICUCharsetConverter(UConverter* converter);
 
-  virtual ~ICUCharsetConverter() {}
+  GURL_API virtual ~ICUCharsetConverter() {}
 
-  virtual void ConvertFromUTF16(const char16* input,
-                                int input_len,
-                                CanonOutput* output);
+  GURL_API virtual void ConvertFromUTF16(const char16* input,
+                                         int input_len,
+                                         CanonOutput* output);
 
  private:
   // The ICU converter, not owned by this class.
diff --git a/googleurl/src/url_canon_ip.cc b/googleurl/src/url_canon_ip.cc
index d84ff7d..86f7c9c 100644
--- a/googleurl/src/url_canon_ip.cc
+++ b/googleurl/src/url_canon_ip.cc
@@ -58,11 +58,14 @@ template<typename CHAR, typename UCHAR>
 bool DoFindIPv4Components(const CHAR* spec,
                           const url_parse::Component& host,
                           url_parse::Component components[4]) {
+  if (!host.is_nonempty())
+    return false;
+
   int cur_component = 0;  // Index of the component we're working on.
   int cur_component_begin = host.begin;  // Start of the current component.
   int end = host.end();
   for (int i = host.begin; /* nothing */; i++) {
-    if (i == end || spec[i] == '.') {
+    if (i >= end || spec[i] == '.') {
       // Found the end of the current component.
       int component_len = i - cur_component_begin;
       components[cur_component] =
@@ -76,10 +79,10 @@ bool DoFindIPv4Components(const CHAR* spec,
       // allow an empty component at the end (this would indicate that the
       // input ends in a dot). We also want to error if the component is
       // empty and it's the only component (cur_component == 1).
-      if (component_len == 0 && (i != end || cur_component == 1))
+      if (component_len == 0 && (i < end || cur_component == 1))
         return false;
 
-      if (i == end)
+      if (i >= end)
         break;  // End of the input.
 
       if (cur_component == 4) {
@@ -537,8 +540,8 @@ bool DoIPv6AddressToNumber(const CHAR* spec,
   if (ipv6_parsed.ipv4_component.is_valid()) {
     // We only allow the embedded IPv4 syntax to be used for "compat" and
     // "mapped" formats:
-    //     "compat" ==>  0:0:0:0:0:ffff:<IPv4-literal>
-    //     "mapped" ==>  0:0:0:0:0:0000:<IPv4-literal>
+    //     "mapped" ==>  0:0:0:0:0:ffff:<IPv4-literal>
+    //     "compat" ==>  0:0:0:0:0:0000:<IPv4-literal>
     for (int j = 0; j < 10; ++j) {
       if (address[j] != 0)
         return false;
diff --git a/googleurl/src/url_canon_ip.h b/googleurl/src/url_canon_ip.h
index 6ce069d..0a01c9f 100644
--- a/googleurl/src/url_canon_ip.h
+++ b/googleurl/src/url_canon_ip.h
@@ -32,6 +32,7 @@
 
 #include "base/string16.h"
 #include "googleurl/src/url_canon.h"
+#include "googleurl/src/url_common.h"
 #include "googleurl/src/url_parse.h"
 
 namespace url_canon {
@@ -54,12 +55,12 @@ namespace url_canon {
 // Mozilla), so this code path never gets hit. Our host canonicalization will
 // notice these spaces and escape them, which will make IP address finding
 // fail. This seems like better behavior than stripping after a space.
-bool FindIPv4Components(const char* spec,
-                        const url_parse::Component& host,
-                        url_parse::Component components[4]);
-bool FindIPv4Components(const char16* spec,
-                        const url_parse::Component& host,
-                        url_parse::Component components[4]);
+GURL_API bool FindIPv4Components(const char* spec,
+                                 const url_parse::Component& host,
+                                 url_parse::Component components[4]);
+GURL_API bool FindIPv4Components(const char16* spec,
+                                 const url_parse::Component& host,
+                                 url_parse::Component components[4]);
 
 // Converts an IPv4 address to a 32-bit number (network byte order).
 //
@@ -72,26 +73,28 @@ bool FindIPv4Components(const char16* spec,
 //
 // On success, |num_ipv4_components| will be populated with the number of
 // components in the IPv4 address.
-CanonHostInfo::Family IPv4AddressToNumber(const char* spec,
-                                          const url_parse::Component& host,
-                                          unsigned char address[4],
-                                          int* num_ipv4_components);
-CanonHostInfo::Family IPv4AddressToNumber(const char16* spec,
-                                          const url_parse::Component& host,
-                                          unsigned char address[4],
-                                          int* num_ipv4_components);
+GURL_API CanonHostInfo::Family IPv4AddressToNumber(
+    const char* spec,
+    const url_parse::Component& host,
+    unsigned char address[4],
+    int* num_ipv4_components);
+GURL_API CanonHostInfo::Family IPv4AddressToNumber(
+    const char16* spec,
+    const url_parse::Component& host,
+    unsigned char address[4],
+    int* num_ipv4_components);
 
 // Converts an IPv6 address to a 128-bit number (network byte order), returning
 // true on success. False means that the input was not a valid IPv6 address.
 //
 // NOTE that |host| is expected to be surrounded by square brackets.
 // i.e. "[::1]" rather than "::1".
-bool IPv6AddressToNumber(const char* spec,
-                         const url_parse::Component& host,
-                         unsigned char address[16]);
-bool IPv6AddressToNumber(const char16* spec,
-                         const url_parse::Component& host,
-                         unsigned char address[16]);
+GURL_API bool IPv6AddressToNumber(const char* spec,
+                                  const url_parse::Component& host,
+                                  unsigned char address[16]);
+GURL_API bool IPv6AddressToNumber(const char16* spec,
+                                  const url_parse::Component& host,
+                                  unsigned char address[16]);
 
 }  // namespace url_canon
 
diff --git a/googleurl/src/url_canon_path.cc b/googleurl/src/url_canon_path.cc
index 98ca40b..df97aad 100644
--- a/googleurl/src/url_canon_path.cc
+++ b/googleurl/src/url_canon_path.cc
@@ -84,7 +84,7 @@ const unsigned char kPathCharLookup[0x100] = {
 //   0        1        2        3        4        5        6        7        8        9        :        ;        <        =        >        ?
      UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,PASS,    PASS,    ESCAPE,  PASS,    ESCAPE,  ESCAPE,
 //   @        A        B        C        D        E        F        G        H        I        J        K        L        M        N        O
-     UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,
+     PASS,    UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,
 //   P        Q        R        S        T        U        V        W        X        Y        Z        [        \        ]        ^        _
      UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,PASS,    ESCAPE,  PASS,    ESCAPE,  UNESCAPE,
 //   `        a        b        c        d        e        f        g        h        i        j        k        l        m        n        o
diff --git a/googleurl/src/url_canon_relative.cc b/googleurl/src/url_canon_relative.cc
index 446b951..6bcc72f 100644
--- a/googleurl/src/url_canon_relative.cc
+++ b/googleurl/src/url_canon_relative.cc
@@ -457,10 +457,11 @@ bool DoResolveRelativeURL(const char* base_url,
   }
 
   if (relative_component.len <= 0) {
-    // Empty relative URL, make no changes.
+    // Empty relative URL, leave unchanged, only removing the ref component.
     int base_len = base_parsed.Length();
-    for (int i = 0; i < base_len; i++)
-      output->push_back(base_url[i]);
+    base_len -= base_parsed.ref.len + 1;
+    out_parsed->ref.reset();
+    output->Append(base_url, base_len);
     return true;
   }
 
diff --git a/googleurl/src/url_canon_stdstring.h b/googleurl/src/url_canon_stdstring.h
index 2241eb1..c43b777 100644
--- a/googleurl/src/url_canon_stdstring.h
+++ b/googleurl/src/url_canon_stdstring.h
@@ -31,15 +31,15 @@
 // strings. Because the canonicalizer tries not to be dependent on the STL,
 // we have segregated it here.
 
-#ifndef GOOGLEURL_SRC_URL_CANON_STRING_H__
-#define GOOGLEURL_SRC_URL_CANON_STRING_H__
+#ifndef GOOGLEURL_SRC_URL_CANON_STDSTRING_H__
+#define GOOGLEURL_SRC_URL_CANON_STDSTRING_H__
 
 #include <string>
 #include "googleurl/src/url_canon.h"
 
 namespace url_canon {
 
-// Write into a std::string given in the constructor. This object odes not own
+// Write into a std::string given in the constructor. This object does not own
 // the string itself, and the user must ensure that the string stays alive
 // throughout the lifetime of this object.
 //
@@ -82,7 +82,7 @@ class StdStringCanonOutput : public CanonOutput {
   }
 
  protected:
-   std::string* str_;
+  std::string* str_;
 };
 
 // An extension of the Replacements class that allows the setters to use
@@ -130,4 +130,5 @@ class StdStringReplacements :
 
 }  // namespace url_canon
 
-#endif  // GOOGLEURL_SRC_URL_CANON_STRING_H__
+#endif  // GOOGLEURL_SRC_URL_CANON_STDSTRING_H__
+
diff --git a/googleurl/src/url_canon_stdurl.cc b/googleurl/src/url_canon_stdurl.cc
index 41a8fa9..1e21a14 100644
--- a/googleurl/src/url_canon_stdurl.cc
+++ b/googleurl/src/url_canon_stdurl.cc
@@ -170,6 +170,15 @@ bool CanonicalizeStandardURL(const char16* spec,
       output, new_parsed);
 }
 
+// It might be nice in the future to optimize this so unchanged components don't
+// need to be recanonicalized. This is especially true since the common case for
+// ReplaceComponents is removing things we don't want, like reference fragments
+// and usernames. These cases can become more efficient if we can assume the
+// rest of the URL is OK with these removed (or only the modified parts
+// recanonicalized). This would be much more complex to implement, however.
+//
+// You would also need to update DoReplaceComponents in url_util.cc which
+// relies on this re-checking everything (see the comment there for why).
 bool ReplaceStandardURL(const char* base,
                         const url_parse::Parsed& base_parsed,
                         const Replacements<char>& replacements,
diff --git a/googleurl/src/url_canon_unittest.cc b/googleurl/src/url_canon_unittest.cc
index c5be423..a3e43e2 100644
--- a/googleurl/src/url_canon_unittest.cc
+++ b/googleurl/src/url_canon_unittest.cc
@@ -766,6 +766,22 @@ TEST(URLCanonTest, IPv6) {
   }
 }
 
+TEST(URLCanonTest, IPEmpty) {
+  std::string out_str1;
+  url_canon::StdStringCanonOutput output1(&out_str1);
+  url_canon::CanonHostInfo host_info;
+
+  // This tests tests.
+  const char spec[] = "192.168.0.1";
+  url_canon::CanonicalizeIPAddress(spec, url_parse::Component(),
+                                   &output1, &host_info);
+  EXPECT_FALSE(host_info.IsIPAddress());
+
+  url_canon::CanonicalizeIPAddress(spec, url_parse::Component(0, 0),
+                                   &output1, &host_info);
+  EXPECT_FALSE(host_info.IsIPAddress());
+}
+
 TEST(URLCanonTest, UserInfo) {
   // Note that the canonicalizer should escape and treat empty components as
   // not being there.
@@ -950,8 +966,8 @@ TEST(URLCanonTest, Path) {
       // %7f should be allowed and %3D should not be unescaped (these were wrong
       // in a previous version).
     {"/%7Ffp3%3Eju%3Dduvgw%3Dd", L"/%7Ffp3%3Eju%3Dduvgw%3Dd", "/%7Ffp3%3Eju%3Dduvgw%3Dd", url_parse::Component(0, 24), true},
-      // @ should be unescaped.
-    {"/@asdf%40", L"/@asdf%40", "/@asdf@", url_parse::Component(0, 7), true},
+      // @ should be passed through unchanged (escaped or unescaped).
+    {"/@asdf%40", L"/@asdf%40", "/@asdf%40", url_parse::Component(0, 9), true},
 
     // ----- encoding tests -----
       // Basic conversions
@@ -1736,8 +1752,11 @@ TEST(URLCanonTest, ResolveRelativeURL) {
       // Basic absolute input.
     {"http://host/a", true, false, "http://another/", true, false, false, NULL},
     {"http://host/a", true, false, "http:////another/", true, false, false, NULL},
-      // Empty relative URLs shouldn't change the input.
+      // Empty relative URLs should only remove the ref part of the URL,
+      // leaving the rest unchanged.
     {"http://foo/bar", true, false, "", true, true, true, "http://foo/bar"},
+    {"http://foo/bar#ref", true, false, "", true, true, true, "http://foo/bar"},
+    {"http://foo/bar#", true, false, "", true, true, true, "http://foo/bar"},
       // Spaces at the ends of the relative path should be ignored.
     {"http://foo/bar", true, false, "  another  ", true, true, true, "http://foo/another"},
     {"http://foo/bar", true, false, "  .  ", true, true, true, "http://foo/"},
diff --git a/googleurl/src/url_common.h b/googleurl/src/url_common.h
new file mode 100644
index 0000000..7e7e27a
--- /dev/null
+++ b/googleurl/src/url_common.h
@@ -0,0 +1,48 @@
+// Copyright 2010, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef GOOGLEURL_SRC_URL_COMMON_H__
+#define GOOGLEURL_SRC_URL_COMMON_H__
+
+#if !defined(GURL_IMPLEMENTATION)
+#define GURL_IMPLEMENTATION 0
+#endif  // !defined(GURL_IMPLEMENTATION)
+
+#if defined(WIN32) && defined(GURL_DLL)
+#if GURL_IMPLEMENTATION
+#define GURL_API __declspec(dllexport)
+#else  // !GURL_IMPLEMENTATION
+#define GURL_API __declspec(dllimport)
+#endif  // GURL_IMPLEMENTATION
+#else  // !(defined(WIN32) && defined(GURL_DLL))
+#define GURL_API
+#endif  // defined(WIN32) && defined(GURL_DLL)
+
+#endif  // GOOGLEURL_SRC_URL_COMMON_H__
+
diff --git a/googleurl/src/url_parse.cc b/googleurl/src/url_parse.cc
index 7c37f13..a08c4da 100644
--- a/googleurl/src/url_parse.cc
+++ b/googleurl/src/url_parse.cc
@@ -64,54 +64,6 @@ int FindNextAuthorityTerminator(const CHAR* spec,
   return spec_len;  // Not found.
 }
 
-// Fills in all members of the Parsed structure except for the scheme.
-//
-// |spec| is the full spec being parsed, of length |spec_len|.
-// |after_scheme| is the character immediately following the scheme (after the
-//   colon) where we'll begin parsing.
-//
-// Compatability data points. I list "host", "path" extracted:
-// Input                IE6             Firefox                Us
-// -----                --------------  --------------         --------------
-// http://foo.com/      "foo.com", "/"  "foo.com", "/"         "foo.com", "/"
-// http:foo.com/        "foo.com", "/"  "foo.com", "/"         "foo.com", "/"
-// http:/foo.com/       fail(*)         "foo.com", "/"         "foo.com", "/"
-// http:\foo.com/       fail(*)         "\foo.com", "/"(fail)  "foo.com", "/"
-// http:////foo.com/    "foo.com", "/"  "foo.com", "/"         "foo.com", "/"
-//
-// (*) Interestingly, although IE fails to load these URLs, its history
-// canonicalizer handles them, meaning if you've been to the corresponding
-// "http://foo.com/" link, it will be colored.
-template <typename CHAR>
-void DoParseAfterScheme(const CHAR* spec,
-                        int spec_len,
-                        int after_scheme,
-                        Parsed* parsed) {
-  int num_slashes = CountConsecutiveSlashes(spec, after_scheme, spec_len);
-  int after_slashes = after_scheme + num_slashes;
-
-  // First split into two main parts, the authority (username, password, host,
-  // and port) and the full path (path, query, and reference).
-  Component authority;
-  Component full_path;
-
-  // Found "//<some data>", looks like an authority section. Treat everything
-  // from there to the next slash (or end of spec) to be the authority. Note
-  // that we ignore the number of slashes and treat it as the authority.
-  int end_auth = FindNextAuthorityTerminator(spec, after_slashes, spec_len);
-  authority = Component(after_slashes, end_auth - after_slashes);
-
-  if (end_auth == spec_len)  // No beginning of path found.
-    full_path = Component();
-  else  // Everything starting from the slash to the end is the path.
-    full_path = Component(end_auth, spec_len - end_auth);
-
-  // Now parse those two sub-parts.
-  DoParseAuthority(spec, authority, &parsed->username, &parsed->password,
-                   &parsed->host, &parsed->port);
-  ParsePath(spec, full_path, &parsed->path, &parsed->query, &parsed->ref);
-}
-
 template<typename CHAR>
 void ParseUserInfo(const CHAR* spec,
                    const Component& user,
@@ -310,6 +262,54 @@ bool DoExtractScheme(const CHAR* url,
   return false;  // No colon found: no scheme
 }
 
+// Fills in all members of the Parsed structure except for the scheme.
+//
+// |spec| is the full spec being parsed, of length |spec_len|.
+// |after_scheme| is the character immediately following the scheme (after the
+//   colon) where we'll begin parsing.
+//
+// Compatibility data points. I list "host", "path" extracted:
+// Input                IE6             Firefox                Us
+// -----                --------------  --------------         --------------
+// http://foo.com/      "foo.com", "/"  "foo.com", "/"         "foo.com", "/"
+// http:foo.com/        "foo.com", "/"  "foo.com", "/"         "foo.com", "/"
+// http:/foo.com/       fail(*)         "foo.com", "/"         "foo.com", "/"
+// http:\foo.com/       fail(*)         "\foo.com", "/"(fail)  "foo.com", "/"
+// http:////foo.com/    "foo.com", "/"  "foo.com", "/"         "foo.com", "/"
+//
+// (*) Interestingly, although IE fails to load these URLs, its history
+// canonicalizer handles them, meaning if you've been to the corresponding
+// "http://foo.com/" link, it will be colored.
+template <typename CHAR>
+void DoParseAfterScheme(const CHAR* spec,
+                        int spec_len,
+                        int after_scheme,
+                        Parsed* parsed) {
+  int num_slashes = CountConsecutiveSlashes(spec, after_scheme, spec_len);
+  int after_slashes = after_scheme + num_slashes;
+
+  // First split into two main parts, the authority (username, password, host,
+  // and port) and the full path (path, query, and reference).
+  Component authority;
+  Component full_path;
+
+  // Found "//<some data>", looks like an authority section. Treat everything
+  // from there to the next slash (or end of spec) to be the authority. Note
+  // that we ignore the number of slashes and treat it as the authority.
+  int end_auth = FindNextAuthorityTerminator(spec, after_slashes, spec_len);
+  authority = Component(after_slashes, end_auth - after_slashes);
+
+  if (end_auth == spec_len)  // No beginning of path found.
+    full_path = Component();
+  else  // Everything starting from the slash to the end is the path.
+    full_path = Component(end_auth, spec_len - end_auth);
+
+  // Now parse those two sub-parts.
+  DoParseAuthority(spec, authority, &parsed->username, &parsed->password,
+                   &parsed->host, &parsed->port);
+  ParsePath(spec, full_path, &parsed->path, &parsed->query, &parsed->ref);
+}
+
 // The main parsing function for standard URLs. Standard URLs have a scheme,
 // host, path, etc.
 template<typename CHAR>
@@ -683,7 +683,7 @@ void ParseAuthority(const char* spec,
   DoParseAuthority(spec, auth, username, password, hostname, port_num);
 }
 
-void ParseAuthority(char16* spec,
+void ParseAuthority(const char16* spec,
                     const Component& auth,
                     Component* username,
                     Component* password,
diff --git a/googleurl/src/url_parse.h b/googleurl/src/url_parse.h
index bea2766..134b445 100644
--- a/googleurl/src/url_parse.h
+++ b/googleurl/src/url_parse.h
@@ -34,6 +34,7 @@
 
 #include "base/basictypes.h"
 #include "base/string16.h"
+#include "googleurl/src/url_common.h"
 
 namespace url_parse {
 
@@ -127,7 +128,7 @@ struct Parsed {
   // of the string. For example "http://": the parsed structure will only
   // contain an entry for the four-character scheme, and it doesn't know about
   // the "://". For all other last-components, it will return the real length.
-  int Length() const;
+  GURL_API int Length() const;
 
   // Returns the number of characters before the given component if it exists,
   // or where the component would be if it did exist. This will return the
@@ -155,7 +156,8 @@ struct Parsed {
   //      *QUERY: 14                   15 <-
   //        *REF: 20                   20
   //
-  int CountCharactersBefore(ComponentType type, bool include_delimiter) const;
+  GURL_API int CountCharactersBefore(ComponentType type,
+                                     bool include_delimiter) const;
 
   // Scheme without the colon: "http://foo"/ would have a scheme of "http".
   // The length will be -1 if no scheme is specified ("foo.com"), or 0 if there
@@ -215,24 +217,24 @@ struct Parsed {
 // StandardURL is for when the scheme is known to be one that has an
 // authority (host) like "http". This function will not handle weird ones
 // like "about:" and "javascript:", or do the right thing for "file:" URLs.
-void ParseStandardURL(const char* url, int url_len, Parsed* parsed);
-void ParseStandardURL(const char16* url, int url_len, Parsed* parsed);
+GURL_API void ParseStandardURL(const char* url, int url_len, Parsed* parsed);
+GURL_API void ParseStandardURL(const char16* url, int url_len, Parsed* parsed);
 
 // PathURL is for when the scheme is known not to have an authority (host)
 // section but that aren't file URLs either. The scheme is parsed, and
 // everything after the scheme is considered as the path. This is used for
 // things like "about:" and "javascript:"
-void ParsePathURL(const char* url, int url_len, Parsed* parsed);
-void ParsePathURL(const char16* url, int url_len, Parsed* parsed);
+GURL_API void ParsePathURL(const char* url, int url_len, Parsed* parsed);
+GURL_API void ParsePathURL(const char16* url, int url_len, Parsed* parsed);
 
 // FileURL is for file URLs. There are some special rules for interpreting
 // these.
-void ParseFileURL(const char* url, int url_len, Parsed* parsed);
-void ParseFileURL(const char16* url, int url_len, Parsed* parsed);
+GURL_API void ParseFileURL(const char* url, int url_len, Parsed* parsed);
+GURL_API void ParseFileURL(const char16* url, int url_len, Parsed* parsed);
 
 // MailtoURL is for mailto: urls. They are made up scheme,path,query
-void ParseMailtoURL(const char* url, int url_len, Parsed* parsed);
-void ParseMailtoURL(const char16* url, int url_len, Parsed* parsed);
+GURL_API void ParseMailtoURL(const char* url, int url_len, Parsed* parsed);
+GURL_API void ParseMailtoURL(const char16* url, int url_len, Parsed* parsed);
 
 // Helper functions -----------------------------------------------------------
 
@@ -256,27 +258,27 @@ void ParseMailtoURL(const char16* url, int url_len, Parsed* parsed);
 // end of the string).
 //
 // The 8-bit version requires UTF-8 encoding.
-bool ExtractScheme(const char* url, int url_len, Component* scheme);
-bool ExtractScheme(const char16* url, int url_len, Component* scheme);
+GURL_API bool ExtractScheme(const char* url, int url_len, Component* scheme);
+GURL_API bool ExtractScheme(const char16* url, int url_len, Component* scheme);
 
 // Returns true if ch is a character that terminates the authority segment
 // of a URL.
-bool IsAuthorityTerminator(char16 ch);
+GURL_API bool IsAuthorityTerminator(char16 ch);
 
 // Does a best effort parse of input |spec|, in range |auth|. If a particular
 // component is not found, it will be set to invalid.
-void ParseAuthority(const char* spec,
-                    const Component& auth,
-                    Component* username,
-                    Component* password,
-                    Component* hostname,
-                    Component* port_num);
-void ParseAuthority(char16* spec,
-                    const Component& auth,
-                    Component* username,
-                    Component* password,
-                    Component* hostname,
-                    Component* port_num);
+GURL_API void ParseAuthority(const char* spec,
+                             const Component& auth,
+                             Component* username,
+                             Component* password,
+                             Component* hostname,
+                             Component* port_num);
+GURL_API void ParseAuthority(const char16* spec,
+                             const Component& auth,
+                             Component* username,
+                             Component* password,
+                             Component* hostname,
+                             Component* port_num);
 
 // Computes the integer port value from the given port component. The port
 // component should have been identified by one of the init functions on
@@ -285,8 +287,8 @@ void ParseAuthority(char16* spec,
 // The return value will be a positive integer between 0 and 64K, or one of
 // the two special values below.
 enum SpecialPort { PORT_UNSPECIFIED = -1, PORT_INVALID = -2 };
-int ParsePort(const char* url, const Component& port);
-int ParsePort(const char16* url, const Component& port);
+GURL_API int ParsePort(const char* url, const Component& port);
+GURL_API int ParsePort(const char16* url, const Component& port);
 
 // Extracts the range of the file name in the given url. The path must
 // already have been computed by the parse function, and the matching URL
@@ -298,12 +300,12 @@ int ParsePort(const char16* url, const Component& port);
 // following the last slash.
 //
 // The 8-bit version requires UTF-8 encoding.
-void ExtractFileName(const char* url,
-                     const Component& path,
-                     Component* file_name);
-void ExtractFileName(const char16* url,
-                     const Component& path,
-                     Component* file_name);
+GURL_API void ExtractFileName(const char* url,
+                              const Component& path,
+                              Component* file_name);
+GURL_API void ExtractFileName(const char16* url,
+                              const Component& path,
+                              Component* file_name);
 
 // Extract the first key/value from the range defined by |*query|. Updates
 // |*query| to start at the end of the extracted key/value pair. This is
@@ -320,14 +322,14 @@ void ExtractFileName(const char16* url,
 //
 // If no key/value are found |*key| and |*value| will be unchanged and it will
 // return false.
-bool ExtractQueryKeyValue(const char* url,
-                          Component* query,
-                          Component* key,
-                          Component* value);
-bool ExtractQueryKeyValue(const char16* url,
-                          Component* query,
-                          Component* key,
-                          Component* value);
+GURL_API bool ExtractQueryKeyValue(const char* url,
+                                   Component* query,
+                                   Component* key,
+                                   Component* value);
+GURL_API bool ExtractQueryKeyValue(const char16* url,
+                                   Component* query,
+                                   Component* key,
+                                   Component* value);
 
 }  // namespace url_parse
 
diff --git a/googleurl/src/url_util.cc b/googleurl/src/url_util.cc
index d623b45..7e100aa 100644
--- a/googleurl/src/url_util.cc
+++ b/googleurl/src/url_util.cc
@@ -33,6 +33,7 @@
 #include "googleurl/src/url_util.h"
 
 #include "base/logging.h"
+#include "googleurl/src/url_canon_internal.h"
 #include "googleurl/src/url_file.h"
 
 namespace url_util {
@@ -58,13 +59,15 @@ inline bool DoLowerCaseEqualsASCII(Iter a_begin, Iter a_end, const char* b) {
 const char kFileScheme[] = "file";  // Used in a number of places.
 const char kMailtoScheme[] = "mailto";
 
-const int kNumStandardURLSchemes = 5;
+const int kNumStandardURLSchemes = 7;
 const char* kStandardURLSchemes[kNumStandardURLSchemes] = {
   "http",
   "https",
   kFileScheme,  // Yes, file urls can have a hostname!
   "ftp",
   "gopher",
+  "ws",  // WebSocket.
+  "wss",  // WebSocket secure.
 };
 
 // List of the currently installed standard schemes. This list is lazily
@@ -72,6 +75,9 @@ const char* kStandardURLSchemes[kNumStandardURLSchemes] = {
 // any destructors from being called that will slow us down or cause problems.
 std::vector<const char*>* standard_schemes = NULL;
 
+// See the LockStandardSchemes declaration in the header.
+bool standard_schemes_locked = false;
+
 // Ensures that the standard_schemes list is initialized, does nothing if it
 // already has values.
 void InitStandardSchemes() {
@@ -96,10 +102,9 @@ inline bool CompareSchemeComponent(const CHAR* spec,
 }
 
 // Returns true if the given scheme identified by |scheme| within |spec| is one
-// of the registered "standard" schemes. Note that this does not check for
-// "://", use IsStandard for that.
+// of the registered "standard" schemes.
 template<typename CHAR>
-bool IsStandardScheme(const CHAR* spec, const url_parse::Component& scheme) {
+bool DoIsStandard(const CHAR* spec, const url_parse::Component& scheme) {
   if (!scheme.is_nonempty())
     return false;  // Empty or invalid schemes are non-standard.
 
@@ -112,34 +117,20 @@ bool IsStandardScheme(const CHAR* spec, const url_parse::Component& scheme) {
   return false;
 }
 
-// Returns true if the stuff following the scheme in the given spec indicates
-// a "standard" URL. The presence of "://" after the scheme indicates that
-// there is a hostname, etc. which we call a standard URL.
-template<typename CHAR>
-bool HasStandardSchemeSeparator(const CHAR* spec, int spec_len,
-                                const url_parse::Component& scheme) {
-  int after_scheme = scheme.end();
-  if (spec_len < after_scheme + 3)
-    return false;
-  return spec[after_scheme] == ':' &&
-         spec[after_scheme + 1] == '/' &&
-         spec[after_scheme + 2] == '/';
-}
-
-template<typename CHAR>
-bool DoIsStandard(const CHAR* spec, int spec_len,
-                  const url_parse::Component& scheme) {
-  return HasStandardSchemeSeparator(spec, spec_len, scheme) ||
-         IsStandardScheme(spec, scheme);
-}
-
 template<typename CHAR>
 bool DoFindAndCompareScheme(const CHAR* str,
                             int str_len,
                             const char* compare,
                             url_parse::Component* found_scheme) {
+  // Before extracting scheme, canonicalize the URL to remove any whitespace.
+  // This matches the canonicalization done in DoCanonicalize function.
+  url_canon::RawCanonOutputT<CHAR> whitespace_buffer;
+  int spec_len;
+  const CHAR* spec = RemoveURLWhitespace(str, str_len,
+                                         &whitespace_buffer, &spec_len);
+
   url_parse::Component our_scheme;
-  if (!url_parse::ExtractScheme(str, str_len, &our_scheme)) {
+  if (!url_parse::ExtractScheme(spec, spec_len, &our_scheme)) {
     // No scheme.
     if (found_scheme)
       *found_scheme = url_parse::Component();
@@ -147,7 +138,7 @@ bool DoFindAndCompareScheme(const CHAR* str,
   }
   if (found_scheme)
     *found_scheme = our_scheme;
-  return CompareSchemeComponent(str, our_scheme, compare);
+  return CompareSchemeComponent(spec, our_scheme, compare);
 }
 
 template<typename CHAR>
@@ -184,7 +175,7 @@ bool DoCanonicalize(const CHAR* in_spec, int in_spec_len,
 #endif
 
   url_parse::Component scheme;
-  if(!url_parse::ExtractScheme(spec, spec_len, &scheme))
+  if (!url_parse::ExtractScheme(spec, spec_len, &scheme))
     return false;
 
   // This is the parsed version of the input URL, we have to canonicalize it
@@ -197,7 +188,7 @@ bool DoCanonicalize(const CHAR* in_spec, int in_spec_len,
                                              charset_converter,
                                              output, output_parsed);
 
-  } else if (IsStandard(spec, spec_len, scheme)) {
+  } else if (DoIsStandard(spec, scheme)) {
     // All "normal" URLs.
     url_parse::ParseStandardURL(spec, spec_len, &parsed_input);
     success = url_canon::CanonicalizeStandardURL(spec, spec_len, parsed_input,
@@ -239,7 +230,7 @@ bool DoResolveRelative(const char* base_spec,
   // See if our base URL should be treated as "standard".
   bool standard_base_scheme =
       base_parsed.scheme.is_nonempty() &&
-      IsStandard(base_spec, base_spec_len, base_parsed.scheme);
+      DoIsStandard(base_spec, base_parsed.scheme);
 
   bool is_relative;
   url_parse::Component relative_component;
@@ -275,53 +266,111 @@ bool DoReplaceComponents(const char* spec,
                          url_canon::CharsetConverter* charset_converter,
                          url_canon::CanonOutput* output,
                          url_parse::Parsed* out_parsed) {
-  // Note that we dispatch to the parser according the the scheme type of
-  // the OUTPUT URL. Normally, this is the same as our scheme, but if the
-  // scheme is being overridden, we need to test that.
-
-  if (// Either the scheme is not replaced and the old one is a file,
-      (!replacements.IsSchemeOverridden() &&
-       CompareSchemeComponent(spec, parsed.scheme, kFileScheme)) ||
-      // ...or it is being replaced and the new one is a file.
-      (replacements.IsSchemeOverridden() &&
-       CompareSchemeComponent(replacements.sources().scheme,
-                              replacements.components().scheme,
-                              kFileScheme))) {
+  // If the scheme is overridden, just do a simple string substitution and
+  // reparse the whole thing. There are lots of edge cases that we really don't
+  // want to deal with. Like what happens if I replace "http://e:8080/foo"
+  // with a file. Does it become "file:///E:/8080/foo" where the port number
+  // becomes part of the path? Parsing that string as a file URL says "yes"
+  // but almost no sane rule for dealing with the components individually would
+  // come up with that.
+  //
+  // Why allow these crazy cases at all? Programmatically, there is almost no
+  // case for replacing the scheme. The most common case for hitting this is
+  // in JS when building up a URL using the location object. In this case, the
+  // JS code expects the string substitution behavior:
+  //   http://www.w3.org/TR/2008/WD-html5-20080610/structured.html#common3
+  if (replacements.IsSchemeOverridden()) {
+    // Canonicalize the new scheme so it is 8-bit and can be concatenated with
+    // the existing spec.
+    url_canon::RawCanonOutput<128> scheme_replaced;
+    url_parse::Component scheme_replaced_parsed;
+    url_canon::CanonicalizeScheme(
+        replacements.sources().scheme,
+        replacements.components().scheme,
+        &scheme_replaced, &scheme_replaced_parsed);
+
+    // We can assume that the input is canonicalized, which means it always has
+    // a colon after the scheme (or where the scheme would be).
+    int spec_after_colon = parsed.scheme.is_valid() ? parsed.scheme.end() + 1
+                                                    : 1;
+    if (spec_len - spec_after_colon > 0) {
+      scheme_replaced.Append(&spec[spec_after_colon],
+                             spec_len - spec_after_colon);
+    }
+
+    // We now need to completely re-parse the resulting string since its meaning
+    // may have changed with the different scheme.
+    url_canon::RawCanonOutput<128> recanonicalized;
+    url_parse::Parsed recanonicalized_parsed;
+    DoCanonicalize(scheme_replaced.data(), scheme_replaced.length(),
+                   charset_converter,
+                   &recanonicalized, &recanonicalized_parsed);
+
+    // Recurse using the version with the scheme already replaced. This will now
+    // use the replacement rules for the new scheme.
+    //
+    // Warning: this code assumes that ReplaceComponents will re-check all
+    // components for validity. This is because we can't fail if DoCanonicalize
+    // failed above since theoretically the thing making it fail could be
+    // getting replaced here. If ReplaceComponents didn't re-check everything,
+    // we wouldn't know if something *not* getting replaced is a problem.
+    // If the scheme-specific replacers are made more intelligent so they don't
+    // re-check everything, we should instead recanonicalize the whole thing
+    // after this call to check validity (this assumes replacing the scheme is
+    // much much less common than other types of replacements, like clearing the
+    // ref).
+    url_canon::Replacements<CHAR> replacements_no_scheme = replacements;
+    replacements_no_scheme.SetScheme(NULL, url_parse::Component());
+    return DoReplaceComponents(recanonicalized.data(), recanonicalized.length(),
+                               recanonicalized_parsed, replacements_no_scheme,
+                               charset_converter, output, out_parsed);
+  }
+
+  // If we get here, then we know the scheme doesn't need to be replaced, so can
+  // just key off the scheme in the spec to know how to do the replacements.
+  if (CompareSchemeComponent(spec, parsed.scheme, kFileScheme)) {
     return url_canon::ReplaceFileURL(spec, parsed, replacements,
                                      charset_converter, output, out_parsed);
   }
-
-  if (// Either the scheme is not replaced and the old one is standard,
-      (!replacements.IsSchemeOverridden() &&
-       IsStandard(spec, spec_len, parsed.scheme)) ||
-      // ...or it is being replaced and the new one is standard.
-      (replacements.IsSchemeOverridden() &&
-       IsStandardScheme(replacements.sources().scheme,
-                        replacements.components().scheme))) {
-    // Standard URL with all parts.
+  if (DoIsStandard(spec, parsed.scheme)) {
     return url_canon::ReplaceStandardURL(spec, parsed, replacements,
                                          charset_converter, output, out_parsed);
   }
-
-  if (// Either the scheme is not replaced and the old one is mailto,
-      (!replacements.IsSchemeOverridden() &&
-       CompareSchemeComponent(spec, parsed.scheme, kMailtoScheme)) ||
-      // ...or it is being replaced and the new one is a mailto.
-      (replacements.IsSchemeOverridden() &&
-       CompareSchemeComponent(replacements.sources().scheme,
-                              replacements.components().scheme,
-                              kMailtoScheme))) {
+  if (CompareSchemeComponent(spec, parsed.scheme, kMailtoScheme)) {
      return url_canon::ReplaceMailtoURL(spec, parsed, replacements,
                                         output, out_parsed);
   }
 
+  // Default is a path URL.
   return url_canon::ReplacePathURL(spec, parsed, replacements,
                                    output, out_parsed);
 }
 
 }  // namespace
 
+void Initialize() {
+  InitStandardSchemes();
+}
+
+void Shutdown() {
+  if (standard_schemes) {
+    delete standard_schemes;
+    standard_schemes = NULL;
+  }
+}
+
 void AddStandardScheme(const char* new_scheme) {
+  // If this assert triggers, it means you've called AddStandardScheme after
+  // LockStandardSchemes has been called (see the header file for
+  // LockStandardSchemes for more).
+  //
+  // This normally means you're trying to set up a new standard scheme too late
+  // in your application's init process. Locate where your app does this
+  // initialization and calls LockStandardSchemes, and add your new standard
+  // scheme there.
+  DCHECK(!standard_schemes_locked) <<
+      "Trying to add a standard scheme after the list has been locked.";
+
   size_t scheme_len = strlen(new_scheme);
   if (scheme_len == 0)
     return;
@@ -335,14 +384,16 @@ void AddStandardScheme(const char* new_scheme) {
   standard_schemes->push_back(dup_scheme);
 }
 
-bool IsStandard(const char* spec, int spec_len,
-                const url_parse::Component& scheme) {
-  return DoIsStandard(spec, spec_len, scheme);
+void LockStandardSchemes() {
+  standard_schemes_locked = true;
+}
+
+bool IsStandard(const char* spec, const url_parse::Component& scheme) {
+  return DoIsStandard(spec, scheme);
 }
 
-bool IsStandard(const char16* spec, int spec_len,
-                const url_parse::Component& scheme) {
-  return DoIsStandard(spec, spec_len, scheme);
+bool IsStandard(const char16* spec, const url_parse::Component& scheme) {
+  return DoIsStandard(spec, scheme);
 }
 
 bool FindAndCompareScheme(const char* str,
@@ -450,4 +501,53 @@ bool LowerCaseEqualsASCII(const char16* a_begin,
   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
 }
 
+void DecodeURLEscapeSequences(const char* input, int length,
+                              url_canon::CanonOutputW* output) {
+  url_canon::RawCanonOutputT<char> unescaped_chars;
+  for (int i = 0; i < length; i++) {
+    if (input[i] == '%') {
+      unsigned char ch;
+      if (url_canon::DecodeEscaped(input, &i, length, &ch)) {
+        unescaped_chars.push_back(ch);
+      } else {
+        // Invalid escape sequence, copy the percent literal.
+        unescaped_chars.push_back('%');
+      }
+    } else {
+      // Regular non-escaped 8-bit character.
+      unescaped_chars.push_back(input[i]);
+    }
+  }
+
+  // Convert that 8-bit to UTF-16. It's not clear IE does this at all to
+  // JavaScript URLs, but Firefox and Safari do.
+  for (int i = 0; i < unescaped_chars.length(); i++) {
+    unsigned char uch = static_cast<unsigned char>(unescaped_chars.at(i));
+    if (uch < 0x80) {
+      // ASCII (single-byte) character, just append directly.
+      output->push_back(uch);
+    } else {
+      // next_character will point to the last character of the decoded
+      // character.
+      int next_character = i;
+      unsigned code_point;
+      if (url_canon::ReadUTFChar(unescaped_chars.data(), &next_character,
+                                 unescaped_chars.length(), &code_point)) {
+        // Valid UTF-8 character, convert to UTF-16.
+        url_canon::AppendUTF16Value(code_point, output);
+        i = next_character;
+      } else {
+        // If there are any sequences that are not valid UTF-8, we keep
+        // invalid code points and promote to UTF-16. We copy all characters
+        // from the current position to the end of the identified sequence.
+        while (i < next_character) {
+          output->push_back(static_cast<unsigned char>(unescaped_chars.at(i)));
+          i++;
+        }
+        output->push_back(static_cast<unsigned char>(unescaped_chars.at(i)));
+      }
+    }
+  }
+}
+
 }  // namespace url_util
diff --git a/googleurl/src/url_util.h b/googleurl/src/url_util.h
index 62813a6..ec4cf9e 100644
--- a/googleurl/src/url_util.h
+++ b/googleurl/src/url_util.h
@@ -33,29 +33,69 @@
 #include <string>
 
 #include "base/string16.h"
+#include "googleurl/src/url_common.h"
 #include "googleurl/src/url_parse.h"
 #include "googleurl/src/url_canon.h"
 
 namespace url_util {
 
+// Init ------------------------------------------------------------------------
+
+// Initialization is NOT required, it will be implicitly initialized when first
+// used. However, this implicit initialization is NOT threadsafe. If you are
+// using this library in a threaded environment and don't have a consistent
+// "first call" (an example might be calling "AddStandardScheme" with your
+// special application-specific schemes) then you will want to call initialize
+// before spawning any threads.
+//
+// It is OK to call this function more than once, subsequent calls will simply
+// "noop", unless Shutdown() was called in the meantime. This will also be a
+// "noop" if other calls to the library have forced an initialization
+// beforehand.
+GURL_API void Initialize();
+
+// Cleanup is not required, except some strings may leak. For most user
+// applications, this is fine. If you're using it in a library that may get
+// loaded and unloaded, you'll want to unload to properly clean up your
+// library.
+GURL_API void Shutdown();
+
 // Schemes --------------------------------------------------------------------
 
 // Adds an application-defined scheme to the internal list of "standard" URL
-// schemes.
-void AddStandardScheme(const char* new_scheme);
+// schemes. This function is not threadsafe and cannot be called concurrently
+// with any other url_util function. It will assert if the list of standard
+// schemes has been locked (see LockStandardSchemes).
+GURL_API void AddStandardScheme(const char* new_scheme);
+
+// Sets a flag to prevent future calls to AddStandardScheme from succeeding.
+//
+// This is designed to help prevent errors for multithreaded applications.
+// Normal usage would be to call AddStandardScheme for your custom schemes at
+// the beginning of program initialization, and then LockStandardSchemes. This
+// prevents future callers from mistakenly calling AddStandardScheme when the
+// program is running with multiple threads, where such usage would be
+// dangerous.
+//
+// We could have had AddStandardScheme use a lock instead, but that would add
+// some platform-specific dependencies we don't otherwise have now, and is
+// overkill considering the normal usage is so simple.
+GURL_API void LockStandardSchemes();
 
 // Locates the scheme in the given string and places it into |found_scheme|,
 // which may be NULL to indicate the caller does not care about the range.
+//
 // Returns whether the given |compare| scheme matches the scheme found in the
-// input (if any).
-bool FindAndCompareScheme(const char* str,
-                          int str_len,
-                          const char* compare,
-                          url_parse::Component* found_scheme);
-bool FindAndCompareScheme(const char16* str,
-                          int str_len,
-                          const char* compare,
-                          url_parse::Component* found_scheme);
+// input (if any). The |compare| scheme must be a valid canonical scheme or
+// the result of the comparison is undefined.
+GURL_API bool FindAndCompareScheme(const char* str,
+                                   int str_len,
+                                   const char* compare,
+                                   url_parse::Component* found_scheme);
+GURL_API bool FindAndCompareScheme(const char16* str,
+                                   int str_len,
+                                   const char* compare,
+                                   url_parse::Component* found_scheme);
 inline bool FindAndCompareScheme(const std::string& str,
                                  const char* compare,
                                  url_parse::Component* found_scheme) {
@@ -70,12 +110,18 @@ inline bool FindAndCompareScheme(const string16& str,
 }
 
 // Returns true if the given string represents a standard URL. This means that
-// either the scheme is in the list of known standard schemes, or there is a
-// "://" following the scheme.
-bool IsStandard(const char* spec, int spec_len,
-                const url_parse::Component& scheme);
-bool IsStandard(const char16* spec, int spec_len,
-                const url_parse::Component& scheme);
+// the scheme is in the list of known standard schemes.
+GURL_API bool IsStandard(const char* spec,
+                         const url_parse::Component& scheme);
+GURL_API bool IsStandard(const char16* spec,
+                         const url_parse::Component& scheme);
+
+// TODO(brettw) remove this. This is a temporary compatibility hack to avoid
+// breaking the WebKit build when this version is synced via Chrome.
+inline bool IsStandard(const char* spec, int spec_len,
+                       const url_parse::Component& scheme) {
+  return IsStandard(spec, scheme);
+}
 
 // URL library wrappers -------------------------------------------------------
 
@@ -89,16 +135,16 @@ bool IsStandard(const char16* spec, int spec_len,
 // Returns true if a valid URL was produced, false if not. On failure, the
 // output and parsed structures will still be filled and will be consistent,
 // but they will not represent a loadable URL.
-bool Canonicalize(const char* spec,
-                  int spec_len,
-                  url_canon::CharsetConverter* charset_converter,
-                  url_canon::CanonOutput* output,
-                  url_parse::Parsed* output_parsed);
-bool Canonicalize(const char16* spec,
-                  int spec_len,
-                  url_canon::CharsetConverter* charset_converter,
-                  url_canon::CanonOutput* output,
-                  url_parse::Parsed* output_parsed);
+GURL_API bool Canonicalize(const char* spec,
+                           int spec_len,
+                           url_canon::CharsetConverter* charset_converter,
+                           url_canon::CanonOutput* output,
+                           url_parse::Parsed* output_parsed);
+GURL_API bool Canonicalize(const char16* spec,
+                           int spec_len,
+                           url_canon::CharsetConverter* charset_converter,
+                           url_canon::CanonOutput* output,
+                           url_parse::Parsed* output_parsed);
 
 // Resolves a potentially relative URL relative to the given parsed base URL.
 // The base MUST be valid. The resulting canonical URL and parsed information
@@ -110,41 +156,43 @@ bool Canonicalize(const char16* spec,
 //
 // Returns true if the output is valid, false if the input could not produce
 // a valid URL.
-bool ResolveRelative(const char* base_spec,
-                     int base_spec_len,
-                     const url_parse::Parsed& base_parsed,
-                     const char* relative,
-                     int relative_length,
-                     url_canon::CharsetConverter* charset_converter,
-                     url_canon::CanonOutput* output,
-                     url_parse::Parsed* output_parsed);
-bool ResolveRelative(const char* base_spec,
-                     int base_spec_len,
-                     const url_parse::Parsed& base_parsed,
-                     const char16* relative,
-                     int relative_length,
-                     url_canon::CharsetConverter* charset_converter,
-                     url_canon::CanonOutput* output,
-                     url_parse::Parsed* output_parsed);
+GURL_API bool ResolveRelative(const char* base_spec,
+                              int base_spec_len,
+                              const url_parse::Parsed& base_parsed,
+                              const char* relative,
+                              int relative_length,
+                              url_canon::CharsetConverter* charset_converter,
+                              url_canon::CanonOutput* output,
+                              url_parse::Parsed* output_parsed);
+GURL_API bool ResolveRelative(const char* base_spec,
+                              int base_spec_len,
+                              const url_parse::Parsed& base_parsed,
+                              const char16* relative,
+                              int relative_length,
+                              url_canon::CharsetConverter* charset_converter,
+                              url_canon::CanonOutput* output,
+                              url_parse::Parsed* output_parsed);
 
 // Replaces components in the given VALID input url. The new canonical URL info
 // is written to output and out_parsed.
 //
 // Returns true if the resulting URL is valid.
-bool ReplaceComponents(const char* spec,
-                       int spec_len,
-                       const url_parse::Parsed& parsed,
-                       const url_canon::Replacements<char>& replacements,
-                       url_canon::CharsetConverter* charset_converter,
-                       url_canon::CanonOutput* output,
-                       url_parse::Parsed* out_parsed);
-bool ReplaceComponents(const char* spec,
-                       int spec_len,
-                       const url_parse::Parsed& parsed,
-                       const url_canon::Replacements<char16>& replacements,
-                       url_canon::CharsetConverter* charset_converter,
-                       url_canon::CanonOutput* output,
-                       url_parse::Parsed* out_parsed);
+GURL_API bool ReplaceComponents(
+    const char* spec,
+    int spec_len,
+    const url_parse::Parsed& parsed,
+    const url_canon::Replacements<char>& replacements,
+    url_canon::CharsetConverter* charset_converter,
+    url_canon::CanonOutput* output,
+    url_parse::Parsed* out_parsed);
+GURL_API bool ReplaceComponents(
+    const char* spec,
+    int spec_len,
+    const url_parse::Parsed& parsed,
+    const url_canon::Replacements<char16>& replacements,
+    url_canon::CharsetConverter* charset_converter,
+    url_canon::CanonOutput* output,
+    url_parse::Parsed* out_parsed);
 
 // String helper functions ----------------------------------------------------
 
@@ -154,16 +202,20 @@ bool ReplaceComponents(const char* spec,
 //
 // The versions of this function that don't take a b_end assume that the b
 // string is NULL terminated.
-bool LowerCaseEqualsASCII(const char* a_begin,
-                          const char* a_end,
-                          const char* b);
-bool LowerCaseEqualsASCII(const char* a_begin,
-                          const char* a_end,
-                          const char* b_begin,
-                          const char* b_end);
-bool LowerCaseEqualsASCII(const char16* a_begin,
-                          const char16* a_end,
-                          const char* b);
+GURL_API bool LowerCaseEqualsASCII(const char* a_begin,
+                                   const char* a_end,
+                                   const char* b);
+GURL_API bool LowerCaseEqualsASCII(const char* a_begin,
+                                   const char* a_end,
+                                   const char* b_begin,
+                                   const char* b_end);
+GURL_API bool LowerCaseEqualsASCII(const char16* a_begin,
+                                   const char16* a_end,
+                                   const char* b);
+
+// Unescapes the given string using URL escaping rules.
+GURL_API void DecodeURLEscapeSequences(const char* input, int length,
+                                       url_canon::CanonOutputW* output);
 
 }  // namespace url_util
 
diff --git a/googleurl/src/url_util_unittest.cc b/googleurl/src/url_util_unittest.cc
index 12e5254..442b2ec 100644
--- a/googleurl/src/url_util_unittest.cc
+++ b/googleurl/src/url_util_unittest.cc
@@ -30,6 +30,7 @@
 #include "googleurl/src/url_canon.h"
 #include "googleurl/src/url_canon_stdstring.h"
 #include "googleurl/src/url_parse.h"
+#include "googleurl/src/url_test_utils.h"
 #include "googleurl/src/url_util.h"
 #include "testing/gtest/include/gtest/gtest.h"
 
@@ -64,6 +65,22 @@ TEST(URLUtilTest, FindAndCompareScheme) {
   // But when there is no scheme, it should fail.
   EXPECT_FALSE(url_util::FindAndCompareScheme("", 0, "", &found_scheme));
   EXPECT_TRUE(found_scheme == url_parse::Component());
+
+  // When there is a whitespace char in the scheme, it should canonicalize the
+  // URL before comparison.
+  const char whtspc_str[] = " \r\n\tjav\ra\nscri\tpt:alert(1)";
+  EXPECT_TRUE(url_util::FindAndCompareScheme(
+      whtspc_str, static_cast<int>(strlen(whtspc_str)), "javascript",
+      &found_scheme));
+  EXPECT_TRUE(found_scheme == url_parse::Component(1, 10));
+
+  // Control characters should be stripped out on the ends, and kept in the
+  // middle.
+  const char ctrl_str[] = "\02jav\02scr\03ipt:alert(1)";
+  EXPECT_FALSE(url_util::FindAndCompareScheme(
+      ctrl_str, static_cast<int>(strlen(ctrl_str)), "javascript",
+      &found_scheme));
+  EXPECT_TRUE(found_scheme == url_parse::Component(1, 11));
 }
 
 TEST(URLUtilTest, ReplaceComponents) {
@@ -96,3 +113,106 @@ TEST(URLUtilTest, ReplaceComponents) {
                               &new_parsed);
 }
 
+static std::string CheckReplaceScheme(const char* base_url,
+                                      const char* scheme) {
+  // Make sure the input is canonicalized.
+  url_canon::RawCanonOutput<32> original;
+  url_parse::Parsed original_parsed;
+  url_util::Canonicalize(base_url, strlen(base_url), NULL,
+                         &original, &original_parsed);
+
+  url_canon::Replacements<char> replacements;
+  replacements.SetScheme(scheme, url_parse::Component(0, strlen(scheme)));
+
+  std::string output_string;
+  url_canon::StdStringCanonOutput output(&output_string);
+  url_parse::Parsed output_parsed;
+  url_util::ReplaceComponents(original.data(), original.length(),
+                              original_parsed, replacements, NULL,
+                              &output, &output_parsed);
+
+  output.Complete();
+  return output_string;
+}
+
+TEST(URLUtilTest, ReplaceScheme) {
+  EXPECT_EQ("https://google.com/",
+            CheckReplaceScheme("http://google.com/", "https"));
+  EXPECT_EQ("file://google.com/",
+            CheckReplaceScheme("http://google.com/", "file"));
+  EXPECT_EQ("http://home/Build",
+            CheckReplaceScheme("file:///Home/Build", "http"));
+  EXPECT_EQ("javascript:foo",
+            CheckReplaceScheme("about:foo", "javascript"));
+  EXPECT_EQ("://google.com/",
+            CheckReplaceScheme("http://google.com/", ""));
+  EXPECT_EQ("http://google.com/",
+            CheckReplaceScheme("about:google.com", "http"));
+  EXPECT_EQ("http:", CheckReplaceScheme("", "http"));
+
+#ifdef WIN32
+  // Magic Windows drive letter behavior when converting to a file URL.
+  EXPECT_EQ("file:///E:/foo/",
+            CheckReplaceScheme("http://localhost/e:foo/", "file"));
+#endif
+
+  // This will probably change to "about://google.com/" when we fix
+  // http://crbug.com/160 which should also be an acceptable result.
+  EXPECT_EQ("about://google.com/",
+            CheckReplaceScheme("http://google.com/", "about"));
+}
+
+TEST(URLUtilTest, DecodeURLEscapeSequences) {
+  struct DecodeCase {
+    const char* input;
+    const char* output;
+  } decode_cases[] = {
+    {"hello, world", "hello, world"},
+    {"%01%02%03%04%05%06%07%08%09%0a%0B%0C%0D%0e%0f/",
+     "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0B\x0C\x0D\x0e\x0f/"},
+    {"%10%11%12%13%14%15%16%17%18%19%1a%1B%1C%1D%1e%1f/",
+     "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1B\x1C\x1D\x1e\x1f/"},
+    {"%20%21%22%23%24%25%26%27%28%29%2a%2B%2C%2D%2e%2f/",
+     " !\"#$%&'()*+,-.//"},
+    {"%30%31%32%33%34%35%36%37%38%39%3a%3B%3C%3D%3e%3f/",
+     "0123456789:;<=>?/"},
+    {"%40%41%42%43%44%45%46%47%48%49%4a%4B%4C%4D%4e%4f/",
+     "@ABCDEFGHIJKLMNO/"},
+    {"%50%51%52%53%54%55%56%57%58%59%5a%5B%5C%5D%5e%5f/",
+     "PQRSTUVWXYZ[\\]^_/"},
+    {"%60%61%62%63%64%65%66%67%68%69%6a%6B%6C%6D%6e%6f/",
+     "`abcdefghijklmno/"},
+    {"%70%71%72%73%74%75%76%77%78%79%7a%7B%7C%7D%7e%7f/",
+     "pqrstuvwxyz{|}~\x7f/"},
+    // Test un-UTF-8-ization.
+    {"%e4%bd%a0%e5%a5%bd", "\xe4\xbd\xa0\xe5\xa5\xbd"},
+  };
+
+  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(decode_cases); i++) {
+    const char* input = decode_cases[i].input;
+    url_canon::RawCanonOutputT<char16> output;
+    url_util::DecodeURLEscapeSequences(input, strlen(input), &output);
+    EXPECT_EQ(decode_cases[i].output,
+              url_test_utils::ConvertUTF16ToUTF8(
+                string16(output.data(), output.length())));
+  }
+
+  // Our decode should decode %00
+  const char zero_input[] = "%00";
+  url_canon::RawCanonOutputT<char16> zero_output;
+  url_util::DecodeURLEscapeSequences(zero_input, strlen(zero_input),
+                                     &zero_output);
+  EXPECT_NE("%00",
+            url_test_utils::ConvertUTF16ToUTF8(
+              string16(zero_output.data(), zero_output.length())));
+
+  // Test the error behavior for invalid UTF-8.
+  const char invalid_input[] = "%e4%a0%e5%a5%bd";
+  const char16 invalid_expected[4] = {0x00e4, 0x00a0, 0x597d, 0};
+  url_canon::RawCanonOutputT<char16> invalid_output;
+  url_util::DecodeURLEscapeSequences(invalid_input, strlen(invalid_input),
+                                     &invalid_output);
+  EXPECT_EQ(string16(invalid_expected),
+            string16(invalid_output.data(), invalid_output.length()));
+}
+
author	Ben Murdoch <benm@google.com>	2010-07-29 17:14:53 +0100
committer	Ben Murdoch <benm@google.com>	2010-08-04 14:29:45 +0100
commit	c407dc5cd9bdc5668497f21b26b09d988ab439de (patch)
tree	7eaf8707c0309516bdb042ad976feedaf72b0bb1 /googleurl
parent	0998b1cdac5733f299c12d88bc31ef9c8035b8fa (diff)
download	external_chromium-c407dc5cd9bdc5668497f21b26b09d988ab439de.zip external_chromium-c407dc5cd9bdc5668497f21b26b09d988ab439de.tar.gz external_chromium-c407dc5cd9bdc5668497f21b26b09d988ab439de.tar.bz2