-rw-r--r--  chrome/browser/autocomplete/autocomplete.cc             6
-rw-r--r--  chrome/browser/autocomplete/autocomplete_unittest.cc     4
-rw-r--r--  chrome/browser/net/url_fixer_upper.cc                   74
-rw-r--r--  chrome/browser/net/url_fixer_upper_unittest.cc          26
4 files changed, 81 insertions(+), 29 deletions(-)
diff --git a/chrome/browser/autocomplete/autocomplete.cc b/chrome/browser/autocomplete/autocomplete.cc
index 4b9db4e..65f71c4 100644
--- a/chrome/browser/autocomplete/autocomplete.cc
+++ b/chrome/browser/autocomplete/autocomplete.cc
@@ -181,8 +181,12 @@ AutocompleteInput::Type AutocompleteInput::Parse(
// See if the host is an IP address.
bool is_ip_address;
- net::CanonicalizeHost(host, &is_ip_address);
+ const std::string canon_host(net::CanonicalizeHost(host, &is_ip_address));
if (is_ip_address) {
+ // If the user typed a valid IPv6 address, treat it as a URL.
+ if (canon_host[0] == '[')
+ return URL;
+
// If the user originally typed a host that looks like an IP address (a
// dotted quad), they probably want to open it. If the original input was
// something else (like a single number), they probably wanted to search for
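
The check above relies on net::CanonicalizeHost() returning the canonical host text, in which a valid IPv6 literal comes back wrapped in brackets, so testing canon_host[0] == '[' is enough to tell an IPv6 literal apart from a dotted quad or a bare number. Below is a minimal standalone sketch of that heuristic, using POSIX inet_pton() as a stand-in for Chromium's canonicalizer; LooksLikeIPv6Literal is a hypothetical helper, not part of this change.

#include <arpa/inet.h>  // inet_pton (POSIX)
#include <string>

// Hypothetical helper: returns true when |host| is a textual IPv6 address,
// optionally already wrapped in the brackets canonicalization adds.
static bool LooksLikeIPv6Literal(const std::string& host) {
  std::string text = host;
  if (text.size() >= 2 && text.front() == '[' && text.back() == ']')
    text = text.substr(1, text.size() - 2);  // strip "[...]"
  unsigned char buf[16];  // sizeof(struct in6_addr)
  return inet_pton(AF_INET6, text.c_str(), buf) == 1;
}

// LooksLikeIPv6Literal("[2001:db8::1]") -> true   (treated as a URL)
// LooksLikeIPv6Literal("[2001:]")       -> false  (falls through to QUERY)
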
diff --git a/chrome/browser/autocomplete/autocomplete_unittest.cc b/chrome/browser/autocomplete/autocomplete_unittest.cc
index 6cdd646..e7fdd4c 100644
--- a/chrome/browser/autocomplete/autocomplete_unittest.cc
+++ b/chrome/browser/autocomplete/autocomplete_unittest.cc
@@ -233,8 +233,12 @@ TEST(AutocompleteTest, InputType) {
#endif // defined(OS_WIN)
{ L"http://foo.com/", AutocompleteInput::URL },
{ L"127.0.0.1", AutocompleteInput::URL },
+ { L"127.0.1", AutocompleteInput::UNKNOWN },
+ { L"127.0.1/", AutocompleteInput::UNKNOWN },
{ L"browser.tabs.closeButtons", AutocompleteInput::UNKNOWN },
{ L"\u6d4b\u8bd5", AutocompleteInput::UNKNOWN },
+ { L"[2001:]", AutocompleteInput::QUERY }, // Not a valid IP
+ { L"[2001:dB8::1]", AutocompleteInput::URL },
};
for (size_t i = 0; i < ARRAYSIZE_UNSAFE(input_cases); ++i) {
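
The new 127.0.1 cases pin down the heuristic described in the comment above: only input that reads as a full dotted quad is confidently a URL, while a partial form stays UNKNOWN. A rough, hypothetical sketch of such a dotted-quad test follows; the real classification goes through net::CanonicalizeHost, not this helper.

#include <cctype>
#include <sstream>
#include <string>

// Hypothetical sketch of the "dotted quad" heuristic behind the new test
// cases: four numeric, dot-separated components look like an IP the user
// wants to open; anything shorter ("127.0.1") is left as UNKNOWN.
// Range checking (0-255) is omitted for brevity.
static bool IsDottedQuad(const std::string& host) {
  std::istringstream parts(host);
  std::string piece;
  int count = 0;
  while (std::getline(parts, piece, '.')) {
    if (piece.empty() || piece.size() > 3)
      return false;
    for (char c : piece) {
      if (!std::isdigit(static_cast<unsigned char>(c)))
        return false;
    }
    ++count;
  }
  return count == 4;
}

// IsDottedQuad("127.0.0.1") -> true   (URL)
// IsDottedQuad("127.0.1")   -> false  (UNKNOWN)
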
diff --git a/chrome/browser/net/url_fixer_upper.cc b/chrome/browser/net/url_fixer_upper.cc
index 4162061..6572183 100644
--- a/chrome/browser/net/url_fixer_upper.cc
+++ b/chrome/browser/net/url_fixer_upper.cc
@@ -19,7 +19,8 @@
#include "net/base/net_util.h"
#include "net/base/registry_controlled_domain.h"
-using namespace std;
+using std::string;
+using std::wstring;
namespace {
@@ -311,6 +312,47 @@ static bool HasPort(const std::string& original_text,
return true;
}
+// Try to extract a valid scheme from the beginning of |text|.
+// If successful, set |scheme_component| to the text range where the scheme
+// was located, and fill |canon_scheme| with its canonicalized form.
+// Otherwise, return false and leave the outputs in an indeterminate state.
+static bool GetValidScheme(const string &text,
+ url_parse::Component *scheme_component,
+ string *canon_scheme) {
+ // Locate everything up to (but not including) the first ':'
+ if (!url_parse::ExtractScheme(text.data(), static_cast<int>(text.length()),
+ scheme_component))
+ return false;
+
+ // Make sure the scheme contains only valid characters, and convert
+ // to lowercase. This also catches IPv6 literals like [::1], because
+ // brackets are not in the whitelist.
+ url_canon::StdStringCanonOutput canon_scheme_output(canon_scheme);
+ url_parse::Component canon_scheme_component;
+ if (!url_canon::CanonicalizeScheme(text.data(), *scheme_component,
+ &canon_scheme_output,
+ &canon_scheme_component))
+ return false;
+
+ // Strip the ':', and any trailing buffer space.
+ DCHECK_EQ(0, canon_scheme_component.begin);
+ canon_scheme->erase(canon_scheme_component.len);
+
+ // We need to fix up the segmentation for "www.example.com:/". For this
+ // case, we guess that schemes with a "." are not actually schemes.
+ if (canon_scheme->find('.') != string::npos)
+ return false;
+
+ // We need to fix up the segmentation for "www:123/". For this case, we
+ // will add an HTTP scheme later and make the URL parser happy.
+ // TODO(pkasting): Maybe we should try to use GURL's parser for this?
+ if (HasPort(text, *scheme_component))
+ return false;
+
+ // Everything checks out.
+ return true;
+}
+
string URLFixerUpper::SegmentURL(const string& text,
url_parse::Parsed* parts) {
// Initialize the result.
@@ -333,37 +375,13 @@ string URLFixerUpper::SegmentURL(const string& text,
// Otherwise, we need to look at things carefully.
string scheme;
- if (url_parse::ExtractScheme(text.data(),
- static_cast<int>(text.length()),
- &parts->scheme)) {
- // We were able to extract a scheme. Remember what we have, but we may
- // decide to change our minds later.
- scheme.assign(text.substr(parts->scheme.begin, parts->scheme.len));
-
- if (parts->scheme.is_valid() &&
- // Valid schemes are ASCII-only.
- (!IsStringASCII(scheme) ||
- // We need to fix up the segmentation for "www.example.com:/". For this
- // case, we guess that schemes with a "." are not actually schemes.
- (scheme.find(".") != wstring::npos) ||
- // We need to fix up the segmentation for "www:123/". For this case, we
- // will add an HTTP scheme later and make the URL parser happy.
- // TODO(pkasting): Maybe we should try to use GURL's parser for this?
- HasPort(text, parts->scheme)))
- parts->scheme.reset();
- }
-
- // When we couldn't find a scheme in the input, we need to pick one. Normally
- // we choose http, but if the URL starts with "ftp.", we match other browsers
- // and choose ftp.
- if (!parts->scheme.is_valid()) {
+ if (!GetValidScheme(text, &parts->scheme, &scheme)) {
+ // Couldn't determine the scheme, so just pick one.
+ parts->scheme.reset();
scheme.assign(StartsWithASCII(text, "ftp.", false) ?
chrome::kFtpScheme : chrome::kHttpScheme);
}
- // Cannonicalize the scheme.
- StringToLowerASCII(&scheme);
-
// Not segmenting file schemes or nonstandard schemes.
if ((scheme == chrome::kFileScheme) ||
!url_util::IsStandard(scheme.c_str(), static_cast<int>(scheme.length()),
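
GetValidScheme() above folds the old inline checks into one place: CanonicalizeScheme() whitelists the characters and lowercases the scheme, which also rejects bracketed IPv6 literals such as [2001:db8::1], while the '.' and port guesses still handle "www.example.com:/" and "www:123/". The sketch below mirrors that decision logic without Chromium's url_canon; ExtractPlausibleScheme is hypothetical and only illustrates the checks.

#include <algorithm>
#include <cctype>
#include <string>

// Hypothetical stand-in for GetValidScheme(): take the text before the first
// ':', accept it only if it looks like a plausible scheme, and return its
// lowercased form. A leading '[' fails the character check, which is why
// IPv6 literals no longer get mistaken for schemes.
static bool ExtractPlausibleScheme(const std::string& text,
                                   std::string* canon_scheme) {
  size_t colon = text.find(':');
  if (colon == std::string::npos || colon == 0)
    return false;
  std::string scheme = text.substr(0, colon);

  // Scheme characters: ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ), RFC 3986.
  if (!std::isalpha(static_cast<unsigned char>(scheme[0])))
    return false;
  for (char c : scheme) {
    if (!std::isalnum(static_cast<unsigned char>(c)) &&
        c != '+' && c != '-' && c != '.')
      return false;
  }

  // Guess that "schemes" containing a '.' (e.g. "www.example.com:/") are
  // really hosts, and that an all-digit remainder ("www:123/") is a port.
  if (scheme.find('.') != std::string::npos)
    return false;
  size_t port_end = text.find('/', colon + 1);
  std::string port = text.substr(
      colon + 1,
      port_end == std::string::npos ? std::string::npos
                                    : port_end - colon - 1);
  if (!port.empty() &&
      std::all_of(port.begin(), port.end(), [](unsigned char c) {
        return std::isdigit(c) != 0;
      }))
    return false;

  canon_scheme->assign(scheme);
  std::transform(canon_scheme->begin(), canon_scheme->end(),
                 canon_scheme->begin(),
                 [](unsigned char c) { return std::tolower(c); });
  return true;
}

// ExtractPlausibleScheme("ftp://x", &s)        -> true,  s == "ftp"
// ExtractPlausibleScheme("[2001:db8::1]/", &s) -> false  (not a scheme)
// ExtractPlausibleScheme("www:123/", &s)       -> false  (port, not scheme)
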
diff --git a/chrome/browser/net/url_fixer_upper_unittest.cc b/chrome/browser/net/url_fixer_upper_unittest.cc
index f7b0592..0405935 100644
--- a/chrome/browser/net/url_fixer_upper_unittest.cc
+++ b/chrome/browser/net/url_fixer_upper_unittest.cc
@@ -88,6 +88,26 @@ static const segment_case segment_cases[] = {
url_parse::Component(48, 3), // query
url_parse::Component(52, 3), // ref
},
+ { "[2001:db8::1]/path", "http",
+ url_parse::Component(), // scheme
+ url_parse::Component(), // username
+ url_parse::Component(), // password
+ url_parse::Component(0, 13), // host
+ url_parse::Component(), // port
+ url_parse::Component(13, 5), // path
+ url_parse::Component(), // query
+ url_parse::Component(), // ref
+ },
+ { "[::1]", "http",
+ url_parse::Component(), // scheme
+ url_parse::Component(), // username
+ url_parse::Component(), // password
+ url_parse::Component(0, 5), // host
+ url_parse::Component(), // port
+ url_parse::Component(), // path
+ url_parse::Component(), // query
+ url_parse::Component(), // ref
+ },
};
TEST(URLFixerUpperTest, SegmentURL) {
@@ -196,6 +216,12 @@ struct fixup_case {
{ "http://google.com/search?q=\xf0\x90\x80\xa0", "",
"http://google.com/search?q=\xf0\x90\x80\xa0"
},
+ // URLs containing IPv6 literals.
+ {"[2001:db8::2]", "", "http://[2001:db8::2]/"},
+ {"[::]:80", "", "http://[::]:80/"},
+ {"[::]:80/path", "", "http://[::]:80/path"},
+ // TODO(pmarks): Maybe we should parse bare IPv6 literals someday.
+ {"::1", "", "::1"},
};
TEST(URLFixerUpperTest, FixupURL) {