path: root/googleurl/src/
diff options
Diffstat (limited to 'googleurl/src/')
1 files changed, 433 insertions, 0 deletions
diff --git a/googleurl/src/ b/googleurl/src/
new file mode 100644
index 0000000..4e81de6
--- /dev/null
+++ b/googleurl/src/
@@ -0,0 +1,433 @@
+// Copyright 2007 Google Inc. All Rights Reserved.
+// Author: (Brett Wilson)
+#include "googleurl/src/gurl.h"
+#include "googleurl/src/url_canon.h"
+#include "googleurl/src/url_test_utils.h"
+#include "testing/gtest/include/gtest/gtest.h"
+// Some implementations of base/basictypes.h may define ARRAYSIZE.
+// If it's not defined, we define it to the ARRAYSIZE_UNSAFE macro
+// which is in our version of basictypes.h.
+#ifndef ARRAYSIZE
+#define ARRAYSIZE ARRAYSIZE_UNSAFE
+#endif
+using url_test_utils::WStringToUTF16;
+using url_test_utils::ConvertUTF8ToUTF16;
+namespace {
+// Applies one component replacement to |replacements| through the setter
+// member function |func|.
+//
+//   func          Pointer to the Replacements<CHAR> setter to invoke; it takes
+//                 the source string and the Component describing it.
+//   replacements  Replacements object the setter is invoked on.
+//   str           Replacement text. A NULL pointer means nothing is done.
+//
+// For a non-NULL |str| the setter is always called. The Component passed
+// along has its length set to strlen(str) only when |str| is non-empty; for
+// an empty string it is left exactly as default-constructed.
+template<typename CHAR>
+void SetupReplacement(void (url_canon::Replacements<CHAR>::*func)(const CHAR*,
+                          const url_parse::Component&),
+                      url_canon::Replacements<CHAR>* replacements,
+                      const CHAR* str) {
+  if (str) {
+    url_parse::Component comp;
+    if (str[0])
+      comp.len = static_cast<int>(strlen(str));
+    (replacements->*func)(str, comp);
+  }
+}
+} // namespace
+// Different types of URLs should be handled differently by url_util, and
+// handed off to different canonicalizers.
+TEST(GURLTest, Types) {
+  // Each case pairs an input spec with the spec GURL is expected to report.
+  struct TypeTest {
+    const char* src;
+    const char* expected;
+  } type_cases[] = {
+    // URLs with "://" should be treated as standard and have a hostname, even
+    // when the scheme is unknown.
+    {"something:///", "something://"},
+    // In the reverse, lacking a "://" means a path URL so no canonicalization
+    // should happen.
+    // NOTE(review): the empty pair below looks like a literal lost in
+    // extraction - confirm against upstream.
+    {"", ""},
+    {"something:/", "something:/"},
+#ifdef WIN32
+    // URLs that look like absolute Windows drive specs.
+    {"c:\\foo.txt", "file:///C:/foo.txt"},
+    {"Z|foo.txt", "file:///Z:/foo.txt"},
+    {"\\\\server\\foo.txt", "file://server/foo.txt"},
+    {"//server/foo.txt", "file://server/foo.txt"},
+#endif
+  };
+  for (size_t i = 0; i < ARRAYSIZE(type_cases); i++) {
+    GURL gurl(type_cases[i].src);
+    EXPECT_STREQ(type_cases[i].expected, gurl.spec().c_str());
+  }
+}
+// Test the basic creation and querying of components in a GURL. We assume
+// the parser is already tested and works, so we are mostly interested if the
+// object does the right thing with the results.
+TEST(GURLTest, Components) {
+  // NOTE(review): the URL literals here (input spec, expected spec, expected
+  // host) appear truncated - the assertions below expect an http scheme,
+  // credentials and port 99 that the visible input does not contain. Restore
+  // them from upstream before relying on this test.
+  GURL url(WStringToUTF16(L";bar?q=a#ref"));
+  EXPECT_TRUE(url.is_valid());
+  EXPECT_TRUE(url.SchemeIs("http"));
+  EXPECT_FALSE(url.SchemeIsFile());
+  // This is the narrow version of the URL, which should match the wide input.
+  EXPECT_EQ(";bar?q=a#ref", url.spec());
+  EXPECT_EQ("http", url.scheme());
+  EXPECT_EQ("user", url.username());
+  EXPECT_EQ("pass", url.password());
+  EXPECT_EQ("", url.host());
+  EXPECT_EQ("99", url.port());
+  EXPECT_EQ(99, url.IntPort());
+  EXPECT_EQ("/foo;bar", url.path());
+  EXPECT_EQ("q=a", url.query());
+  EXPECT_EQ("ref", url.ref());
+}
+// A default-constructed GURL must be invalid, report an empty string from
+// every component accessor, and report PORT_UNSPECIFIED from IntPort().
+TEST(GURLTest, Empty) {
+  GURL url;
+  EXPECT_FALSE(url.is_valid());
+  EXPECT_EQ("", url.spec());
+  EXPECT_EQ("", url.scheme());
+  EXPECT_EQ("", url.username());
+  EXPECT_EQ("", url.password());
+  EXPECT_EQ("", url.host());
+  EXPECT_EQ("", url.port());
+  EXPECT_EQ(url_parse::PORT_UNSPECIFIED, url.IntPort());
+  EXPECT_EQ("", url.path());
+  EXPECT_EQ("", url.query());
+  EXPECT_EQ("", url.ref());
+}
+// Copy construction must carry over validity and every component, for both a
+// parsed URL and a default-constructed (invalid) one.
+TEST(GURLTest, Copy) {
+  // NOTE(review): the URL literals in the first half (input spec, expected
+  // spec, expected host) appear truncated - the assertions expect an http
+  // scheme, credentials and port 99 that the visible input does not contain.
+  // Restore them from upstream before relying on this test.
+  GURL url(WStringToUTF16(L";bar?q=a#ref"));
+  GURL url2(url);
+  EXPECT_TRUE(url2.is_valid());
+  EXPECT_EQ(";bar?q=a#ref", url2.spec());
+  EXPECT_EQ("http", url2.scheme());
+  EXPECT_EQ("user", url2.username());
+  EXPECT_EQ("pass", url2.password());
+  EXPECT_EQ("", url2.host());
+  EXPECT_EQ("99", url2.port());
+  EXPECT_EQ(99, url2.IntPort());
+  EXPECT_EQ("/foo;bar", url2.path());
+  EXPECT_EQ("q=a", url2.query());
+  EXPECT_EQ("ref", url2.ref());
+  // Copying of invalid URL should be invalid
+  GURL invalid;
+  GURL invalid2(invalid);
+  EXPECT_FALSE(invalid2.is_valid());
+  EXPECT_EQ("", invalid2.spec());
+  EXPECT_EQ("", invalid2.scheme());
+  EXPECT_EQ("", invalid2.username());
+  EXPECT_EQ("", invalid2.password());
+  EXPECT_EQ("", invalid2.host());
+  EXPECT_EQ("", invalid2.port());
+  EXPECT_EQ(url_parse::PORT_UNSPECIFIED, invalid2.IntPort());
+  EXPECT_EQ("", invalid2.path());
+  EXPECT_EQ("", invalid2.query());
+  EXPECT_EQ("", invalid2.ref());
+}
+// Given an invalid URL, we should still get most of the components.
+TEST(GURLTest, Invalid) {
+  // NOTE(review): the input spec, the expected possibly_invalid_spec() and
+  // the expected host appear truncated - the assertions expect an http scheme
+  // and a non-numeric port "foo" that an empty input cannot produce. Restore
+  // them from upstream before relying on this test.
+  GURL url("");
+  EXPECT_FALSE(url.is_valid());
+  EXPECT_EQ("", url.possibly_invalid_spec());
+  EXPECT_EQ("http", url.scheme());
+  EXPECT_EQ("", url.username());
+  EXPECT_EQ("", url.password());
+  EXPECT_EQ("", url.host());
+  EXPECT_EQ("foo", url.port());
+  EXPECT_EQ(url_parse::PORT_INVALID, url.IntPort());
+  EXPECT_EQ("/", url.path());
+  EXPECT_EQ("", url.query());
+  EXPECT_EQ("", url.ref());
+}
+// Checks GURL::Resolve through both the 8-bit and the UTF-16 entry points.
+TEST(GURLTest, Resolve) {
+  // The tricky cases for relative URL resolving are tested in the
+  // canonicalizer unit test. Here, we just test that the GURL integration
+  // works properly.
+  struct ResolveCase {
+    const char* base;
+    const char* relative;
+    bool expected_valid;
+    const char* expected;
+  } resolve_cases[] = {
+    // NOTE(review): many literals below are empty and several rows are
+    // identical; they look truncated - confirm against upstream.
+    {"", "foo.html", true, ""},
+    {"", "", true, ""},
+    {"", "../../../hello/./world.html?a#b", true, ""},
+    {"", "#com", true, ""},
+    {"", "", true, ""},
+    // Unknown schemes with a "://" should be treated as standard.
+    {"somescheme://foo/", "bar", true, "somescheme://foo/bar"},
+    // Unknown schemes with no "://" are not standard.
+    {"data:blahblah", "", true, ""},
+    {"data:blahblah", "", true, ""},
+    {"data:/blahblah", "file.html", false, ""},
+  };
+  for (size_t i = 0; i < ARRAYSIZE(resolve_cases); i++) {
+    // 8-bit code path.
+    GURL input(resolve_cases[i].base);
+    GURL output = input.Resolve(resolve_cases[i].relative);
+    EXPECT_EQ(resolve_cases[i].expected_valid, output.is_valid());
+    EXPECT_EQ(resolve_cases[i].expected, output.spec());
+    // Wide code path. Resolve against the base that was itself built from
+    // UTF-16 so the wide constructor is covered too, not only the wide
+    // Resolve() overload.
+    GURL inputw(ConvertUTF8ToUTF16(resolve_cases[i].base));
+    GURL outputw =
+        inputw.Resolve(ConvertUTF8ToUTF16(resolve_cases[i].relative));
+    EXPECT_EQ(resolve_cases[i].expected_valid, outputw.is_valid());
+    EXPECT_EQ(resolve_cases[i].expected, outputw.spec());
+  }
+}
+// Runs GetOrigin() on each input and compares the spec of the result with
+// the expected string.
+TEST(GURLTest, GetOrigin) {
+  struct TestCase {
+    const char* input;
+    const char* expected;
+  } cases[] = {
+    // NOTE(review): most rows are identical empty pairs; the literals look
+    // truncated - confirm against upstream.
+    {"", ""},
+    {"javascript:window.alert(\"hello,world\");", ""},
+    {"", ""},
+    {"", ""},
+    {"", ""},
+    {"", ""},
+  };
+  for (size_t i = 0; i < ARRAYSIZE(cases); i++) {
+    GURL url(cases[i].input);
+    GURL origin = url.GetOrigin();
+    EXPECT_EQ(cases[i].expected, origin.spec());
+  }
+}
+// Runs GetWithEmptyPath() on each input and compares the spec of the result
+// with the expected string.
+TEST(GURLTest, GetWithEmptyPath) {
+  struct TestCase {
+    const char* input;
+    const char* expected;
+  } cases[] = {
+    // NOTE(review): the empty pairs look like truncated literals - confirm
+    // against upstream.
+    {"", ""},
+    {"javascript:window.alert(\"hello, world\");", ""},
+    {"", ""},
+  };
+  for (size_t i = 0; i < ARRAYSIZE(cases); i++) {
+    GURL url(cases[i].input);
+    GURL empty_path = url.GetWithEmptyPath();
+    EXPECT_EQ(cases[i].expected, empty_path.spec());
+  }
+}
+// Builds a GURL::Replacements from each case's per-component strings (NULL
+// fields are skipped by SetupReplacement), applies it with
+// ReplaceComponents() and compares the resulting spec.
+TEST(GURLTest, Replacements) {
+  // The url canonicalizer replacement test will handle most of these cases.
+  // The most important thing to do here is to check that the proper
+  // canonicalizer gets called based on the scheme of the input.
+  struct ReplaceCase {
+    const char* base;
+    const char* scheme;
+    const char* username;
+    const char* password;
+    const char* host;
+    const char* port;
+    const char* path;
+    const char* query;
+    const char* ref;
+    const char* expected;
+  } replace_cases[] = {
+    // NOTE(review): several base/host/expected literals are empty and look
+    // truncated - confirm against upstream.
+    {"", NULL, NULL, NULL, NULL, NULL, "/", "", "", ""},
+    {"", "javascript", "", "", "", "", "'foo');", "", "", "'foo');"},
+    {"file:///C:/foo/bar.txt", "http", NULL, NULL, "", "99", "/foo","search", "ref", ""},
+#ifdef WIN32
+    {"", "file", "", "", "", "", "c:\\", "", "", "file:///C:/"},
+#endif
+  };
+  for (size_t i = 0; i < ARRAYSIZE(replace_cases); i++) {
+    const ReplaceCase& cur = replace_cases[i];
+    GURL url(cur.base);
+    GURL::Replacements repl;
+    SetupReplacement(&GURL::Replacements::SetScheme, &repl, cur.scheme);
+    SetupReplacement(&GURL::Replacements::SetUsername, &repl, cur.username);
+    SetupReplacement(&GURL::Replacements::SetPassword, &repl, cur.password);
+    SetupReplacement(&GURL::Replacements::SetHost, &repl, cur.host);
+    SetupReplacement(&GURL::Replacements::SetPort, &repl, cur.port);
+    SetupReplacement(&GURL::Replacements::SetPath, &repl, cur.path);
+    SetupReplacement(&GURL::Replacements::SetQuery, &repl, cur.query);
+    SetupReplacement(&GURL::Replacements::SetRef, &repl, cur.ref);
+    GURL output = url.ReplaceComponents(repl);
+    EXPECT_EQ(cur.expected, output.spec());
+  }
+}
+// Runs PathForRequest() on each input and compares the returned string with
+// the expected request path.
+TEST(GURLTest, PathForRequest) {
+  struct TestCase {
+    const char* input;
+    const char* expected;
+  } cases[] = {
+    // NOTE(review): every input literal is empty although the expected paths
+    // differ; the inputs look truncated - confirm against upstream.
+    {"", "/"},
+    {"", "/"},
+    {"", "/foo/bar.html?baz=22"},
+    {"", "/foo/bar.html"},
+    {"", "/foo/bar.html?query"},
+  };
+  for (size_t i = 0; i < ARRAYSIZE(cases); i++) {
+    GURL url(cases[i].input);
+    std::string path_request = url.PathForRequest();
+    EXPECT_EQ(cases[i].expected, path_request);
+  }
+}
+// Compares EffectiveIntPort() with the expected port number for specs
+// grouped by scheme (see the per-group comments in the table).
+TEST(GURLTest, EffectiveIntPort) {
+  struct PortTest {
+    const char* spec;
+    int expected_int_port;
+  } port_tests[] = {
+    // NOTE(review): the spec literals are empty or cut off after "://" while
+    // the expected ports differ; they look truncated - confirm against
+    // upstream.
+    // http
+    {"", 80},
+    {"", 80},
+    {"", 443},
+    // https
+    {"", 443},
+    {"", 443},
+    {"", 80},
+    // ftp
+    {"", 21},
+    {"", 21},
+    {"", 80},
+    // gopher
+    {"gopher://", 70},
+    {"gopher://", 70},
+    {"gopher://", 80},
+    // file - no port
+    {"file://", url_parse::PORT_UNSPECIFIED},
+    {"file://", url_parse::PORT_UNSPECIFIED},
+    // data - no port
+    {"", url_parse::PORT_UNSPECIFIED},
+    {"", url_parse::PORT_UNSPECIFIED},
+  };
+  for (size_t i = 0; i < ARRAYSIZE(port_tests); i++) {
+    GURL url(port_tests[i].spec);
+    EXPECT_EQ(port_tests[i].expected_int_port, url.EffectiveIntPort());
+  }
+}
+// Compares HostIsIPAddress() with the expected flag for each spec.
+TEST(GURLTest, IPAddress) {
+  struct IPTest {
+    const char* spec;
+    bool expected_ip;
+  } ip_tests[] = {
+    // NOTE(review): several spec literals are empty yet carry different
+    // expectations; they look truncated - confirm against upstream.
+    {"", false},
+    {"", true},
+    {"", false},
+    {"http://192.168.m.1/", false},
+    {"http://2001:db8::1/", false},
+    {"http://[2001:db8::1]/", true},
+    {"", false},
+    {"some random input!", false},
+  };
+  for (size_t i = 0; i < ARRAYSIZE(ip_tests); i++) {
+    GURL url(ip_tests[i].spec);
+    EXPECT_EQ(ip_tests[i].expected_ip, url.HostIsIPAddress());
+  }
+}
+// For each input, compares host() with expected_host and HostNoBrackets()
+// with expected_plainhost.
+TEST(GURLTest, HostNoBrackets) {
+  struct TestCase {
+    const char* input;
+    const char* expected_host;
+    const char* expected_plainhost;
+  } cases[] = {
+    // NOTE(review): the all-empty rows look like truncated literals - confirm
+    // against upstream.
+    {"", "", ""},
+    {"http://[2001:db8::1]/", "[2001:db8::1]", "2001:db8::1"},
+    {"http://[::]/", "[::]", "::"},
+    // Don't require a valid URL, but don't crash either.
+    {"http://[]/", "[]", ""},
+    {"http://[x]/", "[x]", "x"},
+    {"http://[x/", "[x", "[x"},
+    {"http://x]/", "x]", "x]"},
+    {"http://[/", "[", "["},
+    {"http://]/", "]", "]"},
+    {"", "", ""},
+  };
+  for (size_t i = 0; i < ARRAYSIZE(cases); i++) {
+    GURL url(cases[i].input);
+    EXPECT_EQ(cases[i].expected_host, url.host());
+    EXPECT_EQ(cases[i].expected_plainhost, url.HostNoBrackets());
+  }
+}
+// Exercises DomainIs() against a shared domain constant and a few inline
+// domain strings, with both matching and non-matching URLs.
+TEST(GURLTest, DomainIs) {
+  // NOTE(review): google_domain, every URL literal and some DomainIs()
+  // arguments are empty here although the expectations differ; they look
+  // truncated - confirm against upstream.
+  const char google_domain[] = "";
+  GURL url_1("");
+  EXPECT_TRUE(url_1.DomainIs(google_domain));
+  GURL url_2("");
+  EXPECT_TRUE(url_2.DomainIs(google_domain));
+  GURL url_3("");
+  EXPECT_TRUE(url_3.DomainIs(google_domain));
+  GURL url_4("");
+  EXPECT_FALSE(url_4.DomainIs(""));
+  GURL url_5("");
+  EXPECT_TRUE(url_5.DomainIs(""));
+  GURL url_6("");
+  EXPECT_TRUE(url_6.DomainIs(".com."));
+  GURL url_7("");
+  EXPECT_FALSE(url_7.DomainIs(google_domain));
+  GURL url_8("");
+  EXPECT_FALSE(url_8.DomainIs(google_domain));
+  GURL url_9("");
+  EXPECT_FALSE(url_9.DomainIs(google_domain));
+  GURL url_10("");
+  EXPECT_FALSE(url_10.DomainIs(".com"));
+}
+// Newlines should be stripped from inputs.
+TEST(GURLTest, Newlines) {
+  // NOTE(review): the constructor input and both expected specs look
+  // truncated - confirm against upstream.
+  // Constructor.
+  GURL url_1(" \t ht\ntp://\\\ndf \n ");
+  EXPECT_EQ("", url_1.spec());
+  // Relative path resolver.
+  GURL url_2 = url_1.Resolve(" \n /fo\to\r ");
+  EXPECT_EQ("", url_2.spec());
+  // Note that newlines are NOT stripped from ReplaceComponents.
+}
+TEST(GURLTest, IsStandard) {
+ GURL a("http:foo/bar");
+ EXPECT_TRUE(a.IsStandard());
+ GURL b("foo:bar/baz");
+ EXPECT_FALSE(b.IsStandard());
+ GURL c("foo://bar/baz");
+ EXPECT_TRUE(c.IsStandard());