// Copyright 2007 Google Inc. All Rights Reserved. // Author: brettw@google.com (Brett Wilson) #include "googleurl/src/gurl.h" #include "googleurl/src/url_canon.h" #include "googleurl/src/url_test_utils.h" #include "testing/gtest/include/gtest/gtest.h" // Some implementations of base/basictypes.h may define ARRAYSIZE. // If it's not defined, we define it to the ARRAYSIZE_UNSAFE macro // which is in our version of basictypes.h. #ifndef ARRAYSIZE #define ARRAYSIZE ARRAYSIZE_UNSAFE #endif using url_test_utils::WStringToUTF16; using url_test_utils::ConvertUTF8ToUTF16; namespace { template void SetupReplacement(void (url_canon::Replacements::*func)(const CHAR*, const url_parse::Component&), url_canon::Replacements* replacements, const CHAR* str) { if (str) { url_parse::Component comp; if (str[0]) comp.len = static_cast(strlen(str)); (replacements->*func)(str, comp); } } // Returns the canonicalized string for the given URL string for the // GURLTest.Types test. std::string TypesTestCase(const char* src) { GURL gurl(src); return gurl.possibly_invalid_spec(); } } // namespace // Different types of URLs should be handled differently by url_util, and // handed off to different canonicalizers. TEST(GURLTest, Types) { // URLs with unknown schemes should be treated as path URLs, even when they // have things like "://". EXPECT_EQ("something:///HOSTNAME.com/", TypesTestCase("something:///HOSTNAME.com/")); // In the reverse, known schemes should always trigger standard URL handling. EXPECT_EQ("http://hostname.com/", TypesTestCase("http:HOSTNAME.com")); EXPECT_EQ("http://hostname.com/", TypesTestCase("http:/HOSTNAME.com")); EXPECT_EQ("http://hostname.com/", TypesTestCase("http://HOSTNAME.com")); EXPECT_EQ("http://hostname.com/", TypesTestCase("http:///HOSTNAME.com")); #ifdef WIN32 // URLs that look like absolute Windows drive specs. EXPECT_EQ("file:///C:/foo.txt", TypesTestCase("c:\\foo.txt")); EXPECT_EQ("file:///Z:/foo.txt", TypesTestCase("Z|foo.txt")); EXPECT_EQ("file://server/foo.txt", TypesTestCase("\\\\server\\foo.txt")); EXPECT_EQ("file://server/foo.txt", TypesTestCase("//server/foo.txt")); #endif } // Test the basic creation and querying of components in a GURL. We assume // the parser is already tested and works, so we are mostly interested if the // object does the right thing with the results. TEST(GURLTest, Components) { GURL url(WStringToUTF16(L"http://user:pass@google.com:99/foo;bar?q=a#ref")); EXPECT_TRUE(url.is_valid()); EXPECT_TRUE(url.SchemeIs("http")); EXPECT_FALSE(url.SchemeIsFile()); // This is the narrow version of the URL, which should match the wide input. EXPECT_EQ("http://user:pass@google.com:99/foo;bar?q=a#ref", url.spec()); EXPECT_EQ("http", url.scheme()); EXPECT_EQ("user", url.username()); EXPECT_EQ("pass", url.password()); EXPECT_EQ("google.com", url.host()); EXPECT_EQ("99", url.port()); EXPECT_EQ(99, url.IntPort()); EXPECT_EQ("/foo;bar", url.path()); EXPECT_EQ("q=a", url.query()); EXPECT_EQ("ref", url.ref()); } TEST(GURLTest, Empty) { GURL url; EXPECT_FALSE(url.is_valid()); EXPECT_EQ("", url.spec()); EXPECT_EQ("", url.scheme()); EXPECT_EQ("", url.username()); EXPECT_EQ("", url.password()); EXPECT_EQ("", url.host()); EXPECT_EQ("", url.port()); EXPECT_EQ(url_parse::PORT_UNSPECIFIED, url.IntPort()); EXPECT_EQ("", url.path()); EXPECT_EQ("", url.query()); EXPECT_EQ("", url.ref()); } TEST(GURLTest, Copy) { GURL url(WStringToUTF16(L"http://user:pass@google.com:99/foo;bar?q=a#ref")); GURL url2(url); EXPECT_TRUE(url2.is_valid()); EXPECT_EQ("http://user:pass@google.com:99/foo;bar?q=a#ref", url2.spec()); EXPECT_EQ("http", url2.scheme()); EXPECT_EQ("user", url2.username()); EXPECT_EQ("pass", url2.password()); EXPECT_EQ("google.com", url2.host()); EXPECT_EQ("99", url2.port()); EXPECT_EQ(99, url2.IntPort()); EXPECT_EQ("/foo;bar", url2.path()); EXPECT_EQ("q=a", url2.query()); EXPECT_EQ("ref", url2.ref()); // Copying of invalid URL should be invalid GURL invalid; GURL invalid2(invalid); EXPECT_FALSE(invalid2.is_valid()); EXPECT_EQ("", invalid2.spec()); EXPECT_EQ("", invalid2.scheme()); EXPECT_EQ("", invalid2.username()); EXPECT_EQ("", invalid2.password()); EXPECT_EQ("", invalid2.host()); EXPECT_EQ("", invalid2.port()); EXPECT_EQ(url_parse::PORT_UNSPECIFIED, invalid2.IntPort()); EXPECT_EQ("", invalid2.path()); EXPECT_EQ("", invalid2.query()); EXPECT_EQ("", invalid2.ref()); } // Given an invalid URL, we should still get most of the components. TEST(GURLTest, Invalid) { GURL url("http:google.com:foo"); EXPECT_FALSE(url.is_valid()); EXPECT_EQ("http://google.com:foo/", url.possibly_invalid_spec()); EXPECT_EQ("http", url.scheme()); EXPECT_EQ("", url.username()); EXPECT_EQ("", url.password()); EXPECT_EQ("google.com", url.host()); EXPECT_EQ("foo", url.port()); EXPECT_EQ(url_parse::PORT_INVALID, url.IntPort()); EXPECT_EQ("/", url.path()); EXPECT_EQ("", url.query()); EXPECT_EQ("", url.ref()); } TEST(GURLTest, Resolve) { // The tricky cases for relative URL resolving are tested in the // canonicalizer unit test. Here, we just test that the GURL integration // works properly. struct ResolveCase { const char* base; const char* relative; bool expected_valid; const char* expected; } resolve_cases[] = { {"http://www.google.com/", "foo.html", true, "http://www.google.com/foo.html"}, {"http://www.google.com/", "http://images.google.com/foo.html", true, "http://images.google.com/foo.html"}, {"http://www.google.com/blah/bloo?c#d", "../../../hello/./world.html?a#b", true, "http://www.google.com/hello/world.html?a#b"}, {"http://www.google.com/foo#bar", "#com", true, "http://www.google.com/foo#com"}, {"http://www.google.com/", "Https:images.google.com", true, "https://images.google.com/"}, // Unknown schemes are not standard. {"data:blahblah", "http://google.com/", true, "http://google.com/"}, {"data:blahblah", "http:google.com", true, "http://google.com/"}, {"data:/blahblah", "file.html", false, ""}, }; for (size_t i = 0; i < ARRAYSIZE(resolve_cases); i++) { // 8-bit code path. GURL input(resolve_cases[i].base); GURL output = input.Resolve(resolve_cases[i].relative); EXPECT_EQ(resolve_cases[i].expected_valid, output.is_valid()) << i; EXPECT_EQ(resolve_cases[i].expected, output.spec()) << i; // Wide code path. GURL inputw(ConvertUTF8ToUTF16(resolve_cases[i].base)); GURL outputw = input.Resolve(ConvertUTF8ToUTF16(resolve_cases[i].relative)); EXPECT_EQ(resolve_cases[i].expected_valid, outputw.is_valid()) << i; EXPECT_EQ(resolve_cases[i].expected, outputw.spec()) << i; } } TEST(GURLTest, GetOrigin) { struct TestCase { const char* input; const char* expected; } cases[] = { {"http://www.google.com", "http://www.google.com/"}, {"javascript:window.alert(\"hello,world\");", ""}, {"http://user:pass@www.google.com:21/blah#baz", "http://www.google.com:21/"}, {"http://user@www.google.com", "http://www.google.com/"}, {"http://:pass@www.google.com", "http://www.google.com/"}, {"http://:@www.google.com", "http://www.google.com/"}, }; for (size_t i = 0; i < ARRAYSIZE(cases); i++) { GURL url(cases[i].input); GURL origin = url.GetOrigin(); EXPECT_EQ(cases[i].expected, origin.spec()); } } TEST(GURLTest, GetWithEmptyPath) { struct TestCase { const char* input; const char* expected; } cases[] = { {"http://www.google.com", "http://www.google.com/"}, {"javascript:window.alert(\"hello, world\");", ""}, {"http://www.google.com/foo/bar.html?baz=22", "http://www.google.com/"}, }; for (size_t i = 0; i < ARRAYSIZE(cases); i++) { GURL url(cases[i].input); GURL empty_path = url.GetWithEmptyPath(); EXPECT_EQ(cases[i].expected, empty_path.spec()); } } TEST(GURLTest, Replacements) { // The url canonicalizer replacement test will handle most of these case. // The most important thing to do here is to check that the proper // canonicalizer gets called based on the scheme of the input. struct ReplaceCase { const char* base; const char* scheme; const char* username; const char* password; const char* host; const char* port; const char* path; const char* query; const char* ref; const char* expected; } replace_cases[] = { {"http://www.google.com/foo/bar.html?foo#bar", NULL, NULL, NULL, NULL, NULL, "/", "", "", "http://www.google.com/"}, {"http://www.google.com/foo/bar.html?foo#bar", "javascript", "", "", "", "", "window.open('foo');", "", "", "javascript:window.open('foo');"}, {"file:///C:/foo/bar.txt", "http", NULL, NULL, "www.google.com", "99", "/foo","search", "ref", "http://www.google.com:99/foo?search#ref"}, #ifdef WIN32 {"http://www.google.com/foo/bar.html?foo#bar", "file", "", "", "", "", "c:\\", "", "", "file:///C:/"}, #endif }; for (size_t i = 0; i < ARRAYSIZE(replace_cases); i++) { const ReplaceCase& cur = replace_cases[i]; GURL url(cur.base); GURL::Replacements repl; SetupReplacement(&GURL::Replacements::SetScheme, &repl, cur.scheme); SetupReplacement(&GURL::Replacements::SetUsername, &repl, cur.username); SetupReplacement(&GURL::Replacements::SetPassword, &repl, cur.password); SetupReplacement(&GURL::Replacements::SetHost, &repl, cur.host); SetupReplacement(&GURL::Replacements::SetPort, &repl, cur.port); SetupReplacement(&GURL::Replacements::SetPath, &repl, cur.path); SetupReplacement(&GURL::Replacements::SetQuery, &repl, cur.query); SetupReplacement(&GURL::Replacements::SetRef, &repl, cur.ref); GURL output = url.ReplaceComponents(repl); EXPECT_EQ(replace_cases[i].expected, output.spec()); } } TEST(GURLTest, PathForRequest) { struct TestCase { const char* input; const char* expected; } cases[] = { {"http://www.google.com", "/"}, {"http://www.google.com/", "/"}, {"http://www.google.com/foo/bar.html?baz=22", "/foo/bar.html?baz=22"}, {"http://www.google.com/foo/bar.html#ref", "/foo/bar.html"}, {"http://www.google.com/foo/bar.html?query#ref", "/foo/bar.html?query"}, }; for (size_t i = 0; i < ARRAYSIZE(cases); i++) { GURL url(cases[i].input); std::string path_request = url.PathForRequest(); EXPECT_EQ(cases[i].expected, path_request); } } TEST(GURLTest, EffectiveIntPort) { struct PortTest { const char* spec; int expected_int_port; } port_tests[] = { // http {"http://www.google.com/", 80}, {"http://www.google.com:80/", 80}, {"http://www.google.com:443/", 443}, // https {"https://www.google.com/", 443}, {"https://www.google.com:443/", 443}, {"https://www.google.com:80/", 80}, // ftp {"ftp://www.google.com/", 21}, {"ftp://www.google.com:21/", 21}, {"ftp://www.google.com:80/", 80}, // gopher {"gopher://www.google.com/", 70}, {"gopher://www.google.com:70/", 70}, {"gopher://www.google.com:80/", 80}, // file - no port {"file://www.google.com/", url_parse::PORT_UNSPECIFIED}, {"file://www.google.com:443/", url_parse::PORT_UNSPECIFIED}, // data - no port {"data:www.google.com:90", url_parse::PORT_UNSPECIFIED}, {"data:www.google.com", url_parse::PORT_UNSPECIFIED}, }; for (size_t i = 0; i < ARRAYSIZE(port_tests); i++) { GURL url(port_tests[i].spec); EXPECT_EQ(port_tests[i].expected_int_port, url.EffectiveIntPort()); } } TEST(GURLTest, IPAddress) { struct IPTest { const char* spec; bool expected_ip; } ip_tests[] = { {"http://www.google.com/", false}, {"http://192.168.9.1/", true}, {"http://192.168.9.1.2/", false}, {"http://192.168.m.1/", false}, {"http://2001:db8::1/", false}, {"http://[2001:db8::1]/", true}, {"", false}, {"some random input!", false}, }; for (size_t i = 0; i < ARRAYSIZE(ip_tests); i++) { GURL url(ip_tests[i].spec); EXPECT_EQ(ip_tests[i].expected_ip, url.HostIsIPAddress()); } } TEST(GURLTest, HostNoBrackets) { struct TestCase { const char* input; const char* expected_host; const char* expected_plainhost; } cases[] = { {"http://www.google.com", "www.google.com", "www.google.com"}, {"http://[2001:db8::1]/", "[2001:db8::1]", "2001:db8::1"}, {"http://[::]/", "[::]", "::"}, // Don't require a valid URL, but don't crash either. {"http://[]/", "[]", ""}, {"http://[x]/", "[x]", "x"}, {"http://[x/", "[x", "[x"}, {"http://x]/", "x]", "x]"}, {"http://[/", "[", "["}, {"http://]/", "]", "]"}, {"", "", ""}, }; for (size_t i = 0; i < ARRAYSIZE(cases); i++) { GURL url(cases[i].input); EXPECT_EQ(cases[i].expected_host, url.host()); EXPECT_EQ(cases[i].expected_plainhost, url.HostNoBrackets()); } } TEST(GURLTest, DomainIs) { const char google_domain[] = "google.com"; GURL url_1("http://www.google.com:99/foo"); EXPECT_TRUE(url_1.DomainIs(google_domain)); GURL url_2("http://google.com:99/foo"); EXPECT_TRUE(url_2.DomainIs(google_domain)); GURL url_3("http://google.com./foo"); EXPECT_TRUE(url_3.DomainIs(google_domain)); GURL url_4("http://google.com/foo"); EXPECT_FALSE(url_4.DomainIs("google.com.")); GURL url_5("http://google.com./foo"); EXPECT_TRUE(url_5.DomainIs("google.com.")); GURL url_6("http://www.google.com./foo"); EXPECT_TRUE(url_6.DomainIs(".com.")); GURL url_7("http://www.balabala.com/foo"); EXPECT_FALSE(url_7.DomainIs(google_domain)); GURL url_8("http://www.google.com.cn/foo"); EXPECT_FALSE(url_8.DomainIs(google_domain)); GURL url_9("http://www.iamnotgoogle.com/foo"); EXPECT_FALSE(url_9.DomainIs(google_domain)); GURL url_10("http://www.iamnotgoogle.com../foo"); EXPECT_FALSE(url_10.DomainIs(".com")); } // Newlines should be stripped from inputs. TEST(GURLTest, Newlines) { // Constructor. GURL url_1(" \t ht\ntp://\twww.goo\rgle.com/as\ndf \n "); EXPECT_EQ("http://www.google.com/asdf", url_1.spec()); // Relative path resolver. GURL url_2 = url_1.Resolve(" \n /fo\to\r "); EXPECT_EQ("http://www.google.com/foo", url_2.spec()); // Note that newlines are NOT stripped from ReplaceComponents. } TEST(GURLTest, IsStandard) { GURL a("http:foo/bar"); EXPECT_TRUE(a.IsStandard()); GURL b("foo:bar/baz"); EXPECT_FALSE(b.IsStandard()); GURL c("foo://bar/baz"); EXPECT_FALSE(c.IsStandard()); }