diff options
-rw-r--r-- | components/cronet.gypi | 12 | ||||
-rw-r--r-- | components/cronet/android/cronet_jni.cc | 7 | ||||
-rw-r--r-- | net/android/net_jni_registrar.cc | 4 | ||||
-rw-r--r-- | net/base/net_util_icu_unittest.cc | 1071 | ||||
-rw-r--r-- | net/base/net_util_unittest.cc | 1052 | ||||
-rw-r--r-- | net/net.gyp | 26 | ||||
-rw-r--r-- | net/net.gypi | 1 | ||||
-rw-r--r-- | net/proxy/proxy_script_fetcher_impl_unittest.cc | 4 | ||||
-rw-r--r-- | net/url_request/url_request_unittest.cc | 156 |
9 files changed, 1201 insertions, 1132 deletions
diff --git a/components/cronet.gypi b/components/cronet.gypi index d749c8e..5832c6e 100644 --- a/components/cronet.gypi +++ b/components/cronet.gypi @@ -89,7 +89,17 @@ '-landroid', '-Wl,--gc-sections', '-Wl,--exclude-libs,ALL' - ] + ], + 'conditions': [ + [ 'use_icu_alternatives_on_android == 1', { + 'dependencies!': [ + '../base/base.gyp:base_i18n', + '../third_party/icu/icu.gyp:icui18n', + '../third_party/icu/icu.gyp:icuuc', + ] + }, + ], + ], }, { 'target_name': 'cronet', diff --git a/components/cronet/android/cronet_jni.cc b/components/cronet/android/cronet_jni.cc index 6a370a3..f67425b 100644 --- a/components/cronet/android/cronet_jni.cc +++ b/components/cronet/android/cronet_jni.cc @@ -6,12 +6,15 @@ #include "base/android/jni_android.h" #include "base/android/jni_registrar.h" #include "base/at_exit.h" -#include "base/i18n/icu_util.h" #include "components/cronet/android/org_chromium_net_UrlRequest.h" #include "components/cronet/android/org_chromium_net_UrlRequestContext.h" #include "net/android/net_jni_registrar.h" #include "url/android/url_jni_registrar.h" +#if !defined(USE_ICU_ALTERNATIVES_ON_ANDROID) +#include "base/i18n/icu_util.h" +#endif + namespace { const base::android::RegistrationMethod kCronetRegisteredMethods[] = { @@ -42,7 +45,9 @@ extern "C" jint JNI_OnLoad(JavaVM* vm, void* reserved) { g_at_exit_manager = new base::AtExitManager(); +#if !defined(USE_ICU_ALTERNATIVES_ON_ANDROID) base::i18n::InitializeICU(); +#endif return JNI_VERSION_1_6; } diff --git a/net/android/net_jni_registrar.cc b/net/android/net_jni_registrar.cc index ebc7276..bdebf0e 100644 --- a/net/android/net_jni_registrar.cc +++ b/net/android/net_jni_registrar.cc @@ -15,7 +15,7 @@ #include "net/cert/x509_util_android.h" #include "net/proxy/proxy_config_service_android.h" -#if defined(USE_ICU_ALTERNATIVES) +#if defined(USE_ICU_ALTERNATIVES_ON_ANDROID) #include "net/base/net_string_util_icu_alternatives_android.h" #endif @@ -32,7 +32,7 @@ static base::android::RegistrationMethod kNetRegisteredMethods[] = { net::NetworkChangeNotifierAndroid::Register }, { "ProxyConfigService", net::ProxyConfigServiceAndroid::Register }, { "X509Util", net::RegisterX509Util }, -#if defined(USE_ICU_ALTERNATIVES) +#if defined(USE_ICU_ALTERNATIVES_ON_ANDROID) { "NetStringUtils", net::RegisterNetStringUtils } #endif }; diff --git a/net/base/net_util_icu_unittest.cc b/net/base/net_util_icu_unittest.cc new file mode 100644 index 0000000..9beb434 --- /dev/null +++ b/net/base/net_util_icu_unittest.cc @@ -0,0 +1,1071 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "net/base/net_util.h" + +#include <string.h> + +#include <vector> + +#include "base/format_macros.h" +#include "base/strings/string_number_conversions.h" +#include "base/strings/utf_string_conversions.h" +#include "base/time/time.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "url/gurl.h" + +using base::ASCIIToUTF16; +using base::WideToUTF16; + +namespace net { + +namespace { + +static const size_t kNpos = base::string16::npos; + +const char* kLanguages[] = { + "", "en", "zh-CN", "ja", "ko", + "he", "ar", "ru", "el", "fr", + "de", "pt", "sv", "th", "hi", + "de,en", "el,en", "zh-TW,en", "ko,ja", "he,ru,en", + "zh,ru,en" +}; + +struct IDNTestCase { + const char* input; + const wchar_t* unicode_output; + const bool unicode_allowed[arraysize(kLanguages)]; +}; + +// TODO(jungshik) This is just a random sample of languages and is far +// from exhaustive. We may have to generate all the combinations +// of languages (powerset of a set of all the languages). +const IDNTestCase idn_cases[] = { + // No IDN + {"www.google.com", L"www.google.com", + {true, true, true, true, true, + true, true, true, true, true, + true, true, true, true, true, + true, true, true, true, true, + true}}, + {"www.google.com.", L"www.google.com.", + {true, true, true, true, true, + true, true, true, true, true, + true, true, true, true, true, + true, true, true, true, true, + true}}, + {".", L".", + {true, true, true, true, true, + true, true, true, true, true, + true, true, true, true, true, + true, true, true, true, true, + true}}, + {"", L"", + {true, true, true, true, true, + true, true, true, true, true, + true, true, true, true, true, + true, true, true, true, true, + true}}, + // IDN + // Hanzi (Traditional Chinese) + {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn", + {true, false, true, true, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, true, true, false, + true}}, + // Hanzi ('video' in Simplified Chinese : will pass only in zh-CN,zh) + {"xn--cy2a840a.com", L"\x89c6\x9891.com", + {true, false, true, false, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + true}}, + // Hanzi + '123' + {"www.xn--123-p18d.com", L"www.\x4e00" L"123.com", + {true, false, true, true, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, true, true, false, + true}}, + // Hanzi + Latin : U+56FD is simplified and is regarded + // as not supported in zh-TW. + {"www.xn--hello-9n1hm04c.com", L"www.hello\x4e2d\x56fd.com", + {false, false, true, true, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, false, true, false, + true}}, + // Kanji + Kana (Japanese) + {"xn--l8jvb1ey91xtjb.jp", L"\x671d\x65e5\x3042\x3055\x3072.jp", + {true, false, false, true, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, false, true, false, + false}}, + // Katakana including U+30FC + {"xn--tckm4i2e.jp", L"\x30b3\x30de\x30fc\x30b9.jp", + {true, false, false, true, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, false, true, false, + }}, + {"xn--3ck7a7g.jp", L"\u30ce\u30f3\u30bd.jp", + {true, false, false, true, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, false, true, false, + }}, + // Katakana + Latin (Japanese) + // TODO(jungshik): Change 'false' in the first element to 'true' + // after upgrading to ICU 4.2.1 to use new uspoof_* APIs instead + // of our IsIDNComponentInSingleScript(). + {"xn--e-efusa1mzf.jp", L"e\x30b3\x30de\x30fc\x30b9.jp", + {false, false, false, true, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, false, true, false, + }}, + {"xn--3bkxe.jp", L"\x30c8\x309a.jp", + {false, false, false, true, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, false, true, false, + }}, + // Hangul (Korean) + {"www.xn--or3b17p6jjc.kr", L"www.\xc804\xc790\xc815\xbd80.kr", + {true, false, false, false, true, + false, false, false, false, false, + false, false, false, false, false, + false, false, false, true, false, + false}}, + // b<u-umlaut>cher (German) + {"xn--bcher-kva.de", L"b\x00fc" L"cher.de", + {true, false, false, false, false, + false, false, false, false, true, + true, false, false, false, false, + true, false, false, false, false, + false}}, + // a with diaeresis + {"www.xn--frgbolaget-q5a.se", L"www.f\x00e4rgbolaget.se", + {true, false, false, false, false, + false, false, false, false, false, + true, false, true, false, false, + true, false, false, false, false, + false}}, + // c-cedilla (French) + {"www.xn--alliancefranaise-npb.fr", L"www.alliancefran\x00e7" L"aise.fr", + {true, false, false, false, false, + false, false, false, false, true, + false, true, false, false, false, + false, false, false, false, false, + false}}, + // caf'e with acute accent' (French) + {"xn--caf-dma.fr", L"caf\x00e9.fr", + {true, false, false, false, false, + false, false, false, false, true, + false, true, true, false, false, + false, false, false, false, false, + false}}, + // c-cedillla and a with tilde (Portuguese) + {"xn--poema-9qae5a.com.br", L"p\x00e3oema\x00e7\x00e3.com.br", + {true, false, false, false, false, + false, false, false, false, false, + false, true, false, false, false, + false, false, false, false, false, + false}}, + // s with caron + {"xn--achy-f6a.com", L"\x0161" L"achy.com", + {true, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false}}, + // TODO(jungshik) : Add examples with Cyrillic letters + // only used in some languages written in Cyrillic. + // Eutopia (Greek) + {"xn--kxae4bafwg.gr", L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1.gr", + {true, false, false, false, false, + false, false, false, true, false, + false, false, false, false, false, + false, true, false, false, false, + false}}, + // Eutopia + 123 (Greek) + {"xn---123-pldm0haj2bk.gr", + L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1-123.gr", + {true, false, false, false, false, + false, false, false, true, false, + false, false, false, false, false, + false, true, false, false, false, + false}}, + // Cyrillic (Russian) + {"xn--n1aeec9b.ru", L"\x0442\x043e\x0440\x0442\x044b.ru", + {true, false, false, false, false, + false, false, true, false, false, + false, false, false, false, false, + false, false, false, false, true, + true}}, + // Cyrillic + 123 (Russian) + {"xn---123-45dmmc5f.ru", L"\x0442\x043e\x0440\x0442\x044b-123.ru", + {true, false, false, false, false, + false, false, true, false, false, + false, false, false, false, false, + false, false, false, false, true, + true}}, + // Arabic + {"xn--mgba1fmg.ar", L"\x0627\x0641\x0644\x0627\x0645.ar", + {true, false, false, false, false, + false, true, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false}}, + // Hebrew + {"xn--4dbib.he", L"\x05d5\x05d0\x05d4.he", + {true, false, false, false, false, + true, false, false, false, false, + false, false, false, false, false, + false, false, false, false, true, + false}}, + // Thai + {"xn--12c2cc4ag3b4ccu.th", + L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th", + {true, false, false, false, false, + false, false, false, false, false, + false, false, false, true, false, + false, false, false, false, false, + false}}, + // Devangari (Hindi) + {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in", + {true, false, false, false, false, + false, false, false, false, false, + false, false, false, false, true, + false, false, false, false, false, + false}}, + // Invalid IDN + {"xn--hello?world.com", NULL, + {false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false}}, + // Unsafe IDNs + // "payp<alpha>l.com" + {"www.xn--paypl-g9d.com", L"payp\x03b1l.com", + {false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false}}, + // google.gr with Greek omicron and epsilon + {"xn--ggl-6xc1ca.gr", L"g\x03bf\x03bfgl\x03b5.gr", + {false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false}}, + // google.ru with Cyrillic o + {"xn--ggl-tdd6ba.ru", L"g\x043e\x043egl\x0435.ru", + {false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false}}, + // h<e with acute>llo<China in Han>.cn + {"xn--hllo-bpa7979ih5m.cn", L"h\x00e9llo\x4e2d\x56fd.cn", + {false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false}}, + // <Greek rho><Cyrillic a><Cyrillic u>.ru + {"xn--2xa6t2b.ru", L"\x03c1\x0430\x0443.ru", + {false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false}}, + // One that's really long that will force a buffer realloc + {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaaaaa", + L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + L"aaaaaaaa", + {true, true, true, true, true, + true, true, true, true, true, + true, true, true, true, true, + true, true, true, true, true, + true}}, + // Test cases for characters we blacklisted although allowed in IDN. + // Embedded spaces will be turned to %20 in the display. + // TODO(jungshik): We need to have more cases. This is a typical + // data-driven trap. The following test cases need to be separated + // and tested only for a couple of languages. + {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr", + {false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false}}, + {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com", + {false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + }}, + {"google.xn--comabc-k8d", L"google.com\x0338" L"abc", + {false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + }}, + {"google.xn--com-oh4ba.evil.jp", L"google.com\x309a\x309a.evil.jp", + {false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + }}, + {"google.xn--comevil-v04f.jp", L"google.com\x30ce" L"evil.jp", + {false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + }}, +#if 0 + // These two cases are special. We need a separate test. + // U+3000 and U+3002 are normalized to ASCII space and dot. + {"xn-- -kq6ay5z.cn", L"\x4e2d\x56fd\x3000.cn", + {false, false, true, false, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, true, false, false, + true}}, + {"xn--fiqs8s.cn", L"\x4e2d\x56fd\x3002" L"cn", + {false, false, true, false, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, true, false, false, + true}}, +#endif +}; + +struct AdjustOffsetCase { + size_t input_offset; + size_t output_offset; +}; + +struct UrlTestData { + const char* description; + const char* input; + const char* languages; + FormatUrlTypes format_types; + UnescapeRule::Type escape_rules; + const wchar_t* output; // Use |wchar_t| to handle Unicode constants easily. + size_t prefix_len; +}; + +// A helper for IDN*{Fast,Slow}. +// Append "::<language list>" to |expected| and |actual| to make it +// easy to tell which sub-case fails without debugging. +void AppendLanguagesToOutputs(const char* languages, + base::string16* expected, + base::string16* actual) { + base::string16 to_append = ASCIIToUTF16("::") + ASCIIToUTF16(languages); + expected->append(to_append); + actual->append(to_append); +} + +// A pair of helpers for the FormatUrlWithOffsets() test. +void VerboseExpect(size_t expected, + size_t actual, + const std::string& original_url, + size_t position, + const base::string16& formatted_url) { + EXPECT_EQ(expected, actual) << "Original URL: " << original_url + << " (at char " << position << ")\nFormatted URL: " << formatted_url; +} + +void CheckAdjustedOffsets(const std::string& url_string, + const std::string& languages, + FormatUrlTypes format_types, + UnescapeRule::Type unescape_rules, + const size_t* output_offsets) { + GURL url(url_string); + size_t url_length = url_string.length(); + std::vector<size_t> offsets; + for (size_t i = 0; i <= url_length + 1; ++i) + offsets.push_back(i); + offsets.push_back(500000); // Something larger than any input length. + offsets.push_back(std::string::npos); + base::string16 formatted_url = FormatUrlWithOffsets(url, languages, + format_types, unescape_rules, NULL, NULL, &offsets); + for (size_t i = 0; i < url_length; ++i) + VerboseExpect(output_offsets[i], offsets[i], url_string, i, formatted_url); + VerboseExpect(formatted_url.length(), offsets[url_length], url_string, + url_length, formatted_url); + VerboseExpect(base::string16::npos, offsets[url_length + 1], url_string, + 500000, formatted_url); + VerboseExpect(base::string16::npos, offsets[url_length + 2], url_string, + std::string::npos, formatted_url); +} + +} // anonymous namespace + +TEST(NetUtilTest, IDNToUnicodeFast) { + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(idn_cases); i++) { + for (size_t j = 0; j < arraysize(kLanguages); j++) { + // ja || zh-TW,en || ko,ja -> IDNToUnicodeSlow + if (j == 3 || j == 17 || j == 18) + continue; + base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j])); + base::string16 expected(idn_cases[i].unicode_allowed[j] ? + WideToUTF16(idn_cases[i].unicode_output) : + ASCIIToUTF16(idn_cases[i].input)); + AppendLanguagesToOutputs(kLanguages[j], &expected, &output); + EXPECT_EQ(expected, output); + } + } +} + +TEST(NetUtilTest, IDNToUnicodeSlow) { + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(idn_cases); i++) { + for (size_t j = 0; j < arraysize(kLanguages); j++) { + // !(ja || zh-TW,en || ko,ja) -> IDNToUnicodeFast + if (!(j == 3 || j == 17 || j == 18)) + continue; + base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j])); + base::string16 expected(idn_cases[i].unicode_allowed[j] ? + WideToUTF16(idn_cases[i].unicode_output) : + ASCIIToUTF16(idn_cases[i].input)); + AppendLanguagesToOutputs(kLanguages[j], &expected, &output); + EXPECT_EQ(expected, output); + } + } +} + +TEST(NetUtilTest, StripWWW) { + EXPECT_EQ(base::string16(), StripWWW(base::string16())); + EXPECT_EQ(base::string16(), StripWWW(ASCIIToUTF16("www."))); + EXPECT_EQ(ASCIIToUTF16("blah"), StripWWW(ASCIIToUTF16("www.blah"))); + EXPECT_EQ(ASCIIToUTF16("blah"), StripWWW(ASCIIToUTF16("blah"))); +} + +// This is currently a windows specific function. +#if defined(OS_WIN) +namespace { + +struct GetDirectoryListingEntryCase { + const wchar_t* name; + const char* raw_bytes; + bool is_dir; + int64 filesize; + base::Time time; + const char* expected; +}; + +} // namespace + +TEST(NetUtilTest, GetDirectoryListingEntry) { + const GetDirectoryListingEntryCase test_cases[] = { + {L"Foo", + "", + false, + 10000, + base::Time(), + "<script>addRow(\"Foo\",\"Foo\",0,\"9.8 kB\",\"\");</script>\n"}, + {L"quo\"tes", + "", + false, + 10000, + base::Time(), + "<script>addRow(\"quo\\\"tes\",\"quo%22tes\",0,\"9.8 kB\",\"\");</script>" + "\n"}, + {L"quo\"tes", + "quo\"tes", + false, + 10000, + base::Time(), + "<script>addRow(\"quo\\\"tes\",\"quo%22tes\",0,\"9.8 kB\",\"\");</script>" + "\n"}, + // U+D55C0 U+AE00. raw_bytes is empty (either a local file with + // UTF-8/UTF-16 encoding or a remote file on an ftp server using UTF-8 + {L"\xD55C\xAE00.txt", + "", + false, + 10000, + base::Time(), + "<script>addRow(\"\xED\x95\x9C\xEA\xB8\x80.txt\"," + "\"%ED%95%9C%EA%B8%80.txt\",0,\"9.8 kB\",\"\");</script>\n"}, + // U+D55C0 U+AE00. raw_bytes is the corresponding EUC-KR sequence: + // a local or remote file in EUC-KR. + {L"\xD55C\xAE00.txt", + "\xC7\xD1\xB1\xDB.txt", + false, + 10000, + base::Time(), + "<script>addRow(\"\xED\x95\x9C\xEA\xB8\x80.txt\",\"%C7%D1%B1%DB.txt\"" + ",0,\"9.8 kB\",\"\");</script>\n"}, + }; + + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) { + const std::string results = GetDirectoryListingEntry( + WideToUTF16(test_cases[i].name), + test_cases[i].raw_bytes, + test_cases[i].is_dir, + test_cases[i].filesize, + test_cases[i].time); + EXPECT_EQ(test_cases[i].expected, results); + } +} + +#endif + +TEST(NetUtilTest, FormatUrl) { + FormatUrlTypes default_format_type = kFormatUrlOmitUsernamePassword; + const UrlTestData tests[] = { + {"Empty URL", "", "", default_format_type, UnescapeRule::NORMAL, L"", 0}, + + {"Simple URL", + "http://www.google.com/", "", default_format_type, UnescapeRule::NORMAL, + L"http://www.google.com/", 7}, + + {"With a port number and a reference", + "http://www.google.com:8080/#\xE3\x82\xB0", "", default_format_type, + UnescapeRule::NORMAL, + L"http://www.google.com:8080/#\x30B0", 7}, + + // -------- IDN tests -------- + {"Japanese IDN with ja", + "http://xn--l8jvb1ey91xtjb.jp", "ja", default_format_type, + UnescapeRule::NORMAL, L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7}, + + {"Japanese IDN with en", + "http://xn--l8jvb1ey91xtjb.jp", "en", default_format_type, + UnescapeRule::NORMAL, L"http://xn--l8jvb1ey91xtjb.jp/", 7}, + + {"Japanese IDN without any languages", + "http://xn--l8jvb1ey91xtjb.jp", "", default_format_type, + UnescapeRule::NORMAL, + // Single script is safe for empty languages. + L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7}, + + {"mailto: with Japanese IDN", + "mailto:foo@xn--l8jvb1ey91xtjb.jp", "ja", default_format_type, + UnescapeRule::NORMAL, + // GURL doesn't assume an email address's domain part as a host name. + L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7}, + + {"file: with Japanese IDN", + "file://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", default_format_type, + UnescapeRule::NORMAL, + L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7}, + + {"ftp: with Japanese IDN", + "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", default_format_type, + UnescapeRule::NORMAL, + L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6}, + + // -------- omit_username_password flag tests -------- + {"With username and password, omit_username_password=false", + "http://user:passwd@example.com/foo", "", + kFormatUrlOmitNothing, UnescapeRule::NORMAL, + L"http://user:passwd@example.com/foo", 19}, + + {"With username and password, omit_username_password=true", + "http://user:passwd@example.com/foo", "", default_format_type, + UnescapeRule::NORMAL, L"http://example.com/foo", 7}, + + {"With username and no password", + "http://user@example.com/foo", "", default_format_type, + UnescapeRule::NORMAL, L"http://example.com/foo", 7}, + + {"Just '@' without username and password", + "http://@example.com/foo", "", default_format_type, UnescapeRule::NORMAL, + L"http://example.com/foo", 7}, + + // GURL doesn't think local-part of an email address is username for URL. + {"mailto:, omit_username_password=true", + "mailto:foo@example.com", "", default_format_type, UnescapeRule::NORMAL, + L"mailto:foo@example.com", 7}, + + // -------- unescape flag tests -------- + {"Do not unescape", + "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" + "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" + "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", "en", default_format_type, + UnescapeRule::NONE, + // GURL parses %-encoded hostnames into Punycode. + L"http://xn--qcka1pmc.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" + L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", 7}, + + {"Unescape normally", + "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" + "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" + "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", "en", default_format_type, + UnescapeRule::NORMAL, + L"http://xn--qcka1pmc.jp/\x30B0\x30FC\x30B0\x30EB" + L"?q=\x30B0\x30FC\x30B0\x30EB", 7}, + + {"Unescape normally with BiDi control character", + "http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", "en", default_format_type, + UnescapeRule::NORMAL, L"http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", 7}, + + {"Unescape normally including unescape spaces", + "http://www.google.com/search?q=Hello%20World", "en", default_format_type, + UnescapeRule::SPACES, L"http://www.google.com/search?q=Hello World", 7}, + + /* + {"unescape=true with some special characters", + "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", "", + kFormatUrlOmitNothing, UnescapeRule::NORMAL, + L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25}, + */ + // Disabled: the resultant URL becomes "...user%253A:%2540passwd...". + + // -------- omit http: -------- + {"omit http with user name", + "http://user@example.com/foo", "", kFormatUrlOmitAll, + UnescapeRule::NORMAL, L"example.com/foo", 0}, + + {"omit http", + "http://www.google.com/", "en", kFormatUrlOmitHTTP, + UnescapeRule::NORMAL, L"www.google.com/", + 0}, + + {"omit http with https", + "https://www.google.com/", "en", kFormatUrlOmitHTTP, + UnescapeRule::NORMAL, L"https://www.google.com/", + 8}, + + {"omit http starts with ftp.", + "http://ftp.google.com/", "en", kFormatUrlOmitHTTP, + UnescapeRule::NORMAL, L"http://ftp.google.com/", + 7}, + + // -------- omit trailing slash on bare hostname -------- + {"omit slash when it's the entire path", + "http://www.google.com/", "en", + kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL, + L"http://www.google.com", 7}, + {"omit slash when there's a ref", + "http://www.google.com/#ref", "en", + kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL, + L"http://www.google.com/#ref", 7}, + {"omit slash when there's a query", + "http://www.google.com/?", "en", + kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL, + L"http://www.google.com/?", 7}, + {"omit slash when it's not the entire path", + "http://www.google.com/foo", "en", + kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL, + L"http://www.google.com/foo", 7}, + {"omit slash for nonstandard URLs", + "data:/", "en", kFormatUrlOmitTrailingSlashOnBareHostname, + UnescapeRule::NORMAL, L"data:/", 5}, + {"omit slash for file URLs", + "file:///", "en", kFormatUrlOmitTrailingSlashOnBareHostname, + UnescapeRule::NORMAL, L"file:///", 7}, + + // -------- view-source: -------- + {"view-source", + "view-source:http://xn--qcka1pmc.jp/", "ja", default_format_type, + UnescapeRule::NORMAL, L"view-source:http://\x30B0\x30FC\x30B0\x30EB.jp/", + 19}, + + {"view-source of view-source", + "view-source:view-source:http://xn--qcka1pmc.jp/", "ja", + default_format_type, UnescapeRule::NORMAL, + L"view-source:view-source:http://xn--qcka1pmc.jp/", 12}, + + // view-source should omit http and trailing slash where non-view-source + // would. + {"view-source omit http", + "view-source:http://a.b/c", "en", kFormatUrlOmitAll, + UnescapeRule::NORMAL, L"view-source:a.b/c", + 12}, + {"view-source omit http starts with ftp.", + "view-source:http://ftp.b/c", "en", kFormatUrlOmitAll, + UnescapeRule::NORMAL, L"view-source:http://ftp.b/c", + 19}, + {"view-source omit slash when it's the entire path", + "view-source:http://a.b/", "en", kFormatUrlOmitAll, + UnescapeRule::NORMAL, L"view-source:a.b", + 12}, + }; + + for (size_t i = 0; i < arraysize(tests); ++i) { + size_t prefix_len; + base::string16 formatted = FormatUrl( + GURL(tests[i].input), tests[i].languages, tests[i].format_types, + tests[i].escape_rules, NULL, &prefix_len, NULL); + EXPECT_EQ(WideToUTF16(tests[i].output), formatted) << tests[i].description; + EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description; + } +} + +TEST(NetUtilTest, FormatUrlParsed) { + // No unescape case. + url::Parsed parsed; + base::string16 formatted = FormatUrl( + GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" + "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), + "ja", kFormatUrlOmitNothing, UnescapeRule::NONE, &parsed, NULL, + NULL); + EXPECT_EQ(WideToUTF16( + L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080" + L"/%E3%82%B0/?q=%E3%82%B0#\x30B0"), formatted); + EXPECT_EQ(WideToUTF16(L"%E3%82%B0"), + formatted.substr(parsed.username.begin, parsed.username.len)); + EXPECT_EQ(WideToUTF16(L"%E3%83%BC"), + formatted.substr(parsed.password.begin, parsed.password.len)); + EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"), + formatted.substr(parsed.host.begin, parsed.host.len)); + EXPECT_EQ(WideToUTF16(L"8080"), + formatted.substr(parsed.port.begin, parsed.port.len)); + EXPECT_EQ(WideToUTF16(L"/%E3%82%B0/"), + formatted.substr(parsed.path.begin, parsed.path.len)); + EXPECT_EQ(WideToUTF16(L"q=%E3%82%B0"), + formatted.substr(parsed.query.begin, parsed.query.len)); + EXPECT_EQ(WideToUTF16(L"\x30B0"), + formatted.substr(parsed.ref.begin, parsed.ref.len)); + + // Unescape case. + formatted = FormatUrl( + GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" + "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), + "ja", kFormatUrlOmitNothing, UnescapeRule::NORMAL, &parsed, NULL, + NULL); + EXPECT_EQ(WideToUTF16(L"http://\x30B0:\x30FC@\x30B0\x30FC\x30B0\x30EB.jp:8080" + L"/\x30B0/?q=\x30B0#\x30B0"), formatted); + EXPECT_EQ(WideToUTF16(L"\x30B0"), + formatted.substr(parsed.username.begin, parsed.username.len)); + EXPECT_EQ(WideToUTF16(L"\x30FC"), + formatted.substr(parsed.password.begin, parsed.password.len)); + EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"), + formatted.substr(parsed.host.begin, parsed.host.len)); + EXPECT_EQ(WideToUTF16(L"8080"), + formatted.substr(parsed.port.begin, parsed.port.len)); + EXPECT_EQ(WideToUTF16(L"/\x30B0/"), + formatted.substr(parsed.path.begin, parsed.path.len)); + EXPECT_EQ(WideToUTF16(L"q=\x30B0"), + formatted.substr(parsed.query.begin, parsed.query.len)); + EXPECT_EQ(WideToUTF16(L"\x30B0"), + formatted.substr(parsed.ref.begin, parsed.ref.len)); + + // Omit_username_password + unescape case. + formatted = FormatUrl( + GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" + "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), + "ja", kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, &parsed, + NULL, NULL); + EXPECT_EQ(WideToUTF16(L"http://\x30B0\x30FC\x30B0\x30EB.jp:8080" + L"/\x30B0/?q=\x30B0#\x30B0"), formatted); + EXPECT_FALSE(parsed.username.is_valid()); + EXPECT_FALSE(parsed.password.is_valid()); + EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"), + formatted.substr(parsed.host.begin, parsed.host.len)); + EXPECT_EQ(WideToUTF16(L"8080"), + formatted.substr(parsed.port.begin, parsed.port.len)); + EXPECT_EQ(WideToUTF16(L"/\x30B0/"), + formatted.substr(parsed.path.begin, parsed.path.len)); + EXPECT_EQ(WideToUTF16(L"q=\x30B0"), + formatted.substr(parsed.query.begin, parsed.query.len)); + EXPECT_EQ(WideToUTF16(L"\x30B0"), + formatted.substr(parsed.ref.begin, parsed.ref.len)); + + // View-source case. + formatted = + FormatUrl(GURL("view-source:http://user:passwd@host:81/path?query#ref"), + std::string(), + kFormatUrlOmitUsernamePassword, + UnescapeRule::NORMAL, + &parsed, + NULL, + NULL); + EXPECT_EQ(WideToUTF16(L"view-source:http://host:81/path?query#ref"), + formatted); + EXPECT_EQ(WideToUTF16(L"view-source:http"), + formatted.substr(parsed.scheme.begin, parsed.scheme.len)); + EXPECT_FALSE(parsed.username.is_valid()); + EXPECT_FALSE(parsed.password.is_valid()); + EXPECT_EQ(WideToUTF16(L"host"), + formatted.substr(parsed.host.begin, parsed.host.len)); + EXPECT_EQ(WideToUTF16(L"81"), + formatted.substr(parsed.port.begin, parsed.port.len)); + EXPECT_EQ(WideToUTF16(L"/path"), + formatted.substr(parsed.path.begin, parsed.path.len)); + EXPECT_EQ(WideToUTF16(L"query"), + formatted.substr(parsed.query.begin, parsed.query.len)); + EXPECT_EQ(WideToUTF16(L"ref"), + formatted.substr(parsed.ref.begin, parsed.ref.len)); + + // omit http case. + formatted = FormatUrl(GURL("http://host:8000/a?b=c#d"), + std::string(), + kFormatUrlOmitHTTP, + UnescapeRule::NORMAL, + &parsed, + NULL, + NULL); + EXPECT_EQ(WideToUTF16(L"host:8000/a?b=c#d"), formatted); + EXPECT_FALSE(parsed.scheme.is_valid()); + EXPECT_FALSE(parsed.username.is_valid()); + EXPECT_FALSE(parsed.password.is_valid()); + EXPECT_EQ(WideToUTF16(L"host"), + formatted.substr(parsed.host.begin, parsed.host.len)); + EXPECT_EQ(WideToUTF16(L"8000"), + formatted.substr(parsed.port.begin, parsed.port.len)); + EXPECT_EQ(WideToUTF16(L"/a"), + formatted.substr(parsed.path.begin, parsed.path.len)); + EXPECT_EQ(WideToUTF16(L"b=c"), + formatted.substr(parsed.query.begin, parsed.query.len)); + EXPECT_EQ(WideToUTF16(L"d"), + formatted.substr(parsed.ref.begin, parsed.ref.len)); + + // omit http starts with ftp case. + formatted = FormatUrl(GURL("http://ftp.host:8000/a?b=c#d"), + std::string(), + kFormatUrlOmitHTTP, + UnescapeRule::NORMAL, + &parsed, + NULL, + NULL); + EXPECT_EQ(WideToUTF16(L"http://ftp.host:8000/a?b=c#d"), formatted); + EXPECT_TRUE(parsed.scheme.is_valid()); + EXPECT_FALSE(parsed.username.is_valid()); + EXPECT_FALSE(parsed.password.is_valid()); + EXPECT_EQ(WideToUTF16(L"http"), + formatted.substr(parsed.scheme.begin, parsed.scheme.len)); + EXPECT_EQ(WideToUTF16(L"ftp.host"), + formatted.substr(parsed.host.begin, parsed.host.len)); + EXPECT_EQ(WideToUTF16(L"8000"), + formatted.substr(parsed.port.begin, parsed.port.len)); + EXPECT_EQ(WideToUTF16(L"/a"), + formatted.substr(parsed.path.begin, parsed.path.len)); + EXPECT_EQ(WideToUTF16(L"b=c"), + formatted.substr(parsed.query.begin, parsed.query.len)); + EXPECT_EQ(WideToUTF16(L"d"), + formatted.substr(parsed.ref.begin, parsed.ref.len)); + + // omit http starts with 'f' case. + formatted = FormatUrl(GURL("http://f/"), + std::string(), + kFormatUrlOmitHTTP, + UnescapeRule::NORMAL, + &parsed, + NULL, + NULL); + EXPECT_EQ(WideToUTF16(L"f/"), formatted); + EXPECT_FALSE(parsed.scheme.is_valid()); + EXPECT_FALSE(parsed.username.is_valid()); + EXPECT_FALSE(parsed.password.is_valid()); + EXPECT_FALSE(parsed.port.is_valid()); + EXPECT_TRUE(parsed.path.is_valid()); + EXPECT_FALSE(parsed.query.is_valid()); + EXPECT_FALSE(parsed.ref.is_valid()); + EXPECT_EQ(WideToUTF16(L"f"), + formatted.substr(parsed.host.begin, parsed.host.len)); + EXPECT_EQ(WideToUTF16(L"/"), + formatted.substr(parsed.path.begin, parsed.path.len)); +} + +// Make sure that calling FormatUrl on a GURL and then converting back to a GURL +// results in the original GURL, for each ASCII character in the path. +TEST(NetUtilTest, FormatUrlRoundTripPathASCII) { + for (unsigned char test_char = 32; test_char < 128; ++test_char) { + GURL url(std::string("http://www.google.com/") + + static_cast<char>(test_char)); + size_t prefix_len; + base::string16 formatted = FormatUrl(url, + std::string(), + kFormatUrlOmitUsernamePassword, + UnescapeRule::NORMAL, + NULL, + &prefix_len, + NULL); + EXPECT_EQ(url.spec(), GURL(formatted).spec()); + } +} + +// Make sure that calling FormatUrl on a GURL and then converting back to a GURL +// results in the original GURL, for each escaped ASCII character in the path. +TEST(NetUtilTest, FormatUrlRoundTripPathEscaped) { + for (unsigned char test_char = 32; test_char < 128; ++test_char) { + std::string original_url("http://www.google.com/"); + original_url.push_back('%'); + original_url.append(base::HexEncode(&test_char, 1)); + + GURL url(original_url); + size_t prefix_len; + base::string16 formatted = FormatUrl(url, + std::string(), + kFormatUrlOmitUsernamePassword, + UnescapeRule::NORMAL, + NULL, + &prefix_len, + NULL); + EXPECT_EQ(url.spec(), GURL(formatted).spec()); + } +} + +// Make sure that calling FormatUrl on a GURL and then converting back to a GURL +// results in the original GURL, for each ASCII character in the query. +TEST(NetUtilTest, FormatUrlRoundTripQueryASCII) { + for (unsigned char test_char = 32; test_char < 128; ++test_char) { + GURL url(std::string("http://www.google.com/?") + + static_cast<char>(test_char)); + size_t prefix_len; + base::string16 formatted = FormatUrl(url, + std::string(), + kFormatUrlOmitUsernamePassword, + UnescapeRule::NORMAL, + NULL, + &prefix_len, + NULL); + EXPECT_EQ(url.spec(), GURL(formatted).spec()); + } +} + +// Make sure that calling FormatUrl on a GURL and then converting back to a GURL +// only results in a different GURL for certain characters. +TEST(NetUtilTest, FormatUrlRoundTripQueryEscaped) { + // A full list of characters which FormatURL should unescape and GURL should + // not escape again, when they appear in a query string. + const char* kUnescapedCharacters = + "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_~"; + for (unsigned char test_char = 0; test_char < 128; ++test_char) { + std::string original_url("http://www.google.com/?"); + original_url.push_back('%'); + original_url.append(base::HexEncode(&test_char, 1)); + + GURL url(original_url); + size_t prefix_len; + base::string16 formatted = FormatUrl(url, + std::string(), + kFormatUrlOmitUsernamePassword, + UnescapeRule::NORMAL, + NULL, + &prefix_len, + NULL); + + if (test_char && + strchr(kUnescapedCharacters, static_cast<char>(test_char))) { + EXPECT_NE(url.spec(), GURL(formatted).spec()); + } else { + EXPECT_EQ(url.spec(), GURL(formatted).spec()); + } + } +} + +TEST(NetUtilTest, FormatUrlWithOffsets) { + CheckAdjustedOffsets(std::string(), "en", kFormatUrlOmitNothing, + UnescapeRule::NORMAL, NULL); + + const size_t basic_offsets[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25 + }; + CheckAdjustedOffsets("http://www.google.com/foo/", "en", + kFormatUrlOmitNothing, UnescapeRule::NORMAL, + basic_offsets); + + const size_t omit_auth_offsets_1[] = { + 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 7, + 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 + }; + CheckAdjustedOffsets("http://foo:bar@www.google.com/", "en", + kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, + omit_auth_offsets_1); + + const size_t omit_auth_offsets_2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21 + }; + CheckAdjustedOffsets("http://foo@www.google.com/", "en", + kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, + omit_auth_offsets_2); + + const size_t dont_omit_auth_offsets[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, + kNpos, kNpos, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, + kNpos, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31 + }; + // Unescape to "http://foo\x30B0:\x30B0bar@www.google.com". + CheckAdjustedOffsets("http://foo%E3%82%B0:%E3%82%B0bar@www.google.com/", "en", + kFormatUrlOmitNothing, UnescapeRule::NORMAL, + dont_omit_auth_offsets); + + const size_t view_source_offsets[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, kNpos, + kNpos, kNpos, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33 + }; + CheckAdjustedOffsets("view-source:http://foo@www.google.com/", "en", + kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, + view_source_offsets); + + const size_t idn_hostname_offsets_1[] = { + 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, + kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 12, + 13, 14, 15, 16, 17, 18, 19 + }; + // Convert punycode to "http://\x671d\x65e5\x3042\x3055\x3072.jp/foo/". + CheckAdjustedOffsets("http://xn--l8jvb1ey91xtjb.jp/foo/", "ja", + kFormatUrlOmitNothing, UnescapeRule::NORMAL, + idn_hostname_offsets_1); + + const size_t idn_hostname_offsets_2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, + kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 14, 15, kNpos, kNpos, kNpos, + kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, + kNpos, 19, 20, 21, 22, 23, 24 + }; + // Convert punycode to + // "http://test.\x89c6\x9891.\x5317\x4eac\x5927\x5b78.test/". + CheckAdjustedOffsets("http://test.xn--cy2a840a.xn--1lq90ic7f1rc.test/", + "zh-CN", kFormatUrlOmitNothing, UnescapeRule::NORMAL, + idn_hostname_offsets_2); + + const size_t unescape_offsets[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, kNpos, kNpos, 26, 27, 28, 29, 30, kNpos, kNpos, kNpos, + kNpos, kNpos, kNpos, kNpos, kNpos, 31, kNpos, kNpos, kNpos, kNpos, kNpos, + kNpos, kNpos, kNpos, 32, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, + kNpos, 33, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos + }; + // Unescape to "http://www.google.com/foo bar/\x30B0\x30FC\x30B0\x30EB". + CheckAdjustedOffsets( + "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", + "en", kFormatUrlOmitNothing, UnescapeRule::SPACES, unescape_offsets); + + const size_t ref_offsets[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, kNpos, kNpos, 32, kNpos, kNpos, + 33 + }; + // Unescape to "http://www.google.com/foo.html#\x30B0\x30B0z". + CheckAdjustedOffsets( + "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z", "en", + kFormatUrlOmitNothing, UnescapeRule::NORMAL, ref_offsets); + + const size_t omit_http_offsets[] = { + 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14 + }; + CheckAdjustedOffsets("http://www.google.com/", "en", kFormatUrlOmitHTTP, + UnescapeRule::NORMAL, omit_http_offsets); + + const size_t omit_http_start_with_ftp_offsets[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 + }; + CheckAdjustedOffsets("http://ftp.google.com/", "en", kFormatUrlOmitHTTP, + UnescapeRule::NORMAL, omit_http_start_with_ftp_offsets); + + const size_t omit_all_offsets[] = { + 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos, + 0, 1, 2, 3, 4, 5, 6, 7 + }; + CheckAdjustedOffsets("http://user@foo.com/", "en", kFormatUrlOmitAll, + UnescapeRule::NORMAL, omit_all_offsets); +} + +} // namespace net diff --git a/net/base/net_util_unittest.cc b/net/base/net_util_unittest.cc index ebc3307..674a725 100644 --- a/net/base/net_util_unittest.cc +++ b/net/base/net_util_unittest.cc @@ -6,7 +6,7 @@ #include <string.h> -#include <algorithm> +#include <ostream> #include "base/files/file_path.h" #include "base/format_macros.h" @@ -14,7 +14,6 @@ #include "base/strings/string_number_conversions.h" #include "base/strings/string_util.h" #include "base/strings/stringprintf.h" -#include "base/strings/sys_string_conversions.h" #include "base/strings/utf_string_conversions.h" #include "base/sys_byteorder.h" #include "base/time/time.h" @@ -36,364 +35,17 @@ namespace net { namespace { -static const size_t kNpos = base::string16::npos; - struct HeaderCase { const char* header_name; const char* expected; }; -struct HeaderParamCase { - const char* header_name; - const char* param_name; - const char* expected; -}; - -const char* kLanguages[] = { - "", "en", "zh-CN", "ja", "ko", - "he", "ar", "ru", "el", "fr", - "de", "pt", "sv", "th", "hi", - "de,en", "el,en", "zh-TW,en", "ko,ja", "he,ru,en", - "zh,ru,en" -}; - -struct IDNTestCase { - const char* input; - const wchar_t* unicode_output; - const bool unicode_allowed[arraysize(kLanguages)]; -}; - -// TODO(jungshik) This is just a random sample of languages and is far -// from exhaustive. We may have to generate all the combinations -// of languages (powerset of a set of all the languages). -const IDNTestCase idn_cases[] = { - // No IDN - {"www.google.com", L"www.google.com", - {true, true, true, true, true, - true, true, true, true, true, - true, true, true, true, true, - true, true, true, true, true, - true}}, - {"www.google.com.", L"www.google.com.", - {true, true, true, true, true, - true, true, true, true, true, - true, true, true, true, true, - true, true, true, true, true, - true}}, - {".", L".", - {true, true, true, true, true, - true, true, true, true, true, - true, true, true, true, true, - true, true, true, true, true, - true}}, - {"", L"", - {true, true, true, true, true, - true, true, true, true, true, - true, true, true, true, true, - true, true, true, true, true, - true}}, - // IDN - // Hanzi (Traditional Chinese) - {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn", - {true, false, true, true, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, true, true, false, - true}}, - // Hanzi ('video' in Simplified Chinese : will pass only in zh-CN,zh) - {"xn--cy2a840a.com", L"\x89c6\x9891.com", - {true, false, true, false, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - true}}, - // Hanzi + '123' - {"www.xn--123-p18d.com", L"www.\x4e00" L"123.com", - {true, false, true, true, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, true, true, false, - true}}, - // Hanzi + Latin : U+56FD is simplified and is regarded - // as not supported in zh-TW. - {"www.xn--hello-9n1hm04c.com", L"www.hello\x4e2d\x56fd.com", - {false, false, true, true, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, true, false, - true}}, - // Kanji + Kana (Japanese) - {"xn--l8jvb1ey91xtjb.jp", L"\x671d\x65e5\x3042\x3055\x3072.jp", - {true, false, false, true, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, true, false, - false}}, - // Katakana including U+30FC - {"xn--tckm4i2e.jp", L"\x30b3\x30de\x30fc\x30b9.jp", - {true, false, false, true, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, true, false, - }}, - {"xn--3ck7a7g.jp", L"\u30ce\u30f3\u30bd.jp", - {true, false, false, true, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, true, false, - }}, - // Katakana + Latin (Japanese) - // TODO(jungshik): Change 'false' in the first element to 'true' - // after upgrading to ICU 4.2.1 to use new uspoof_* APIs instead - // of our IsIDNComponentInSingleScript(). - {"xn--e-efusa1mzf.jp", L"e\x30b3\x30de\x30fc\x30b9.jp", - {false, false, false, true, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, true, false, - }}, - {"xn--3bkxe.jp", L"\x30c8\x309a.jp", - {false, false, false, true, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, true, false, - }}, - // Hangul (Korean) - {"www.xn--or3b17p6jjc.kr", L"www.\xc804\xc790\xc815\xbd80.kr", - {true, false, false, false, true, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, true, false, - false}}, - // b<u-umlaut>cher (German) - {"xn--bcher-kva.de", L"b\x00fc" L"cher.de", - {true, false, false, false, false, - false, false, false, false, true, - true, false, false, false, false, - true, false, false, false, false, - false}}, - // a with diaeresis - {"www.xn--frgbolaget-q5a.se", L"www.f\x00e4rgbolaget.se", - {true, false, false, false, false, - false, false, false, false, false, - true, false, true, false, false, - true, false, false, false, false, - false}}, - // c-cedilla (French) - {"www.xn--alliancefranaise-npb.fr", L"www.alliancefran\x00e7" L"aise.fr", - {true, false, false, false, false, - false, false, false, false, true, - false, true, false, false, false, - false, false, false, false, false, - false}}, - // caf'e with acute accent' (French) - {"xn--caf-dma.fr", L"caf\x00e9.fr", - {true, false, false, false, false, - false, false, false, false, true, - false, true, true, false, false, - false, false, false, false, false, - false}}, - // c-cedillla and a with tilde (Portuguese) - {"xn--poema-9qae5a.com.br", L"p\x00e3oema\x00e7\x00e3.com.br", - {true, false, false, false, false, - false, false, false, false, false, - false, true, false, false, false, - false, false, false, false, false, - false}}, - // s with caron - {"xn--achy-f6a.com", L"\x0161" L"achy.com", - {true, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false}}, - // TODO(jungshik) : Add examples with Cyrillic letters - // only used in some languages written in Cyrillic. - // Eutopia (Greek) - {"xn--kxae4bafwg.gr", L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1.gr", - {true, false, false, false, false, - false, false, false, true, false, - false, false, false, false, false, - false, true, false, false, false, - false}}, - // Eutopia + 123 (Greek) - {"xn---123-pldm0haj2bk.gr", - L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1-123.gr", - {true, false, false, false, false, - false, false, false, true, false, - false, false, false, false, false, - false, true, false, false, false, - false}}, - // Cyrillic (Russian) - {"xn--n1aeec9b.ru", L"\x0442\x043e\x0440\x0442\x044b.ru", - {true, false, false, false, false, - false, false, true, false, false, - false, false, false, false, false, - false, false, false, false, true, - true}}, - // Cyrillic + 123 (Russian) - {"xn---123-45dmmc5f.ru", L"\x0442\x043e\x0440\x0442\x044b-123.ru", - {true, false, false, false, false, - false, false, true, false, false, - false, false, false, false, false, - false, false, false, false, true, - true}}, - // Arabic - {"xn--mgba1fmg.ar", L"\x0627\x0641\x0644\x0627\x0645.ar", - {true, false, false, false, false, - false, true, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false}}, - // Hebrew - {"xn--4dbib.he", L"\x05d5\x05d0\x05d4.he", - {true, false, false, false, false, - true, false, false, false, false, - false, false, false, false, false, - false, false, false, false, true, - false}}, - // Thai - {"xn--12c2cc4ag3b4ccu.th", - L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th", - {true, false, false, false, false, - false, false, false, false, false, - false, false, false, true, false, - false, false, false, false, false, - false}}, - // Devangari (Hindi) - {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in", - {true, false, false, false, false, - false, false, false, false, false, - false, false, false, false, true, - false, false, false, false, false, - false}}, - // Invalid IDN - {"xn--hello?world.com", NULL, - {false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false}}, - // Unsafe IDNs - // "payp<alpha>l.com" - {"www.xn--paypl-g9d.com", L"payp\x03b1l.com", - {false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false}}, - // google.gr with Greek omicron and epsilon - {"xn--ggl-6xc1ca.gr", L"g\x03bf\x03bfgl\x03b5.gr", - {false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false}}, - // google.ru with Cyrillic o - {"xn--ggl-tdd6ba.ru", L"g\x043e\x043egl\x0435.ru", - {false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false}}, - // h<e with acute>llo<China in Han>.cn - {"xn--hllo-bpa7979ih5m.cn", L"h\x00e9llo\x4e2d\x56fd.cn", - {false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false}}, - // <Greek rho><Cyrillic a><Cyrillic u>.ru - {"xn--2xa6t2b.ru", L"\x03c1\x0430\x0443.ru", - {false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false}}, - // One that's really long that will force a buffer realloc - {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" - "aaaaaaa", - L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" - L"aaaaaaaa", - {true, true, true, true, true, - true, true, true, true, true, - true, true, true, true, true, - true, true, true, true, true, - true}}, - // Test cases for characters we blacklisted although allowed in IDN. - // Embedded spaces will be turned to %20 in the display. - // TODO(jungshik): We need to have more cases. This is a typical - // data-driven trap. The following test cases need to be separated - // and tested only for a couple of languages. - {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr", - {false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false}}, - {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com", - {false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - }}, - {"google.xn--comabc-k8d", L"google.com\x0338" L"abc", - {false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - }}, - {"google.xn--com-oh4ba.evil.jp", L"google.com\x309a\x309a.evil.jp", - {false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - }}, - {"google.xn--comevil-v04f.jp", L"google.com\x30ce" L"evil.jp", - {false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, false, false, false, - }}, -#if 0 - // These two cases are special. We need a separate test. - // U+3000 and U+3002 are normalized to ASCII space and dot. - {"xn-- -kq6ay5z.cn", L"\x4e2d\x56fd\x3000.cn", - {false, false, true, false, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, true, false, false, - true}}, - {"xn--fiqs8s.cn", L"\x4e2d\x56fd\x3002" L"cn", - {false, false, true, false, false, - false, false, false, false, false, - false, false, false, false, false, - false, false, true, false, false, - true}}, -#endif -}; - -struct AdjustOffsetCase { - size_t input_offset; - size_t output_offset; -}; - struct CompliantHostCase { const char* host; const char* desired_tld; bool expected_output; }; -struct UrlTestData { - const char* description; - const char* input; - const char* languages; - FormatUrlTypes format_types; - UnescapeRule::Type escape_rules; - const wchar_t* output; // Use |wchar_t| to handle Unicode constants easily. - size_t prefix_len; -}; - // Fills in sockaddr for the given 32-bit address (IPv4.) // |bytes| should be an array of length 4. void MakeIPv4Address(const uint8* bytes, int port, SockaddrStorage* storage) { @@ -416,51 +68,6 @@ void MakeIPv6Address(const uint8* bytes, int port, SockaddrStorage* storage) { memcpy(&addr6->sin6_addr, bytes, 16); } -// A helper for IDN*{Fast,Slow}. -// Append "::<language list>" to |expected| and |actual| to make it -// easy to tell which sub-case fails without debugging. -void AppendLanguagesToOutputs(const char* languages, - base::string16* expected, - base::string16* actual) { - base::string16 to_append = ASCIIToUTF16("::") + ASCIIToUTF16(languages); - expected->append(to_append); - actual->append(to_append); -} - -// A pair of helpers for the FormatUrlWithOffsets() test. -void VerboseExpect(size_t expected, - size_t actual, - const std::string& original_url, - size_t position, - const base::string16& formatted_url) { - EXPECT_EQ(expected, actual) << "Original URL: " << original_url - << " (at char " << position << ")\nFormatted URL: " << formatted_url; -} - -void CheckAdjustedOffsets(const std::string& url_string, - const std::string& languages, - FormatUrlTypes format_types, - UnescapeRule::Type unescape_rules, - const size_t* output_offsets) { - GURL url(url_string); - size_t url_length = url_string.length(); - std::vector<size_t> offsets; - for (size_t i = 0; i <= url_length + 1; ++i) - offsets.push_back(i); - offsets.push_back(500000); // Something larger than any input length. - offsets.push_back(std::string::npos); - base::string16 formatted_url = FormatUrlWithOffsets(url, languages, - format_types, unescape_rules, NULL, NULL, &offsets); - for (size_t i = 0; i < url_length; ++i) - VerboseExpect(output_offsets[i], offsets[i], url_string, i, formatted_url); - VerboseExpect(formatted_url.length(), offsets[url_length], url_string, - url_length, formatted_url); - VerboseExpect(base::string16::npos, offsets[url_length + 1], url_string, - 500000, formatted_url); - VerboseExpect(base::string16::npos, offsets[url_length + 2], url_string, - std::string::npos, formatted_url); -} - // Helper to strignize an IP number (used to define expectations). std::string DumpIPNumber(const IPAddressNumber& v) { std::string out; @@ -591,38 +198,6 @@ TEST(NetUtilTest, GetSpecificHeader) { } } -TEST(NetUtilTest, IDNToUnicodeFast) { - for (size_t i = 0; i < ARRAYSIZE_UNSAFE(idn_cases); i++) { - for (size_t j = 0; j < arraysize(kLanguages); j++) { - // ja || zh-TW,en || ko,ja -> IDNToUnicodeSlow - if (j == 3 || j == 17 || j == 18) - continue; - base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j])); - base::string16 expected(idn_cases[i].unicode_allowed[j] ? - WideToUTF16(idn_cases[i].unicode_output) : - ASCIIToUTF16(idn_cases[i].input)); - AppendLanguagesToOutputs(kLanguages[j], &expected, &output); - EXPECT_EQ(expected, output); - } - } -} - -TEST(NetUtilTest, IDNToUnicodeSlow) { - for (size_t i = 0; i < ARRAYSIZE_UNSAFE(idn_cases); i++) { - for (size_t j = 0; j < arraysize(kLanguages); j++) { - // !(ja || zh-TW,en || ko,ja) -> IDNToUnicodeFast - if (!(j == 3 || j == 17 || j == 18)) - continue; - base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j])); - base::string16 expected(idn_cases[i].unicode_allowed[j] ? - WideToUTF16(idn_cases[i].unicode_output) : - ASCIIToUTF16(idn_cases[i].input)); - AppendLanguagesToOutputs(kLanguages[j], &expected, &output); - EXPECT_EQ(expected, output); - } - } -} - TEST(NetUtilTest, CompliantHost) { const CompliantHostCase compliant_host_cases[] = { {"", "", false}, @@ -657,82 +232,6 @@ TEST(NetUtilTest, CompliantHost) { } } -TEST(NetUtilTest, StripWWW) { - EXPECT_EQ(base::string16(), StripWWW(base::string16())); - EXPECT_EQ(base::string16(), StripWWW(ASCIIToUTF16("www."))); - EXPECT_EQ(ASCIIToUTF16("blah"), StripWWW(ASCIIToUTF16("www.blah"))); - EXPECT_EQ(ASCIIToUTF16("blah"), StripWWW(ASCIIToUTF16("blah"))); -} - -// This is currently a windows specific function. -#if defined(OS_WIN) -namespace { - -struct GetDirectoryListingEntryCase { - const wchar_t* name; - const char* raw_bytes; - bool is_dir; - int64 filesize; - base::Time time; - const char* expected; -}; - -} // namespace -TEST(NetUtilTest, GetDirectoryListingEntry) { - const GetDirectoryListingEntryCase test_cases[] = { - {L"Foo", - "", - false, - 10000, - base::Time(), - "<script>addRow(\"Foo\",\"Foo\",0,\"9.8 kB\",\"\");</script>\n"}, - {L"quo\"tes", - "", - false, - 10000, - base::Time(), - "<script>addRow(\"quo\\\"tes\",\"quo%22tes\",0,\"9.8 kB\",\"\");</script>" - "\n"}, - {L"quo\"tes", - "quo\"tes", - false, - 10000, - base::Time(), - "<script>addRow(\"quo\\\"tes\",\"quo%22tes\",0,\"9.8 kB\",\"\");</script>" - "\n"}, - // U+D55C0 U+AE00. raw_bytes is empty (either a local file with - // UTF-8/UTF-16 encoding or a remote file on an ftp server using UTF-8 - {L"\xD55C\xAE00.txt", - "", - false, - 10000, - base::Time(), - "<script>addRow(\"\xED\x95\x9C\xEA\xB8\x80.txt\"," - "\"%ED%95%9C%EA%B8%80.txt\",0,\"9.8 kB\",\"\");</script>\n"}, - // U+D55C0 U+AE00. raw_bytes is the corresponding EUC-KR sequence: - // a local or remote file in EUC-KR. - {L"\xD55C\xAE00.txt", - "\xC7\xD1\xB1\xDB.txt", - false, - 10000, - base::Time(), - "<script>addRow(\"\xED\x95\x9C\xEA\xB8\x80.txt\",\"%C7%D1%B1%DB.txt\"" - ",0,\"9.8 kB\",\"\");</script>\n"}, - }; - - for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) { - const std::string results = GetDirectoryListingEntry( - WideToUTF16(test_cases[i].name), - test_cases[i].raw_bytes, - test_cases[i].is_dir, - test_cases[i].filesize, - test_cases[i].time); - EXPECT_EQ(test_cases[i].expected, results); - } -} - -#endif - TEST(NetUtilTest, ParseHostAndPort) { const struct { const char* input; @@ -906,555 +405,6 @@ TEST(NetUtilTest, GetHostName) { EXPECT_FALSE(hostname.empty()); } -TEST(NetUtilTest, FormatUrl) { - FormatUrlTypes default_format_type = kFormatUrlOmitUsernamePassword; - const UrlTestData tests[] = { - {"Empty URL", "", "", default_format_type, UnescapeRule::NORMAL, L"", 0}, - - {"Simple URL", - "http://www.google.com/", "", default_format_type, UnescapeRule::NORMAL, - L"http://www.google.com/", 7}, - - {"With a port number and a reference", - "http://www.google.com:8080/#\xE3\x82\xB0", "", default_format_type, - UnescapeRule::NORMAL, - L"http://www.google.com:8080/#\x30B0", 7}, - - // -------- IDN tests -------- - {"Japanese IDN with ja", - "http://xn--l8jvb1ey91xtjb.jp", "ja", default_format_type, - UnescapeRule::NORMAL, L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7}, - - {"Japanese IDN with en", - "http://xn--l8jvb1ey91xtjb.jp", "en", default_format_type, - UnescapeRule::NORMAL, L"http://xn--l8jvb1ey91xtjb.jp/", 7}, - - {"Japanese IDN without any languages", - "http://xn--l8jvb1ey91xtjb.jp", "", default_format_type, - UnescapeRule::NORMAL, - // Single script is safe for empty languages. - L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7}, - - {"mailto: with Japanese IDN", - "mailto:foo@xn--l8jvb1ey91xtjb.jp", "ja", default_format_type, - UnescapeRule::NORMAL, - // GURL doesn't assume an email address's domain part as a host name. - L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7}, - - {"file: with Japanese IDN", - "file://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", default_format_type, - UnescapeRule::NORMAL, - L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7}, - - {"ftp: with Japanese IDN", - "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", default_format_type, - UnescapeRule::NORMAL, - L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6}, - - // -------- omit_username_password flag tests -------- - {"With username and password, omit_username_password=false", - "http://user:passwd@example.com/foo", "", - kFormatUrlOmitNothing, UnescapeRule::NORMAL, - L"http://user:passwd@example.com/foo", 19}, - - {"With username and password, omit_username_password=true", - "http://user:passwd@example.com/foo", "", default_format_type, - UnescapeRule::NORMAL, L"http://example.com/foo", 7}, - - {"With username and no password", - "http://user@example.com/foo", "", default_format_type, - UnescapeRule::NORMAL, L"http://example.com/foo", 7}, - - {"Just '@' without username and password", - "http://@example.com/foo", "", default_format_type, UnescapeRule::NORMAL, - L"http://example.com/foo", 7}, - - // GURL doesn't think local-part of an email address is username for URL. - {"mailto:, omit_username_password=true", - "mailto:foo@example.com", "", default_format_type, UnescapeRule::NORMAL, - L"mailto:foo@example.com", 7}, - - // -------- unescape flag tests -------- - {"Do not unescape", - "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" - "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" - "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", "en", default_format_type, - UnescapeRule::NONE, - // GURL parses %-encoded hostnames into Punycode. - L"http://xn--qcka1pmc.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" - L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", 7}, - - {"Unescape normally", - "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" - "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" - "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", "en", default_format_type, - UnescapeRule::NORMAL, - L"http://xn--qcka1pmc.jp/\x30B0\x30FC\x30B0\x30EB" - L"?q=\x30B0\x30FC\x30B0\x30EB", 7}, - - {"Unescape normally with BiDi control character", - "http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", "en", default_format_type, - UnescapeRule::NORMAL, L"http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", 7}, - - {"Unescape normally including unescape spaces", - "http://www.google.com/search?q=Hello%20World", "en", default_format_type, - UnescapeRule::SPACES, L"http://www.google.com/search?q=Hello World", 7}, - - /* - {"unescape=true with some special characters", - "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", "", - kFormatUrlOmitNothing, UnescapeRule::NORMAL, - L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25}, - */ - // Disabled: the resultant URL becomes "...user%253A:%2540passwd...". - - // -------- omit http: -------- - {"omit http with user name", - "http://user@example.com/foo", "", kFormatUrlOmitAll, - UnescapeRule::NORMAL, L"example.com/foo", 0}, - - {"omit http", - "http://www.google.com/", "en", kFormatUrlOmitHTTP, - UnescapeRule::NORMAL, L"www.google.com/", - 0}, - - {"omit http with https", - "https://www.google.com/", "en", kFormatUrlOmitHTTP, - UnescapeRule::NORMAL, L"https://www.google.com/", - 8}, - - {"omit http starts with ftp.", - "http://ftp.google.com/", "en", kFormatUrlOmitHTTP, - UnescapeRule::NORMAL, L"http://ftp.google.com/", - 7}, - - // -------- omit trailing slash on bare hostname -------- - {"omit slash when it's the entire path", - "http://www.google.com/", "en", - kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL, - L"http://www.google.com", 7}, - {"omit slash when there's a ref", - "http://www.google.com/#ref", "en", - kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL, - L"http://www.google.com/#ref", 7}, - {"omit slash when there's a query", - "http://www.google.com/?", "en", - kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL, - L"http://www.google.com/?", 7}, - {"omit slash when it's not the entire path", - "http://www.google.com/foo", "en", - kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL, - L"http://www.google.com/foo", 7}, - {"omit slash for nonstandard URLs", - "data:/", "en", kFormatUrlOmitTrailingSlashOnBareHostname, - UnescapeRule::NORMAL, L"data:/", 5}, - {"omit slash for file URLs", - "file:///", "en", kFormatUrlOmitTrailingSlashOnBareHostname, - UnescapeRule::NORMAL, L"file:///", 7}, - - // -------- view-source: -------- - {"view-source", - "view-source:http://xn--qcka1pmc.jp/", "ja", default_format_type, - UnescapeRule::NORMAL, L"view-source:http://\x30B0\x30FC\x30B0\x30EB.jp/", - 19}, - - {"view-source of view-source", - "view-source:view-source:http://xn--qcka1pmc.jp/", "ja", - default_format_type, UnescapeRule::NORMAL, - L"view-source:view-source:http://xn--qcka1pmc.jp/", 12}, - - // view-source should omit http and trailing slash where non-view-source - // would. - {"view-source omit http", - "view-source:http://a.b/c", "en", kFormatUrlOmitAll, - UnescapeRule::NORMAL, L"view-source:a.b/c", - 12}, - {"view-source omit http starts with ftp.", - "view-source:http://ftp.b/c", "en", kFormatUrlOmitAll, - UnescapeRule::NORMAL, L"view-source:http://ftp.b/c", - 19}, - {"view-source omit slash when it's the entire path", - "view-source:http://a.b/", "en", kFormatUrlOmitAll, - UnescapeRule::NORMAL, L"view-source:a.b", - 12}, - }; - - for (size_t i = 0; i < arraysize(tests); ++i) { - size_t prefix_len; - base::string16 formatted = FormatUrl( - GURL(tests[i].input), tests[i].languages, tests[i].format_types, - tests[i].escape_rules, NULL, &prefix_len, NULL); - EXPECT_EQ(WideToUTF16(tests[i].output), formatted) << tests[i].description; - EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description; - } -} - -TEST(NetUtilTest, FormatUrlParsed) { - // No unescape case. - url::Parsed parsed; - base::string16 formatted = FormatUrl( - GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" - "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), - "ja", kFormatUrlOmitNothing, UnescapeRule::NONE, &parsed, NULL, - NULL); - EXPECT_EQ(WideToUTF16( - L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080" - L"/%E3%82%B0/?q=%E3%82%B0#\x30B0"), formatted); - EXPECT_EQ(WideToUTF16(L"%E3%82%B0"), - formatted.substr(parsed.username.begin, parsed.username.len)); - EXPECT_EQ(WideToUTF16(L"%E3%83%BC"), - formatted.substr(parsed.password.begin, parsed.password.len)); - EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"), - formatted.substr(parsed.host.begin, parsed.host.len)); - EXPECT_EQ(WideToUTF16(L"8080"), - formatted.substr(parsed.port.begin, parsed.port.len)); - EXPECT_EQ(WideToUTF16(L"/%E3%82%B0/"), - formatted.substr(parsed.path.begin, parsed.path.len)); - EXPECT_EQ(WideToUTF16(L"q=%E3%82%B0"), - formatted.substr(parsed.query.begin, parsed.query.len)); - EXPECT_EQ(WideToUTF16(L"\x30B0"), - formatted.substr(parsed.ref.begin, parsed.ref.len)); - - // Unescape case. - formatted = FormatUrl( - GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" - "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), - "ja", kFormatUrlOmitNothing, UnescapeRule::NORMAL, &parsed, NULL, - NULL); - EXPECT_EQ(WideToUTF16(L"http://\x30B0:\x30FC@\x30B0\x30FC\x30B0\x30EB.jp:8080" - L"/\x30B0/?q=\x30B0#\x30B0"), formatted); - EXPECT_EQ(WideToUTF16(L"\x30B0"), - formatted.substr(parsed.username.begin, parsed.username.len)); - EXPECT_EQ(WideToUTF16(L"\x30FC"), - formatted.substr(parsed.password.begin, parsed.password.len)); - EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"), - formatted.substr(parsed.host.begin, parsed.host.len)); - EXPECT_EQ(WideToUTF16(L"8080"), - formatted.substr(parsed.port.begin, parsed.port.len)); - EXPECT_EQ(WideToUTF16(L"/\x30B0/"), - formatted.substr(parsed.path.begin, parsed.path.len)); - EXPECT_EQ(WideToUTF16(L"q=\x30B0"), - formatted.substr(parsed.query.begin, parsed.query.len)); - EXPECT_EQ(WideToUTF16(L"\x30B0"), - formatted.substr(parsed.ref.begin, parsed.ref.len)); - - // Omit_username_password + unescape case. - formatted = FormatUrl( - GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" - "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), - "ja", kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, &parsed, - NULL, NULL); - EXPECT_EQ(WideToUTF16(L"http://\x30B0\x30FC\x30B0\x30EB.jp:8080" - L"/\x30B0/?q=\x30B0#\x30B0"), formatted); - EXPECT_FALSE(parsed.username.is_valid()); - EXPECT_FALSE(parsed.password.is_valid()); - EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"), - formatted.substr(parsed.host.begin, parsed.host.len)); - EXPECT_EQ(WideToUTF16(L"8080"), - formatted.substr(parsed.port.begin, parsed.port.len)); - EXPECT_EQ(WideToUTF16(L"/\x30B0/"), - formatted.substr(parsed.path.begin, parsed.path.len)); - EXPECT_EQ(WideToUTF16(L"q=\x30B0"), - formatted.substr(parsed.query.begin, parsed.query.len)); - EXPECT_EQ(WideToUTF16(L"\x30B0"), - formatted.substr(parsed.ref.begin, parsed.ref.len)); - - // View-source case. - formatted = - FormatUrl(GURL("view-source:http://user:passwd@host:81/path?query#ref"), - std::string(), - kFormatUrlOmitUsernamePassword, - UnescapeRule::NORMAL, - &parsed, - NULL, - NULL); - EXPECT_EQ(WideToUTF16(L"view-source:http://host:81/path?query#ref"), - formatted); - EXPECT_EQ(WideToUTF16(L"view-source:http"), - formatted.substr(parsed.scheme.begin, parsed.scheme.len)); - EXPECT_FALSE(parsed.username.is_valid()); - EXPECT_FALSE(parsed.password.is_valid()); - EXPECT_EQ(WideToUTF16(L"host"), - formatted.substr(parsed.host.begin, parsed.host.len)); - EXPECT_EQ(WideToUTF16(L"81"), - formatted.substr(parsed.port.begin, parsed.port.len)); - EXPECT_EQ(WideToUTF16(L"/path"), - formatted.substr(parsed.path.begin, parsed.path.len)); - EXPECT_EQ(WideToUTF16(L"query"), - formatted.substr(parsed.query.begin, parsed.query.len)); - EXPECT_EQ(WideToUTF16(L"ref"), - formatted.substr(parsed.ref.begin, parsed.ref.len)); - - // omit http case. - formatted = FormatUrl(GURL("http://host:8000/a?b=c#d"), - std::string(), - kFormatUrlOmitHTTP, - UnescapeRule::NORMAL, - &parsed, - NULL, - NULL); - EXPECT_EQ(WideToUTF16(L"host:8000/a?b=c#d"), formatted); - EXPECT_FALSE(parsed.scheme.is_valid()); - EXPECT_FALSE(parsed.username.is_valid()); - EXPECT_FALSE(parsed.password.is_valid()); - EXPECT_EQ(WideToUTF16(L"host"), - formatted.substr(parsed.host.begin, parsed.host.len)); - EXPECT_EQ(WideToUTF16(L"8000"), - formatted.substr(parsed.port.begin, parsed.port.len)); - EXPECT_EQ(WideToUTF16(L"/a"), - formatted.substr(parsed.path.begin, parsed.path.len)); - EXPECT_EQ(WideToUTF16(L"b=c"), - formatted.substr(parsed.query.begin, parsed.query.len)); - EXPECT_EQ(WideToUTF16(L"d"), - formatted.substr(parsed.ref.begin, parsed.ref.len)); - - // omit http starts with ftp case. - formatted = FormatUrl(GURL("http://ftp.host:8000/a?b=c#d"), - std::string(), - kFormatUrlOmitHTTP, - UnescapeRule::NORMAL, - &parsed, - NULL, - NULL); - EXPECT_EQ(WideToUTF16(L"http://ftp.host:8000/a?b=c#d"), formatted); - EXPECT_TRUE(parsed.scheme.is_valid()); - EXPECT_FALSE(parsed.username.is_valid()); - EXPECT_FALSE(parsed.password.is_valid()); - EXPECT_EQ(WideToUTF16(L"http"), - formatted.substr(parsed.scheme.begin, parsed.scheme.len)); - EXPECT_EQ(WideToUTF16(L"ftp.host"), - formatted.substr(parsed.host.begin, parsed.host.len)); - EXPECT_EQ(WideToUTF16(L"8000"), - formatted.substr(parsed.port.begin, parsed.port.len)); - EXPECT_EQ(WideToUTF16(L"/a"), - formatted.substr(parsed.path.begin, parsed.path.len)); - EXPECT_EQ(WideToUTF16(L"b=c"), - formatted.substr(parsed.query.begin, parsed.query.len)); - EXPECT_EQ(WideToUTF16(L"d"), - formatted.substr(parsed.ref.begin, parsed.ref.len)); - - // omit http starts with 'f' case. - formatted = FormatUrl(GURL("http://f/"), - std::string(), - kFormatUrlOmitHTTP, - UnescapeRule::NORMAL, - &parsed, - NULL, - NULL); - EXPECT_EQ(WideToUTF16(L"f/"), formatted); - EXPECT_FALSE(parsed.scheme.is_valid()); - EXPECT_FALSE(parsed.username.is_valid()); - EXPECT_FALSE(parsed.password.is_valid()); - EXPECT_FALSE(parsed.port.is_valid()); - EXPECT_TRUE(parsed.path.is_valid()); - EXPECT_FALSE(parsed.query.is_valid()); - EXPECT_FALSE(parsed.ref.is_valid()); - EXPECT_EQ(WideToUTF16(L"f"), - formatted.substr(parsed.host.begin, parsed.host.len)); - EXPECT_EQ(WideToUTF16(L"/"), - formatted.substr(parsed.path.begin, parsed.path.len)); -} - -// Make sure that calling FormatUrl on a GURL and then converting back to a GURL -// results in the original GURL, for each ASCII character in the path. -TEST(NetUtilTest, FormatUrlRoundTripPathASCII) { - for (unsigned char test_char = 32; test_char < 128; ++test_char) { - GURL url(std::string("http://www.google.com/") + - static_cast<char>(test_char)); - size_t prefix_len; - base::string16 formatted = FormatUrl(url, - std::string(), - kFormatUrlOmitUsernamePassword, - UnescapeRule::NORMAL, - NULL, - &prefix_len, - NULL); - EXPECT_EQ(url.spec(), GURL(formatted).spec()); - } -} - -// Make sure that calling FormatUrl on a GURL and then converting back to a GURL -// results in the original GURL, for each escaped ASCII character in the path. -TEST(NetUtilTest, FormatUrlRoundTripPathEscaped) { - for (unsigned char test_char = 32; test_char < 128; ++test_char) { - std::string original_url("http://www.google.com/"); - original_url.push_back('%'); - original_url.append(base::HexEncode(&test_char, 1)); - - GURL url(original_url); - size_t prefix_len; - base::string16 formatted = FormatUrl(url, - std::string(), - kFormatUrlOmitUsernamePassword, - UnescapeRule::NORMAL, - NULL, - &prefix_len, - NULL); - EXPECT_EQ(url.spec(), GURL(formatted).spec()); - } -} - -// Make sure that calling FormatUrl on a GURL and then converting back to a GURL -// results in the original GURL, for each ASCII character in the query. -TEST(NetUtilTest, FormatUrlRoundTripQueryASCII) { - for (unsigned char test_char = 32; test_char < 128; ++test_char) { - GURL url(std::string("http://www.google.com/?") + - static_cast<char>(test_char)); - size_t prefix_len; - base::string16 formatted = FormatUrl(url, - std::string(), - kFormatUrlOmitUsernamePassword, - UnescapeRule::NORMAL, - NULL, - &prefix_len, - NULL); - EXPECT_EQ(url.spec(), GURL(formatted).spec()); - } -} - -// Make sure that calling FormatUrl on a GURL and then converting back to a GURL -// only results in a different GURL for certain characters. -TEST(NetUtilTest, FormatUrlRoundTripQueryEscaped) { - // A full list of characters which FormatURL should unescape and GURL should - // not escape again, when they appear in a query string. - const char* kUnescapedCharacters = - "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_~"; - for (unsigned char test_char = 0; test_char < 128; ++test_char) { - std::string original_url("http://www.google.com/?"); - original_url.push_back('%'); - original_url.append(base::HexEncode(&test_char, 1)); - - GURL url(original_url); - size_t prefix_len; - base::string16 formatted = FormatUrl(url, - std::string(), - kFormatUrlOmitUsernamePassword, - UnescapeRule::NORMAL, - NULL, - &prefix_len, - NULL); - - if (test_char && - strchr(kUnescapedCharacters, static_cast<char>(test_char))) { - EXPECT_NE(url.spec(), GURL(formatted).spec()); - } else { - EXPECT_EQ(url.spec(), GURL(formatted).spec()); - } - } -} - -TEST(NetUtilTest, FormatUrlWithOffsets) { - CheckAdjustedOffsets(std::string(), "en", kFormatUrlOmitNothing, - UnescapeRule::NORMAL, NULL); - - const size_t basic_offsets[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, - 21, 22, 23, 24, 25 - }; - CheckAdjustedOffsets("http://www.google.com/foo/", "en", - kFormatUrlOmitNothing, UnescapeRule::NORMAL, - basic_offsets); - - const size_t omit_auth_offsets_1[] = { - 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 7, - 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 - }; - CheckAdjustedOffsets("http://foo:bar@www.google.com/", "en", - kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, - omit_auth_offsets_1); - - const size_t omit_auth_offsets_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21 - }; - CheckAdjustedOffsets("http://foo@www.google.com/", "en", - kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, - omit_auth_offsets_2); - - const size_t dont_omit_auth_offsets[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, - kNpos, kNpos, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, - kNpos, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - 30, 31 - }; - // Unescape to "http://foo\x30B0:\x30B0bar@www.google.com". - CheckAdjustedOffsets("http://foo%E3%82%B0:%E3%82%B0bar@www.google.com/", "en", - kFormatUrlOmitNothing, UnescapeRule::NORMAL, - dont_omit_auth_offsets); - - const size_t view_source_offsets[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, kNpos, - kNpos, kNpos, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33 - }; - CheckAdjustedOffsets("view-source:http://foo@www.google.com/", "en", - kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, - view_source_offsets); - - const size_t idn_hostname_offsets_1[] = { - 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, - kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 12, - 13, 14, 15, 16, 17, 18, 19 - }; - // Convert punycode to "http://\x671d\x65e5\x3042\x3055\x3072.jp/foo/". - CheckAdjustedOffsets("http://xn--l8jvb1ey91xtjb.jp/foo/", "ja", - kFormatUrlOmitNothing, UnescapeRule::NORMAL, - idn_hostname_offsets_1); - - const size_t idn_hostname_offsets_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, - kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 14, 15, kNpos, kNpos, kNpos, - kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, - kNpos, 19, 20, 21, 22, 23, 24 - }; - // Convert punycode to - // "http://test.\x89c6\x9891.\x5317\x4eac\x5927\x5b78.test/". - CheckAdjustedOffsets("http://test.xn--cy2a840a.xn--1lq90ic7f1rc.test/", - "zh-CN", kFormatUrlOmitNothing, UnescapeRule::NORMAL, - idn_hostname_offsets_2); - - const size_t unescape_offsets[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, - 21, 22, 23, 24, 25, kNpos, kNpos, 26, 27, 28, 29, 30, kNpos, kNpos, kNpos, - kNpos, kNpos, kNpos, kNpos, kNpos, 31, kNpos, kNpos, kNpos, kNpos, kNpos, - kNpos, kNpos, kNpos, 32, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, - kNpos, 33, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos - }; - // Unescape to "http://www.google.com/foo bar/\x30B0\x30FC\x30B0\x30EB". - CheckAdjustedOffsets( - "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", - "en", kFormatUrlOmitNothing, UnescapeRule::SPACES, unescape_offsets); - - const size_t ref_offsets[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, - 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, kNpos, kNpos, 32, kNpos, kNpos, - 33 - }; - // Unescape to "http://www.google.com/foo.html#\x30B0\x30B0z". - CheckAdjustedOffsets( - "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z", "en", - kFormatUrlOmitNothing, UnescapeRule::NORMAL, ref_offsets); - - const size_t omit_http_offsets[] = { - 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14 - }; - CheckAdjustedOffsets("http://www.google.com/", "en", kFormatUrlOmitHTTP, - UnescapeRule::NORMAL, omit_http_offsets); - - const size_t omit_http_start_with_ftp_offsets[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 - }; - CheckAdjustedOffsets("http://ftp.google.com/", "en", kFormatUrlOmitHTTP, - UnescapeRule::NORMAL, omit_http_start_with_ftp_offsets); - - const size_t omit_all_offsets[] = { - 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos, - 0, 1, 2, 3, 4, 5, 6, 7 - }; - CheckAdjustedOffsets("http://user@foo.com/", "en", kFormatUrlOmitAll, - UnescapeRule::NORMAL, omit_all_offsets); -} - TEST(NetUtilTest, SimplifyUrlForRequest) { struct { const char* input_url; diff --git a/net/net.gyp b/net/net.gyp index a454e7d..054d066 100644 --- a/net/net.gyp +++ b/net/net.gyp @@ -439,8 +439,15 @@ }, ], [ 'use_icu_alternatives_on_android == 1', { + 'dependencies!': [ + '../base/base.gyp:base_i18n', + '../third_party/icu/icu.gyp:icui18n', + '../third_party/icu/icu.gyp:icuuc', + ], 'sources!': [ + 'base/filename_util_icu.cc', 'base/net_string_util_icu.cc', + 'base/net_util_icu.cc', ], 'sources': [ 'base/net_string_util_icu_alternatives_android.cc', @@ -641,7 +648,10 @@ ], }, ], - [ 'use_v8_in_net==1', { + # Always need use_v8_in_net to be 1 to run gyp on Android, so just + # remove net_unittest's dependency on v8 when using icu alternatives + # instead of setting use_v8_in_net to 0. + [ 'use_v8_in_net==1 and use_icu_alternatives_on_android==0', { 'dependencies': [ 'net_with_v8', ], @@ -749,10 +759,22 @@ }, ], ['OS == "android" and gtest_target_type == "shared_library"', { + # TODO(mmenke): This depends on test_support_base, which depends on + # icu. Figure out a way to remove that dependency. 'dependencies': [ '../testing/android/native_test.gyp:native_test_native_code', ] }], + [ 'use_icu_alternatives_on_android == 1', { + 'dependencies!': [ + '../base/base.gyp:base_i18n', + ], + 'sources!': [ + 'base/filename_util_unittest.cc', + 'base/net_util_icu_unittest.cc', + ], + }, + ], ], 'target_conditions': [ # These source files are excluded by default platform rules, but they @@ -823,6 +845,8 @@ ], 'export_dependent_settings': [ '../base/base.gyp:base', + # TODO(mmenke): This depends on icu, figure out a way to build tests + # without icu. '../base/base.gyp:test_support_base', '../testing/gtest.gyp:gtest', '../testing/gmock.gyp:gmock', diff --git a/net/net.gypi b/net/net.gypi index 0176268..4a59cf7 100644 --- a/net/net.gypi +++ b/net/net.gypi @@ -1250,6 +1250,7 @@ 'base/net_log_unittest.cc', 'base/net_log_unittest.h', 'base/net_util_unittest.cc', + 'base/net_util_icu_unittest.cc', 'base/network_change_notifier_win_unittest.cc', 'base/prioritized_dispatcher_unittest.cc', 'base/priority_queue_unittest.cc', diff --git a/net/proxy/proxy_script_fetcher_impl_unittest.cc b/net/proxy/proxy_script_fetcher_impl_unittest.cc index b902850..2536b02 100644 --- a/net/proxy/proxy_script_fetcher_impl_unittest.cc +++ b/net/proxy/proxy_script_fetcher_impl_unittest.cc @@ -91,6 +91,7 @@ class RequestContext : public URLRequestContext { URLRequestContextStorage storage_; }; +#if !defined(DISABLE_FILE_SUPPORT) // Get a file:// url relative to net/data/proxy/proxy_script_fetcher_unittest. GURL GetTestFileUrl(const std::string& relpath) { base::FilePath path; @@ -101,6 +102,7 @@ GURL GetTestFileUrl(const std::string& relpath) { GURL base_url = FilePathToFileURL(path); return GURL(base_url.spec() + "/" + relpath); } +#endif // !defined(DISABLE_FILE_SUPPORT) // Really simple NetworkDelegate so we can allow local file access on ChromeOS // without introducing layering violations. Also causes a test failure if a @@ -352,7 +354,9 @@ TEST_F(ProxyScriptFetcherImplTest, TooLarge) { // These two URLs are the same file, but are http:// vs file:// GURL urls[] = { test_server_.GetURL("files/large-pac.nsproxy"), +#if !defined(DISABLE_FILE_SUPPORT) GetTestFileUrl("large-pac.nsproxy") +#endif }; // Try fetching URLs that are 101 bytes large. We should abort the request diff --git a/net/url_request/url_request_unittest.cc b/net/url_request/url_request_unittest.cc index 7a526a3..cec9049 100644 --- a/net/url_request/url_request_unittest.cc +++ b/net/url_request/url_request_unittest.cc @@ -30,7 +30,6 @@ #include "base/strings/stringprintf.h" #include "base/strings/utf_string_conversions.h" #include "net/base/capturing_net_log.h" -#include "net/base/filename_util.h" #include "net/base/load_flags.h" #include "net/base/load_timing_info.h" #include "net/base/load_timing_info_test_util.h" @@ -77,6 +76,7 @@ #include "testing/platform_test.h" #if !defined(DISABLE_FILE_SUPPORT) +#include "net/base/filename_util.h" #include "net/url_request/file_protocol_handler.h" #include "net/url_request/url_request_file_dir_job.h" #endif @@ -697,6 +697,7 @@ TEST_F(URLRequestTest, DataURLImageTest) { } } +#if !defined(DISABLE_FILE_SUPPORT) TEST_F(URLRequestTest, FileTest) { base::FilePath app_path; PathService::Get(base::FILE_EXE, &app_path); @@ -895,80 +896,6 @@ TEST_F(URLRequestTest, AllowFileURLs) { } } -TEST_F(URLRequestTest, InvalidUrlTest) { - TestDelegate d; - { - URLRequest r(GURL("invalid url"), DEFAULT_PRIORITY, &d, &default_context_); - - r.Start(); - EXPECT_TRUE(r.is_pending()); - - base::RunLoop().Run(); - EXPECT_TRUE(d.request_failed()); - } -} - -#if defined(OS_WIN) -TEST_F(URLRequestTest, ResolveShortcutTest) { - base::FilePath app_path; - PathService::Get(base::DIR_SOURCE_ROOT, &app_path); - app_path = app_path.AppendASCII("net"); - app_path = app_path.AppendASCII("data"); - app_path = app_path.AppendASCII("url_request_unittest"); - app_path = app_path.AppendASCII("with-headers.html"); - - std::wstring lnk_path = app_path.value() + L".lnk"; - - base::win::ScopedCOMInitializer com_initializer; - - // Temporarily create a shortcut for test - { - base::win::ScopedComPtr<IShellLink> shell; - ASSERT_TRUE(SUCCEEDED(shell.CreateInstance(CLSID_ShellLink, NULL, - CLSCTX_INPROC_SERVER))); - base::win::ScopedComPtr<IPersistFile> persist; - ASSERT_TRUE(SUCCEEDED(shell.QueryInterface(persist.Receive()))); - EXPECT_TRUE(SUCCEEDED(shell->SetPath(app_path.value().c_str()))); - EXPECT_TRUE(SUCCEEDED(shell->SetDescription(L"ResolveShortcutTest"))); - EXPECT_TRUE(SUCCEEDED(persist->Save(lnk_path.c_str(), TRUE))); - } - - TestDelegate d; - { - URLRequest r(FilePathToFileURL(base::FilePath(lnk_path)), - DEFAULT_PRIORITY, - &d, - &default_context_); - - r.Start(); - EXPECT_TRUE(r.is_pending()); - - base::RunLoop().Run(); - - WIN32_FILE_ATTRIBUTE_DATA data; - GetFileAttributesEx(app_path.value().c_str(), - GetFileExInfoStandard, &data); - HANDLE file = CreateFile(app_path.value().c_str(), GENERIC_READ, - FILE_SHARE_READ, NULL, OPEN_EXISTING, - FILE_ATTRIBUTE_NORMAL, NULL); - EXPECT_NE(INVALID_HANDLE_VALUE, file); - scoped_ptr<char[]> buffer(new char[data.nFileSizeLow]); - DWORD read_size; - BOOL result; - result = ReadFile(file, buffer.get(), data.nFileSizeLow, - &read_size, NULL); - std::string content(buffer.get(), read_size); - CloseHandle(file); - - EXPECT_TRUE(!r.is_pending()); - EXPECT_EQ(1, d.received_redirect_count()); - EXPECT_EQ(content, d.data_received()); - } - - // Clean the shortcut - DeleteFile(lnk_path.c_str()); -} -#endif // defined(OS_WIN) TEST_F(URLRequestTest, FileDirCancelTest) { // Put in mock resource provider. @@ -1067,7 +994,84 @@ TEST_F(URLRequestTest, FileDirRedirectSingleSlash) { ASSERT_EQ(1, d.received_redirect_count()); ASSERT_FALSE(req.status().is_success()); } -#endif +#endif // defined(OS_WIN) + +#endif // !defined(DISABLE_FILE_SUPPORT) + +TEST_F(URLRequestTest, InvalidUrlTest) { + TestDelegate d; + { + URLRequest r(GURL("invalid url"), DEFAULT_PRIORITY, &d, &default_context_); + + r.Start(); + EXPECT_TRUE(r.is_pending()); + + base::RunLoop().Run(); + EXPECT_TRUE(d.request_failed()); + } +} + +#if defined(OS_WIN) +TEST_F(URLRequestTest, ResolveShortcutTest) { + base::FilePath app_path; + PathService::Get(base::DIR_SOURCE_ROOT, &app_path); + app_path = app_path.AppendASCII("net"); + app_path = app_path.AppendASCII("data"); + app_path = app_path.AppendASCII("url_request_unittest"); + app_path = app_path.AppendASCII("with-headers.html"); + + std::wstring lnk_path = app_path.value() + L".lnk"; + + base::win::ScopedCOMInitializer com_initializer; + + // Temporarily create a shortcut for test + { + base::win::ScopedComPtr<IShellLink> shell; + ASSERT_TRUE(SUCCEEDED(shell.CreateInstance(CLSID_ShellLink, NULL, + CLSCTX_INPROC_SERVER))); + base::win::ScopedComPtr<IPersistFile> persist; + ASSERT_TRUE(SUCCEEDED(shell.QueryInterface(persist.Receive()))); + EXPECT_TRUE(SUCCEEDED(shell->SetPath(app_path.value().c_str()))); + EXPECT_TRUE(SUCCEEDED(shell->SetDescription(L"ResolveShortcutTest"))); + EXPECT_TRUE(SUCCEEDED(persist->Save(lnk_path.c_str(), TRUE))); + } + + TestDelegate d; + { + URLRequest r(FilePathToFileURL(base::FilePath(lnk_path)), + DEFAULT_PRIORITY, + &d, + &default_context_); + + r.Start(); + EXPECT_TRUE(r.is_pending()); + + base::RunLoop().Run(); + + WIN32_FILE_ATTRIBUTE_DATA data; + GetFileAttributesEx(app_path.value().c_str(), + GetFileExInfoStandard, &data); + HANDLE file = CreateFile(app_path.value().c_str(), GENERIC_READ, + FILE_SHARE_READ, NULL, OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL, NULL); + EXPECT_NE(INVALID_HANDLE_VALUE, file); + scoped_ptr<char[]> buffer(new char[data.nFileSizeLow]); + DWORD read_size; + BOOL result; + result = ReadFile(file, buffer.get(), data.nFileSizeLow, + &read_size, NULL); + std::string content(buffer.get(), read_size); + CloseHandle(file); + + EXPECT_TRUE(!r.is_pending()); + EXPECT_EQ(1, d.received_redirect_count()); + EXPECT_EQ(content, d.data_received()); + } + + // Clean the shortcut + DeleteFile(lnk_path.c_str()); +} +#endif // defined(OS_WIN) // Custom URLRequestJobs for use with interceptor tests class RestartTestJob : public URLRequestTestJob { |