// Copyright (c) 2010 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "net/tools/dump_cache/url_to_filename_encoder.h" #include #include #include "base/strings/string_util.h" #include "base/strings/stringprintf.h" #include "base/strings/string_piece.h" #include "testing/gtest/include/gtest/gtest.h" using base::StringPiece; using std::string; namespace net { #ifdef WIN32 char kDirSeparator = '\\'; char kOtherDirSeparator = '/'; #else char kDirSeparator = '/'; char kOtherDirSeparator = '\\'; #endif class UrlToFilenameEncoderTest : public ::testing::Test { protected: UrlToFilenameEncoderTest() : escape_(1, UrlToFilenameEncoder::kEscapeChar), dir_sep_(1, kDirSeparator) { } void CheckSegmentLength(const StringPiece& escaped_word) { std::vector components; Tokenize(escaped_word, StringPiece("/"), &components); for (size_t i = 0; i < components.size(); ++i) { EXPECT_GE(UrlToFilenameEncoder::kMaximumSubdirectoryLength, components[i].size()); } } void CheckValidChars(const StringPiece& escaped_word, char invalid_slash) { // These characters are invalid in Windows. We add in ', as that's pretty // inconvenient in a Unix filename. // // See http://msdn.microsoft.com/en-us/library/aa365247(VS.85).aspx const string kInvalidChars = "<>:\"|?*'"; for (size_t i = 0; i < escaped_word.size(); ++i) { char c = escaped_word[i]; EXPECT_EQ(string::npos, kInvalidChars.find(c)); EXPECT_NE(invalid_slash, c); EXPECT_NE('\0', c); // only invalid character in Posix EXPECT_GT(0x7E, c); // only English printable characters } } void Validate(const string& in_word, const string& gold_word) { string escaped_word, url; UrlToFilenameEncoder::EncodeSegment( std::string(), in_word, '/', &escaped_word); EXPECT_EQ(gold_word, escaped_word); CheckSegmentLength(escaped_word); CheckValidChars(escaped_word, '\\'); UrlToFilenameEncoder::Decode(escaped_word, '/', &url); EXPECT_EQ(in_word, url); } void ValidateAllSegmentsSmall(const string& in_word) { string escaped_word, url; UrlToFilenameEncoder::EncodeSegment( std::string(), in_word, '/', &escaped_word); CheckSegmentLength(escaped_word); CheckValidChars(escaped_word, '\\'); UrlToFilenameEncoder::Decode(escaped_word, '/', &url); EXPECT_EQ(in_word, url); } void ValidateNoChange(const string& word) { // We always suffix the leaf with kEscapeChar, unless the leaf is empty. Validate(word, word + escape_); } void ValidateEscaped(unsigned char ch) { // We always suffix the leaf with kEscapeChar, unless the leaf is empty. char escaped[100]; const char escape = UrlToFilenameEncoder::kEscapeChar; base::snprintf(escaped, sizeof(escaped), "%c%02X%c", escape, ch, escape); Validate(string(1, ch), escaped); } void ValidateUrl(const string& url, const string& base_path, bool legacy_escape, const string& gold_filename) { string encoded_filename = UrlToFilenameEncoder::Encode( url, base_path, legacy_escape); EXPECT_EQ(gold_filename, encoded_filename); if (!legacy_escape) { CheckSegmentLength(encoded_filename); CheckValidChars(encoded_filename, kOtherDirSeparator); string decoded_url; UrlToFilenameEncoder::Decode(encoded_filename, kDirSeparator, &decoded_url); if (url != decoded_url) { EXPECT_EQ(url, "http://" + decoded_url); } } } void ValidateUrlOldNew(const string& url, const string& gold_old_filename, const string& gold_new_filename) { ValidateUrl(url, std::string(), true, gold_old_filename); ValidateUrl(url, std::string(), false, gold_new_filename); } void ValidateEncodeSame(const string& url1, const string& url2) { string filename1 = UrlToFilenameEncoder::Encode(url1, std::string(), false); string filename2 = UrlToFilenameEncoder::Encode(url2, std::string(), false); EXPECT_EQ(filename1, filename2); } string escape_; string dir_sep_; }; TEST_F(UrlToFilenameEncoderTest, DoesNotEscape) { ValidateNoChange(std::string()); ValidateNoChange("abcdefg"); ValidateNoChange("abcdefghijklmnopqrstuvwxyz"); ValidateNoChange("ZYXWVUT"); ValidateNoChange("ZYXWVUTSRQPONMLKJIHGFEDCBA"); ValidateNoChange("01234567689"); ValidateNoChange("_.=+-"); ValidateNoChange("abcdefghijklmnopqrstuvwxyzZYXWVUTSRQPONMLKJIHGFEDCBA" "01234567689_.=+-"); ValidateNoChange("index.html"); ValidateNoChange("/"); ValidateNoChange("/."); ValidateNoChange("."); ValidateNoChange(".."); } TEST_F(UrlToFilenameEncoderTest, Escapes) { const string bad_chars = "<>:\"\\|?*" // Illegal on Windows "~`!$^&(){}[]';" // Bad for Unix shells "^@" // Build tool doesn't like "#%" // Tool doesn't like ","; // The escape char has to be escaped for (size_t i = 0; i < bad_chars.size(); ++i) { ValidateEscaped(bad_chars[i]); } // Check non-printable characters. ValidateEscaped('\0'); for (size_t i = 127; i < 256; ++i) { ValidateEscaped(static_cast(i)); } } TEST_F(UrlToFilenameEncoderTest, DoesEscapeCorrectly) { Validate("mysite.com&x", "mysite.com" + escape_ + "26x" + escape_); Validate("/./", "/" + escape_ + "./" + escape_); Validate("/../", "/" + escape_ + "../" + escape_); Validate("//", "/" + escape_ + "2F" + escape_); Validate("/./leaf", "/" + escape_ + "./leaf" + escape_); Validate("/../leaf", "/" + escape_ + "../leaf" + escape_); Validate("//leaf", "/" + escape_ + "2Fleaf" + escape_); Validate("mysite/u?param1=x¶m2=y", "mysite/u" + escape_ + "3Fparam1=x" + escape_ + "26param2=y" + escape_); Validate("search?q=dogs&go=&form=QBLH&qs=n", // from Latency Labs bing test. "search" + escape_ + "3Fq=dogs" + escape_ + "26go=" + escape_ + "26form=QBLH" + escape_ + "26qs=n" + escape_); Validate("~joebob/my_neeto-website+with_stuff.asp?id=138&content=true", "" + escape_ + "7Ejoebob/my_neeto-website+with_stuff.asp" + escape_ + "3Fid=138" + escape_ + "26content=true" + escape_); } TEST_F(UrlToFilenameEncoderTest, EncodeUrlCorrectly) { ValidateUrlOldNew("http://www.google.com/index.html", "www.google.com" + dir_sep_ + "indexx2Ehtml", "www.google.com" + dir_sep_ + "index.html" + escape_); ValidateUrlOldNew("http://www.google.com/x/search?hl=en&q=dogs&oq=", "www.google.com" + dir_sep_ + "x" + dir_sep_ + "searchx3Fhlx3Denx26qx3Ddogsx26oqx3D", "www.google.com" + dir_sep_ + "x" + dir_sep_ + "search" + escape_ + "3Fhl=en" + escape_ + "26q=dogs" + escape_ + "26oq=" + escape_); ValidateUrlOldNew("http://www.foo.com/a//", "www.foo.com" + dir_sep_ + "ax255Cx255Cindexx2Ehtml", "www.foo.com" + dir_sep_ + "a" + dir_sep_ + escape_ + "2F" + escape_); // From bug: Double slash preserved. ValidateUrl("http://www.foo.com/u?site=http://www.google.com/index.html", std::string(), false, "www.foo.com" + dir_sep_ + "u" + escape_ + "3Fsite=http" + escape_ + "3A" + dir_sep_ + escape_ + "2Fwww.google.com" + dir_sep_ + "index.html" + escape_); ValidateUrlOldNew( "http://blogutils.net/olct/online.php?" "site=http://thelwordfanfics.blogspot.&interval=600", "blogutils.net" + dir_sep_ + "olct" + dir_sep_ + "onlinex2Ephpx3F" "sitex3Dhttpx3Ax255Cx255Cthelwordfanficsx2Eblogspotx2Ex26intervalx3D600", "blogutils.net" + dir_sep_ + "olct" + dir_sep_ + "online.php" + escape_ + "3Fsite=http" + escape_ + "3A" + dir_sep_ + escape_ + "2Fthelwordfanfics.blogspot." + escape_ + "26interval=600" + escape_); } // From bug: Escapes treated the same as normal char. TEST_F(UrlToFilenameEncoderTest, UnescapeUrlsBeforeEncode) { for (int i = 0; i < 128; ++i) { string unescaped(1, static_cast(i)); string escaped = base::StringPrintf("%%%02X", i); ValidateEncodeSame(unescaped, escaped); } ValidateEncodeSame( "http://www.blogger.com/navbar.g?bName=God!&Mode=FOO&searchRoot" "=http%3A%2F%2Fsurvivorscanthrive.blogspot.com%2Fsearch", "http://www.blogger.com/navbar.g?bName=God%21&Mode=FOO&searchRoot" "=http%3A%2F%2Fsurvivorscanthrive.blogspot.com%2Fsearch"); } // From bug: Filename encoding is not prefix-free. TEST_F(UrlToFilenameEncoderTest, EscapeSecondSlash) { Validate("/", "/" + escape_); Validate("//", "/" + escape_ + "2F" + escape_); Validate("///", "/" + escape_ + "2F" + "/" + escape_); } TEST_F(UrlToFilenameEncoderTest, LongTail) { static char long_word[] = "~joebob/briggs/12345678901234567890123456789012345678901234567890" "1234567890123456789012345678901234567890123456789012345678901234567890" "1234567890123456789012345678901234567890123456789012345678901234567890" "1234567890123456789012345678901234567890123456789012345678901234567890" "1234567890123456789012345678901234567890123456789012345678901234567890" "1234567890123456789012345678901234567890123456789012345678901234567890"; // the long lines in the string below are 64 characters, so we can see // the slashes every 128. string gold_long_word = escape_ + "7Ejoebob/briggs/" "1234567890123456789012345678901234567890123456789012345678901234" "56789012345678901234567890123456789012345678901234567890123456" + escape_ + "-/" "7890123456789012345678901234567890123456789012345678901234567890" "12345678901234567890123456789012345678901234567890123456789012" + escape_ + "-/" "3456789012345678901234567890123456789012345678901234567890123456" "78901234567890123456789012345678901234567890123456789012345678" + escape_ + "-/" "9012345678901234567890" + escape_; EXPECT_LT(UrlToFilenameEncoder::kMaximumSubdirectoryLength, sizeof(long_word)); Validate(long_word, gold_long_word); } TEST_F(UrlToFilenameEncoderTest, LongTailQuestion) { // Here the '?' in the last path segment expands to @3F, making // it hit 128 chars before the input segment gets that big. static char long_word[] = "~joebob/briggs/1234567?1234567?1234567?1234567?1234567?" "1234567?1234567?1234567?1234567?1234567?1234567?1234567?" "1234567?1234567?1234567?1234567?1234567?1234567?1234567?" "1234567?1234567?1234567?1234567?1234567?1234567?1234567?" "1234567?1234567?1234567?1234567?1234567?1234567?1234567?" "1234567?1234567?1234567?1234567?1234567?1234567?1234567?"; // Notice that at the end of the third segment, we avoid splitting // the (escape_ + "3F") that was generated from the "?", so that segment is // only 127 characters. string pattern = "1234567" + escape_ + "3F"; // 10 characters string gold_long_word = escape_ + "7Ejoebob/briggs/" + pattern + pattern + pattern + pattern + pattern + pattern + "1234" "567" + escape_ + "3F" + pattern + pattern + pattern + pattern + pattern + "123456" + escape_ + "-/" "7" + escape_ + "3F" + pattern + pattern + pattern + pattern + pattern + pattern + pattern + pattern + pattern + pattern + pattern + pattern + "12" + escape_ + "-/" "34567" + escape_ + "3F" + pattern + pattern + pattern + pattern + pattern + "1234567" + escape_ + "3F" + pattern + pattern + pattern + pattern + pattern + "1234567" + escape_ + "-/" + escape_ + "3F" + pattern + pattern + escape_; EXPECT_LT(UrlToFilenameEncoder::kMaximumSubdirectoryLength, sizeof(long_word)); Validate(long_word, gold_long_word); } TEST_F(UrlToFilenameEncoderTest, CornerCasesNearMaxLenNoEscape) { // hit corner cases, +/- 4 characters from kMaxLen for (int i = -4; i <= 4; ++i) { string input; input.append(i + UrlToFilenameEncoder::kMaximumSubdirectoryLength, 'x'); ValidateAllSegmentsSmall(input); } } TEST_F(UrlToFilenameEncoderTest, CornerCasesNearMaxLenWithEscape) { // hit corner cases, +/- 4 characters from kMaxLen. This time we // leave off the last 'x' and put in a '.', which ensures that we // are truncating with '/' *after* the expansion. for (int i = -4; i <= 4; ++i) { string input; input.append(i + UrlToFilenameEncoder::kMaximumSubdirectoryLength - 1, 'x'); input.append(1, '.'); // this will expand to 3 characters. ValidateAllSegmentsSmall(input); } } TEST_F(UrlToFilenameEncoderTest, LeafBranchAlias) { Validate("/a/b/c", "/a/b/c" + escape_); // c is leaf file "c," Validate("/a/b/c/d", "/a/b/c/d" + escape_); // c is directory "c" Validate("/a/b/c/d/", "/a/b/c/d/" + escape_); } TEST_F(UrlToFilenameEncoderTest, BackslashSeparator) { string long_word; string escaped_word; long_word.append(UrlToFilenameEncoder::kMaximumSubdirectoryLength + 1, 'x'); UrlToFilenameEncoder::EncodeSegment( std::string(), long_word, '\\', &escaped_word); // check that one backslash, plus the escape ",-", and the ending , got added. EXPECT_EQ(long_word.size() + 4, escaped_word.size()); ASSERT_LT(UrlToFilenameEncoder::kMaximumSubdirectoryLength, escaped_word.size()); // Check that the backslash got inserted at the correct spot. EXPECT_EQ('\\', escaped_word[ UrlToFilenameEncoder::kMaximumSubdirectoryLength]); } } // namespace net