diff options
author | mbelshe@chromium.org <mbelshe@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-08-17 23:47:23 +0000 |
---|---|---|
committer | mbelshe@chromium.org <mbelshe@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-08-17 23:47:23 +0000 |
commit | 8cd1ba5a2fb6d73d9c58e15301ed9be572e97289 (patch) | |
tree | df4eaf91557d9287d19b6c3f2160a9b5e9b1e618 | |
parent | 22697f1356f97eab7ccfd6c2469f9e2d52bdd4f1 (diff) | |
download | chromium_src-8cd1ba5a2fb6d73d9c58e15301ed9be572e97289.zip chromium_src-8cd1ba5a2fb6d73d9c58e15301ed9be572e97289.tar.gz chromium_src-8cd1ba5a2fb6d73d9c58e15301ed9be572e97289.tar.bz2 |
Syncing url_to_filename_encoder with internal version.
I had to implement UrlUtilities::Unescape and ported over the tests as well.
Deleted redundant copy of url_to_filename_encoder.h and pointed the one link to that at the new location: net/tools/dump_cache/url_to_filename_encoder.h
This patch is on behalf of sligocki@google.com
BUG=none
TEST=url_to_filename_encoder_unittest.
Review URL: http://codereview.chromium.org/3117019
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@56454 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r-- | net/net.gyp | 6 | ||||
-rw-r--r-- | net/tools/dump_cache/dump_cache.cc | 4 | ||||
-rw-r--r-- | net/tools/dump_cache/url_to_filename_encoder.cc | 58 | ||||
-rw-r--r-- | net/tools/dump_cache/url_to_filename_encoder.h | 160 | ||||
-rw-r--r-- | net/tools/dump_cache/url_to_filename_encoder_unittest.cc | 191 | ||||
-rw-r--r-- | net/tools/dump_cache/url_utilities.cc | 126 | ||||
-rw-r--r-- | net/tools/dump_cache/url_utilities.h | 69 | ||||
-rw-r--r-- | net/tools/dump_cache/url_utilities_unittest.cc | 113 | ||||
-rw-r--r-- | net/tools/flip_server/flip_in_mem_edsm_server.cc | 5 | ||||
-rw-r--r-- | net/tools/flip_server/url_to_filename_encoder.h | 128 | ||||
-rw-r--r-- | net/tools/flip_server/url_utilities.h | 70 |
11 files changed, 510 insertions, 420 deletions
diff --git a/net/net.gyp b/net/net.gyp index 7d34f00..0b5b5d4 100644 --- a/net/net.gyp +++ b/net/net.gyp @@ -798,6 +798,9 @@ 'tools/dump_cache/url_to_filename_encoder.cc', 'tools/dump_cache/url_to_filename_encoder.h', 'tools/dump_cache/url_to_filename_encoder_unittest.cc', + 'tools/dump_cache/url_utilities.h', + 'tools/dump_cache/url_utilities.cc', + 'tools/dump_cache/url_utilities_unittest.cc', 'url_request/url_request_job_tracker_unittest.cc', 'url_request/url_request_unittest.cc', 'url_request/url_request_unittest.h', @@ -1133,7 +1136,8 @@ 'tools/dump_cache/upgrade.cc', 'tools/dump_cache/url_to_filename_encoder.cc', 'tools/dump_cache/url_to_filename_encoder.h', - 'tools/dump_cache/url_utilties.h', + 'tools/dump_cache/url_utilities.h', + 'tools/dump_cache/url_utilities.cc', ], }, ], diff --git a/net/tools/dump_cache/dump_cache.cc b/net/tools/dump_cache/dump_cache.cc index 086a9b5..d60faa4 100644 --- a/net/tools/dump_cache/dump_cache.cc +++ b/net/tools/dump_cache/dump_cache.cc @@ -124,6 +124,10 @@ int main(int argc, const char* argv[]) { bool copy_to_text = false; // TODO(evanm): port to FilePath. std::wstring output_path = command_line.GetSwitchValueNative(kOutputPath); + // Make sure that output directory ends with a slash. + if (output_path.size() >= 1 && output_path[output_path.size() - 1] != '\\') + output_path.push_back('\\'); + if (command_line.HasSwitch(kUpgrade)) upgrade = true; if (command_line.HasSwitch(kDumpToFiles)) diff --git a/net/tools/dump_cache/url_to_filename_encoder.cc b/net/tools/dump_cache/url_to_filename_encoder.cc index 1462faa..84d79c7 100644 --- a/net/tools/dump_cache/url_to_filename_encoder.cc +++ b/net/tools/dump_cache/url_to_filename_encoder.cc @@ -40,16 +40,16 @@ uint64 ParseLeadingHex64Value(const char *str, uint64 deflt) { namespace net { // The escape character choice is made here -- all code and tests in this -// directory are based off of this constant. However, our test ata +// directory are based off of this constant. 
However, our testdata // has tons of dependencies on this, so it cannot be changed without // re-running those tests and fixing them. -const char kTruncationChar = '-'; -const char kEscapeChar = ','; -const size_t kMaximumSubdirectoryLength = 128; +const char UrlToFilenameEncoder::kEscapeChar = ','; +const char UrlToFilenameEncoder::kTruncationChar = '-'; +const size_t UrlToFilenameEncoder::kMaximumSubdirectoryLength = 128; -void UrlToFilenameEncoder::AppendSegment( - char dir_separator, string* segment, string* dest) { - if (segment->empty() || (*segment == ".") || (*segment == "..")) { +void UrlToFilenameEncoder::AppendSegment(string* segment, string* dest) { + CHECK(!segment->empty()); + if ((*segment == ".") || (*segment == "..")) { dest->append(1, kEscapeChar); dest->append(*segment); segment->clear(); @@ -83,9 +83,11 @@ void UrlToFilenameEncoder::AppendSegment( } void UrlToFilenameEncoder::EncodeSegment(const string& filename_prefix, - const string& filename_ending, + const string& escaped_ending, char dir_separator, string* encoded_filename) { + string filename_ending = UrlUtilities::Unescape(escaped_ending); + char encoded[3]; int encoded_len; string segment; @@ -113,22 +115,17 @@ void UrlToFilenameEncoder::EncodeSegment(const string& filename_prefix, for (; index < filename_ending.length(); ++index) { unsigned char ch = static_cast<unsigned char>(filename_ending[index]); - if (ch == dir_separator) { - AppendSegment(dir_separator, &segment, encoded_filename); + // Note: instead of outputing an empty segment, we let the second slash + // be escaped below. + if ((ch == dir_separator) && !segment.empty()) { + AppendSegment(&segment, encoded_filename); encoded_filename->append(1, dir_separator); segment.clear(); } else { - // & is common in URLs and is legal filename syntax, but is also - // a special Unix shell character, so let's avoid making - // filenames with &, as well as ?. 
It's probably better to - // blow up query-params than it is to make it hard to work with - // the files in shell-scripts. - if ((ch == 0x5F) || (ch == 0x2E) || // underscore period - (ch == 0x25) || (ch == 0x3D) || // percent equals - (ch == 0x2B) || (ch == 0x2D) || // plus dash - ((0x30 <= ch) && (ch <= 0x39)) || // Digits [0-9] - ((0x41 <= ch) && (ch <= 0x5A)) || // Uppercase [A-Z] - ((0x61 <= ch) && (ch <= 0x7A))) { // Lowercase [a-z] + // After removing unsafe chars the only safe ones are _.=+- and alphanums. + if ((ch == '_') || (ch == '.') || (ch == '=') || (ch == '+') || + (ch == '-') || (('0' <= ch) && (ch <= '9')) || + (('A' <= ch) && (ch <= 'Z')) || (('a' <= ch) && (ch <= 'z'))) { encoded[0] = ch; encoded_len = 1; } else { @@ -141,13 +138,9 @@ void UrlToFilenameEncoder::EncodeSegment(const string& filename_prefix, } segment.append(encoded, encoded_len); - // Note: We chop paths into medium sized 'chunks'. - // This is due to filename limits on Windows and Unix. - // The Windows limit appears to be 128 characters, and - // Unix is larger, but not as large as URLs with large - // numbers of query params. + // If segment is too big, we must chop it into chunks. if (segment.size() > kMaximumSubdirectoryLength) { - AppendSegment(dir_separator, &segment, encoded_filename); + AppendSegment(&segment, encoded_filename); encoded_filename->append(1, dir_separator); } } @@ -159,7 +152,7 @@ void UrlToFilenameEncoder::EncodeSegment(const string& filename_prefix, // us over the 128 char limit, then we will need to append "/" and the // remaining chars. segment += kEscapeChar; - AppendSegment(dir_separator, &segment, encoded_filename); + AppendSegment(&segment, encoded_filename); if (!segment.empty()) { // The last overflow segment is special, because we appended in // kEscapeChar above. 
We won't need to check it again for size @@ -191,6 +184,8 @@ bool UrlToFilenameEncoder::Decode(const string& encoded_filename, case kStart: if (ch == kEscapeChar) { state = kEscape; + } else if (ch == dir_separator) { + decoded_url->append(1, '/'); // URLs only use '/' not '\\' } else { decoded_url->append(1, ch); } @@ -205,9 +200,9 @@ bool UrlToFilenameEncoder::Decode(const string& encoded_filename, decoded_url->append(1, '.'); state = kEscapeDot; // Look for at most one more dot. } else if (ch == dir_separator) { - // Consider url "//x". This will get encoded to "/,/x,". + // Consider url "//x". This was once encoded to "/,/x,". // This code is what skips the first Escape. - decoded_url->append(1, ch); + decoded_url->append(1, '/'); // URLs only use '/' not '\\' state = kStart; } else { return false; @@ -244,7 +239,7 @@ bool UrlToFilenameEncoder::Decode(const string& encoded_filename, return (state == kEscape); } -// Escapes the given input |path| and chop any individual components +// Escape the given input |path| and chop any individual components // of the path which are greater than kMaximumSubdirectoryLength characters // into two chunks. // @@ -295,4 +290,3 @@ string UrlToFilenameEncoder::LegacyEscape(const string& path) { } } // namespace net - diff --git a/net/tools/dump_cache/url_to_filename_encoder.h b/net/tools/dump_cache/url_to_filename_encoder.h index 9e888ce..0646153 100644 --- a/net/tools/dump_cache/url_to_filename_encoder.h +++ b/net/tools/dump_cache/url_to_filename_encoder.h @@ -14,60 +14,64 @@ // with Facebook Connect. // // We need an escape-character for representing characters that are legal -// in URL paths, but not in filenames, such as '?'. Illegal characters -// in Windows are <>:"/\|?*. For reference, see -// http://msdn.microsoft.com/en-us/library/aa365247(VS.85).aspx +// in URL paths, but not in filenames, such as '?'. // // We can pick any legal character as an escape, as long as we escape it too. 
// But as we have a goal of having filenames that humans can correlate with // URLs, we should pick one that doesn't show up frequently in URLs. Candidates // are ~`!@#$%^&()-=_+{}[],. but we would prefer to avoid characters that are -// shell escapes, and characters that occur frequently in URLs. +// shell escapes or that various build tools use. // // .#&%-=_+ occur frequently in URLs. -// ~`!$^&(){}[] are special to Unix shells +// <>:"/\|?* are illegal in Windows +// See http://msdn.microsoft.com/en-us/library/aa365247(VS.85).aspx +// ~`!$^&(){}[]'; are special to Unix shells +// In addition, build tools do not like ^@#% // -// @ might seem like a reasonble option, but some build tools don't appreciate -// filenames with @ in testdata. Perforce does not appreciate # in a filename. +// Josh took a quick look at the frequency of some special characters in +// Sadeesh's slurped directory from Fall 09 and found the following occurances: // -// Though a web-site http://www.vias.org/linux-knowhow/lnag_05_05_09.html -// identifies ^ as a special shell character, it did not appear to be an -// issue to use it unquoted as a filename in bash or tcsh. -// -// Here are some frequencies of some special characters in a data set from Fall -// '09. We find only 3 occurences of "x5E" (^ is ascii 0x53): -// ^ 3 build tools don't like ^ in testdata filenames -// @ 10 build tools don't like @ in testdata filenames +// ^ 3 build tool doesn't like ^ in testdata filenames +// @ 10 build tool doesn't like @ in testdata filenames // . 
1676 too frequent in URLs // , 76 THE WINNER -// # 0 build tools doesn't like it +// # 0 build tool doesn't like it // & 487 Prefer to avoid shell escapes // % 374 g4 doesn't like it // = 579 very frequent in URLs -- leave unmodified // - 464 very frequent in URLs -- leave unmodified // _ 798 very frequent in URLs -- leave unmodified // -// It is interesting that there were no slurped URLs with #, but I suspect this -// might be due to the slurping methdology. So let's stick with the relatively -// rare ','. // -// Here's the escaping methodology: +// The escaping algorithm is: +// 1) Escape all unfriendly symbols as ,XX where XX is the hex code. +// 2) Add a ',' at the end (We do not allow ',' at end of any directory name, +// so this assures that e.g. /a and /a/b can coexist in the filesystem). +// 3) Go through the path segment by segment (where a segment is one directory +// or leaf in the path) and +// 3a) If the segment is empty, escape the second slash. i.e. if it was +// www.foo.com//a then we escape the second / like www.foo.com/,2Fa, +// 3a) If it is "." or ".." prepend with ',' (so that we have a non- +// empty and non-reserved filename). +// 3b) If it is over 128 characters, break it up into smaller segments by +// inserting ,-/ (Windows limits paths to 128 chars, other OSes also +// have limits that would restrict us) // +// For example: // URL File // / /, +// /index.html /index.html, // /. /., -// // /,/, +// /a/b /a/b, +// /a/b/ /a/b/, +// /a/b/c /a/b/c, Note: no prefix problem +// /u?foo=bar /u,3Ffoo=bar, +// // /,2F, // /./ /,./, // /../ /,../, // /, /,2C, -// /,/ /,2C/, -// /a/b /a/b, (, at the end of a name indicates a leaf). -// /a/b/ /a/b/, -// -// path segments greater than 128 characters (after escape expansion) are -// suffixed with ,- so we can know that the next "/" is not part of the URL: -// -// /verylongname/ /verylong,-/name +// /,./ /,2C./, +// /very...longname/ /very...long,-/name If very...long is about 126 long. 
// NOTE: we avoid using some classes here (like FilePath and GURL) because we // share this code with other projects externally. @@ -88,48 +92,56 @@ namespace net { // Helper class for converting a URL into a filename. class UrlToFilenameEncoder { public: - // Given a |url| and a |base_path|, returns a string which represents this - // |url|. + // Given a |url| and a |base_path|, returns a filename which represents this + // |url|. |url| may include URL escaping such as %21 for ! // |legacy_escape| indicates that this function should use the old-style // of encoding. // TODO(mbelshe): delete the legacy_escape code. static std::string Encode(const std::string& url, std::string base_path, bool legacy_escape) { - std::string clean_url(url); - if (clean_url.length() && clean_url[clean_url.length()-1] == '/') - clean_url.append("index.html"); - - std::string host = UrlUtilities::GetUrlHost(clean_url); - std::string filename(base_path); - filename.append("\\"); - filename = filename.append(host); - filename.append("\\"); + std::string filename; + if (!legacy_escape) { + std::string url_no_scheme = UrlUtilities::GetUrlHostPath(url); + EncodeSegment(base_path, url_no_scheme, '/', &filename); +#ifdef WIN32 + ReplaceAll(&filename, "/", "\\"); +#endif + } else { + std::string clean_url(url); + if (clean_url.length() && clean_url[clean_url.length()-1] == '/') + clean_url.append("index.html"); + + std::string host = UrlUtilities::GetUrlHost(clean_url); + filename.append(base_path); + filename.append(host); +#ifdef WIN32 + filename.append("\\"); +#else + filename.append("/"); +#endif - std::string url_filename = UrlUtilities::GetUrlPath(clean_url); - // Strip the leading '/' - if (url_filename[0] == '/') - url_filename = url_filename.substr(1); + std::string url_filename = UrlUtilities::GetUrlPath(clean_url); + // Strip the leading '/'. 
+ if (url_filename[0] == '/') + url_filename = url_filename.substr(1); - // replace '/' with '\' - ConvertToSlashes(&url_filename); + // Replace '/' with '\'. + ConvertToSlashes(&url_filename); - // strip double slashes ("\\") - StripDoubleSlashes(&url_filename); + // Strip double back-slashes ("\\\\"). + StripDoubleSlashes(&url_filename); - // Save path as filesystem-safe characters - if (legacy_escape) { + // Save path as filesystem-safe characters. url_filename = LegacyEscape(url_filename); - } else { - url_filename = Escape(url_filename); - } - filename = filename.append(url_filename); + filename.append(url_filename); #ifndef WIN32 - // Last step - convert to native slashes! - const std::string slash("/"); - const std::string backslash("\\"); - ReplaceAll(&filename, backslash, slash); + // Last step - convert to native slashes. + const std::string slash("/"); + const std::string backslash("\\"); + ReplaceAll(&filename, backslash, slash); #endif + } return filename; } @@ -137,12 +149,13 @@ class UrlToFilenameEncoder { // Rewrite HTML in a form that the SPDY in-memory server // can read. // |filename_prefix| is prepended without escaping. - // |filename_ending| is the URL to be encoded into a filename. + // |escaped_ending| is the URL to be encoded into a filename. It may have URL + // escaped characters (like %21 for !). // |dir_separator| is "/" on Unix, "\" on Windows. // |encoded_filename| is the resultant filename. 
static void EncodeSegment( const std::string& filename_prefix, - const std::string& filename_ending, + const std::string& escaped_ending, char dir_separator, std::string* encoded_filename); @@ -152,34 +165,28 @@ class UrlToFilenameEncoder { char dir_separator, std::string* decoded_url); + static const char kEscapeChar; + static const char kTruncationChar; + static const size_t kMaximumSubdirectoryLength; + + friend class UrlToFilenameEncoderTest; + private: - // Appends a segment of the path, special-casing ".", "..", and "", and + // Appends a segment of the path, special-casing "." and "..", and // ensuring that the segment does not exceed the path length. If it does, // it chops the end off the segment, writes the segment with a separator of // ",-/", and then rewrites segment to contain just the truncated piece so // it can be used in the next iteration. - // |dir_separator| is "/" on Unix, "\" on Windows. // |segment| is a read/write parameter containing segment to write - static void AppendSegment( - char dir_separator, - std::string* segment, - std::string* dest); - - // Escapes the given input |path| and chop any individual components - // of the path which are greater than kMaximumSubdirectoryLength characters - // into two chunks. - static std::string Escape(const std::string& path) { - std::string output; - EncodeSegment("", path, '\\', &output); - return output; - } + // Note: this should not be called with empty segment. + static void AppendSegment(std::string* segment, std::string* dest); // Allow reading of old slurped files. static std::string LegacyEscape(const std::string& path); // Replace all instances of |from| within |str| as |to|. 
static void ReplaceAll(std::string* str, const std::string& from, - const std::string& to) { + const std::string& to) { std::string::size_type pos(0); while ((pos = str->find(from, pos)) != std::string::npos) { str->replace(pos, from.size(), to); @@ -205,4 +212,3 @@ class UrlToFilenameEncoder { } // namespace net #endif // NET_TOOLS_DUMP_CACHE_URL_TO_FILE_ENCODER_H_ - diff --git a/net/tools/dump_cache/url_to_filename_encoder_unittest.cc b/net/tools/dump_cache/url_to_filename_encoder_unittest.cc index 32cef99..de0992e 100644 --- a/net/tools/dump_cache/url_to_filename_encoder_unittest.cc +++ b/net/tools/dump_cache/url_to_filename_encoder_unittest.cc @@ -15,37 +15,39 @@ using std::string; namespace net { -// The escape character choice is made here -- all code and tests in this -// directory are based off of this constant. However, our test ata -// has tons of dependencies on this, so it cannot be changed without -// re-running those tests and fixing them. -const char kTruncationChar = '-'; -const char kEscapeChar = ','; -const size_t kMaximumSubdirectoryLength = 128; +#ifdef WIN32 +char kDirSeparator = '\\'; +char kOtherDirSeparator = '/'; +#else +char kDirSeparator = '/'; +char kOtherDirSeparator = '\\'; +#endif class UrlToFilenameEncoderTest : public ::testing::Test { protected: - UrlToFilenameEncoderTest() : escape_(1, kEscapeChar) {} + UrlToFilenameEncoderTest() : escape_(1, UrlToFilenameEncoder::kEscapeChar), + dir_sep_(1, kDirSeparator) { + } void CheckSegmentLength(const StringPiece& escaped_word) { std::vector<StringPiece> components; Tokenize(escaped_word, StringPiece("/"), &components); for (size_t i = 0; i < components.size(); ++i) { - EXPECT_GE(kMaximumSubdirectoryLength, + EXPECT_GE(UrlToFilenameEncoder::kMaximumSubdirectoryLength, components[i].size()); } } - void CheckValidChars(const StringPiece& escaped_word) { - // These characters are invalid in Windows. 
We will - // ignore / for this test, but add in ', as that's pretty + void CheckValidChars(const StringPiece& escaped_word, char invalid_slash) { + // These characters are invalid in Windows. We add in ', as that's pretty // inconvenient in a Unix filename. // // See http://msdn.microsoft.com/en-us/library/aa365247(VS.85).aspx - static const char kInvalidChars[] = "<>:\"\\|?*'"; + const string kInvalidChars = "<>:\"|?*'"; for (size_t i = 0; i < escaped_word.size(); ++i) { char c = escaped_word[i]; - EXPECT_EQ(NULL, strchr(kInvalidChars, c)); + EXPECT_EQ(string::npos, kInvalidChars.find(c)); + EXPECT_NE(invalid_slash, c); EXPECT_NE('\0', c); // only invalid character in Posix EXPECT_GT(0x7E, c); // only English printable characters } @@ -56,7 +58,7 @@ class UrlToFilenameEncoderTest : public ::testing::Test { UrlToFilenameEncoder::EncodeSegment("", in_word, '/', &escaped_word); EXPECT_EQ(gold_word, escaped_word); CheckSegmentLength(escaped_word); - CheckValidChars(escaped_word); + CheckValidChars(escaped_word, '\\'); UrlToFilenameEncoder::Decode(escaped_word, '/', &url); EXPECT_EQ(in_word, url); } @@ -65,7 +67,7 @@ class UrlToFilenameEncoderTest : public ::testing::Test { string escaped_word, url; UrlToFilenameEncoder::EncodeSegment("", in_word, '/', &escaped_word); CheckSegmentLength(escaped_word); - CheckValidChars(escaped_word); + CheckValidChars(escaped_word, '\\'); UrlToFilenameEncoder::Decode(escaped_word, '/', &url); EXPECT_EQ(in_word, url); } @@ -78,12 +80,42 @@ class UrlToFilenameEncoderTest : public ::testing::Test { void ValidateEscaped(unsigned char ch) { // We always suffix the leaf with kEscapeChar, unless the leaf is empty. 
char escaped[100]; - const char escape = kEscapeChar; + const char escape = UrlToFilenameEncoder::kEscapeChar; base::snprintf(escaped, sizeof(escaped), "%c%02X%c", escape, ch, escape); Validate(string(1, ch), escaped); } + void ValidateUrl(const string& url, const string& base_path, + bool legacy_escape, const string& gold_filename) { + string encoded_filename = UrlToFilenameEncoder::Encode( + url, base_path, legacy_escape); + EXPECT_EQ(gold_filename, encoded_filename); + if (!legacy_escape) { + CheckSegmentLength(encoded_filename); + CheckValidChars(encoded_filename, kOtherDirSeparator); + string decoded_url; + UrlToFilenameEncoder::Decode(encoded_filename, kDirSeparator, + &decoded_url); + if (url != decoded_url) { + EXPECT_EQ(url, "http://" + decoded_url); + } + } + } + + void ValidateUrlOldNew(const string& url, const string& gold_old_filename, + const string& gold_new_filename) { + ValidateUrl(url, "", true, gold_old_filename); + ValidateUrl(url, "", false, gold_new_filename); + } + + void ValidateEncodeSame(const string& url1, const string& url2) { + string filename1 = UrlToFilenameEncoder::Encode(url1, "", false); + string filename2 = UrlToFilenameEncoder::Encode(url2, "", false); + EXPECT_EQ(filename1, filename2); + } + string escape_; + string dir_sep_; }; TEST_F(UrlToFilenameEncoderTest, DoesNotEscape) { @@ -93,49 +125,31 @@ TEST_F(UrlToFilenameEncoderTest, DoesNotEscape) { ValidateNoChange("ZYXWVUT"); ValidateNoChange("ZYXWVUTSRQPONMLKJIHGFEDCBA"); ValidateNoChange("01234567689"); - ValidateNoChange("/-_"); + ValidateNoChange("_.=+-"); ValidateNoChange("abcdefghijklmnopqrstuvwxyzZYXWVUTSRQPONMLKJIHGFEDCBA" - "01234567689/-_"); + "01234567689_.=+-"); ValidateNoChange("index.html"); ValidateNoChange("/"); ValidateNoChange("/."); ValidateNoChange("."); ValidateNoChange(".."); - ValidateNoChange("%"); - ValidateNoChange("="); - ValidateNoChange("+"); - ValidateNoChange("_"); } TEST_F(UrlToFilenameEncoderTest, Escapes) { - ValidateEscaped('!'); - 
ValidateEscaped('"'); - ValidateEscaped('#'); - ValidateEscaped('$'); - ValidateEscaped('&'); - ValidateEscaped('('); - ValidateEscaped(')'); - ValidateEscaped('*'); - ValidateEscaped(','); - ValidateEscaped(':'); - ValidateEscaped(';'); - ValidateEscaped('<'); - ValidateEscaped('>'); - ValidateEscaped('@'); - ValidateEscaped('['); - ValidateEscaped('\''); - ValidateEscaped('\\'); - ValidateEscaped(']'); - ValidateEscaped('^'); - ValidateEscaped('`'); - ValidateEscaped('{'); - ValidateEscaped('|'); - ValidateEscaped('}'); - ValidateEscaped('~'); - - // check non-printable characters + const string bad_chars = + "<>:\"\\|?*" // Illegal on Windows + "~`!$^&(){}[]';" // Bad for Unix shells + "^@" // Build tool doesn't like + "#%" // Tool doesn't like + ","; // The escape char has to be escaped + + for (size_t i = 0; i < bad_chars.size(); ++i) { + ValidateEscaped(bad_chars[i]); + } + + // Check non-printable characters. ValidateEscaped('\0'); - for (int i = 127; i < 256; ++i) { + for (size_t i = 127; i < 256; ++i) { ValidateEscaped(static_cast<char>(i)); } } @@ -144,10 +158,10 @@ TEST_F(UrlToFilenameEncoderTest, DoesEscapeCorrectly) { Validate("mysite.com&x", "mysite.com" + escape_ + "26x" + escape_); Validate("/./", "/" + escape_ + "./" + escape_); Validate("/../", "/" + escape_ + "../" + escape_); - Validate("//", "/" + escape_ + "/" + escape_); + Validate("//", "/" + escape_ + "2F" + escape_); Validate("/./leaf", "/" + escape_ + "./leaf" + escape_); Validate("/../leaf", "/" + escape_ + "../leaf" + escape_); - Validate("//leaf", "/" + escape_ + "/leaf" + escape_); + Validate("//leaf", "/" + escape_ + "2Fleaf" + escape_); Validate("mysite/u?param1=x&param2=y", "mysite/u" + escape_ + "3Fparam1=x" + escape_ + "26param2=y" + escape_); @@ -159,6 +173,63 @@ TEST_F(UrlToFilenameEncoderTest, DoesEscapeCorrectly) { "3Fid=138" + escape_ + "26content=true" + escape_); } +TEST_F(UrlToFilenameEncoderTest, EncodeUrlCorrectly) { + ValidateUrlOldNew("http://www.google.com/index.html", + 
"www.google.com" + dir_sep_ + "indexx2Ehtml", + "www.google.com" + dir_sep_ + "index.html" + escape_); + ValidateUrlOldNew("http://www.google.com/x/search?hl=en&q=dogs&oq=", + "www.google.com" + dir_sep_ + "x" + dir_sep_ + + "searchx3Fhlx3Denx26qx3Ddogsx26oqx3D", + + "www.google.com" + dir_sep_ + "x" + dir_sep_ + "search" + + escape_ + "3Fhl=en" + escape_ + "26q=dogs" + escape_ + + "26oq=" + escape_); + ValidateUrlOldNew("http://www.foo.com/a//", + "www.foo.com" + dir_sep_ + "ax255Cx255Cindexx2Ehtml", + "www.foo.com" + dir_sep_ + "a" + dir_sep_ + escape_ + "2F" + + escape_); + + // From bug: Double slash preserved. + ValidateUrl("http://www.foo.com/u?site=http://www.google.com/index.html", + "", false, + "www.foo.com" + dir_sep_ + "u" + escape_ + "3Fsite=http" + + escape_ + "3A" + dir_sep_ + escape_ + "2Fwww.google.com" + + dir_sep_ + "index.html" + escape_); + ValidateUrlOldNew( + "http://blogutils.net/olct/online.php?" + "site=http://thelwordfanfics.blogspot.&interval=600", + + "blogutils.net" + dir_sep_ + "olct" + dir_sep_ + "onlinex2Ephpx3F" + "sitex3Dhttpx3Ax255Cx255Cthelwordfanficsx2Eblogspotx2Ex26intervalx3D600", + + "blogutils.net" + dir_sep_ + "olct" + dir_sep_ + "online.php" + escape_ + + "3Fsite=http" + escape_ + "3A" + dir_sep_ + escape_ + + "2Fthelwordfanfics.blogspot." + escape_ + "26interval=600" + escape_); +} + +// From bug: Escapes treated the same as normal char. +TEST_F(UrlToFilenameEncoderTest, UnescapeUrlsBeforeEncode) { + for (int i = 0; i < 128; ++i) { + string unescaped(1, static_cast<char>(i)); + string escaped = StringPrintf("%%%02X", i); + ValidateEncodeSame(unescaped, escaped); + } + + ValidateEncodeSame( + "http://www.blogger.com/navbar.g?bName=God!&Mode=FOO&searchRoot" + "=http%3A%2F%2Fsurvivorscanthrive.blogspot.com%2Fsearch", + + "http://www.blogger.com/navbar.g?bName=God%21&Mode=FOO&searchRoot" + "=http%3A%2F%2Fsurvivorscanthrive.blogspot.com%2Fsearch"); +} + +// From bug: Filename encoding is not prefix-free. 
+TEST_F(UrlToFilenameEncoderTest, EscapeSecondSlash) { + Validate("/", "/" + escape_); + Validate("//", "/" + escape_ + "2F" + escape_); + Validate("///", "/" + escape_ + "2F" + "/" + escape_); +} + TEST_F(UrlToFilenameEncoderTest, LongTail) { static char long_word[] = "~joebob/briggs/12345678901234567890123456789012345678901234567890" @@ -182,7 +253,7 @@ TEST_F(UrlToFilenameEncoderTest, LongTail) { "78901234567890123456789012345678901234567890123456789012345678" + escape_ + "-/" "9012345678901234567890" + escape_; - EXPECT_LT(kMaximumSubdirectoryLength, + EXPECT_LT(UrlToFilenameEncoder::kMaximumSubdirectoryLength, sizeof(long_word)); Validate(long_word, gold_long_word); } @@ -216,7 +287,7 @@ TEST_F(UrlToFilenameEncoderTest, LongTailQuestion) { + pattern + "1234567" + escape_ + "-/" + escape_ + "3F" + pattern + pattern + escape_; - EXPECT_LT(kMaximumSubdirectoryLength, + EXPECT_LT(UrlToFilenameEncoder::kMaximumSubdirectoryLength, sizeof(long_word)); Validate(long_word, gold_long_word); } @@ -225,7 +296,7 @@ TEST_F(UrlToFilenameEncoderTest, CornerCasesNearMaxLenNoEscape) { // hit corner cases, +/- 4 characters from kMaxLen for (int i = -4; i <= 4; ++i) { string input; - input.append(i + kMaximumSubdirectoryLength, 'x'); + input.append(i + UrlToFilenameEncoder::kMaximumSubdirectoryLength, 'x'); ValidateAllSegmentsSmall(input); } } @@ -236,7 +307,7 @@ TEST_F(UrlToFilenameEncoderTest, CornerCasesNearMaxLenWithEscape) { // are truncating with '/' *after* the expansion. for (int i = -4; i <= 4; ++i) { string input; - input.append(i + kMaximumSubdirectoryLength - 1, 'x'); + input.append(i + UrlToFilenameEncoder::kMaximumSubdirectoryLength - 1, 'x'); input.append(1, '.'); // this will expand to 3 characters. 
ValidateAllSegmentsSmall(input); } @@ -252,17 +323,17 @@ TEST_F(UrlToFilenameEncoderTest, LeafBranchAlias) { TEST_F(UrlToFilenameEncoderTest, BackslashSeparator) { string long_word; string escaped_word; - long_word.append(kMaximumSubdirectoryLength + 1, 'x'); + long_word.append(UrlToFilenameEncoder::kMaximumSubdirectoryLength + 1, 'x'); UrlToFilenameEncoder::EncodeSegment("", long_word, '\\', &escaped_word); // check that one backslash, plus the escape ",-", and the ending , got added. EXPECT_EQ(long_word.size() + 4, escaped_word.size()); - ASSERT_LT(kMaximumSubdirectoryLength, + ASSERT_LT(UrlToFilenameEncoder::kMaximumSubdirectoryLength, escaped_word.size()); // Check that the backslash got inserted at the correct spot. EXPECT_EQ('\\', escaped_word[ - kMaximumSubdirectoryLength]); + UrlToFilenameEncoder::kMaximumSubdirectoryLength]); } -} // namespace +} // namespace net diff --git a/net/tools/dump_cache/url_utilities.cc b/net/tools/dump_cache/url_utilities.cc new file mode 100644 index 0000000..fe64bd9 --- /dev/null +++ b/net/tools/dump_cache/url_utilities.cc @@ -0,0 +1,126 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "net/tools/dump_cache/url_utilities.h" + +#include "base/logging.h" +#include "base/string_number_conversions.h" +#include "base/string_util.h" + +namespace net { + +std::string UrlUtilities::GetUrlHost(const std::string& url) { + size_t b = url.find("//"); + if (b == std::string::npos) + b = 0; + else + b += 2; + size_t next_slash = url.find_first_of('/', b); + size_t next_colon = url.find_first_of(':', b); + if (next_slash != std::string::npos + && next_colon != std::string::npos + && next_colon < next_slash) { + return std::string(url, b, next_colon - b); + } + if (next_slash == std::string::npos) { + if (next_colon != std::string::npos) { + return std::string(url, b, next_colon - b); + } else { + next_slash = url.size(); + } + } + return std::string(url, b, next_slash - b); +} + +std::string UrlUtilities::GetUrlHostPath(const std::string& url) { + size_t b = url.find("//"); + if (b == std::string::npos) + b = 0; + else + b += 2; + return std::string(url, b); +} + +std::string UrlUtilities::GetUrlPath(const std::string& url) { + size_t b = url.find("//"); + if (b == std::string::npos) + b = 0; + else + b += 2; + b = url.find("/", b); + if (b == std::string::npos) + return "/"; + + size_t e = url.find("#", b+1); + if (e != std::string::npos) + return std::string(url, b, (e - b)); + return std::string(url, b); +} + +namespace { + +// Parsing states for UrlUtilities::Unescape +enum UnescapeState { + NORMAL, // We are not in the middle of parsing an escape. + ESCAPE1, // We just parsed % . + ESCAPE2 // We just parsed %X for some hex digit X. 
+}; + +} // namespace + +std::string UrlUtilities::Unescape(const std::string& escaped_url) { + std::string unescaped_url, escape_text; + int escape_value; + UnescapeState state = NORMAL; + std::string::const_iterator iter = escaped_url.begin(); + while (iter < escaped_url.end()) { + char c = *iter; + switch (state) { + case NORMAL: + if (c == '%') { + escape_text.clear(); + state = ESCAPE1; + } else { + unescaped_url.push_back(c); + } + ++iter; + break; + case ESCAPE1: + if (IsHexDigit(c)) { + escape_text.push_back(c); + state = ESCAPE2; + ++iter; + } else { + // Unexpected, % followed by non-hex chars, pass it through. + unescaped_url.push_back('%'); + state = NORMAL; + } + break; + case ESCAPE2: + if (IsHexDigit(c)) { + escape_text.push_back(c); + bool ok = base::HexStringToInt(escape_text, &escape_value); + DCHECK(ok); + unescaped_url.push_back(static_cast<unsigned char>(escape_value)); + state = NORMAL; + ++iter; + } else { + // Unexpected, % followed by non-hex chars, pass it through. + unescaped_url.push_back('%'); + unescaped_url.append(escape_text); + state = NORMAL; + } + break; + } + } + // Unexpected, % followed by end of string, pass it through. + if (state == ESCAPE1 || state == ESCAPE2) { + unescaped_url.push_back('%'); + unescaped_url.append(escape_text); + } + return unescaped_url; +} + +} // namespace net + diff --git a/net/tools/dump_cache/url_utilities.h b/net/tools/dump_cache/url_utilities.h index 5c1f406..7b926f1 100644 --- a/net/tools/dump_cache/url_utilities.h +++ b/net/tools/dump_cache/url_utilities.h @@ -10,56 +10,27 @@ namespace net { -namespace UrlUtilities { - -// Gets the host from an url, strips the port number as well if the url -// has one. 
-// For example: calling GetUrlHost(www.foo.com:8080/boo) returns www.foo.com -static std::string GetUrlHost(const std::string& url) { - size_t b = url.find("//"); - if (b == std::string::npos) - b = 0; - else - b += 2; - size_t next_slash = url.find_first_of('/', b); - size_t next_colon = url.find_first_of(':', b); - if (next_slash != std::string::npos - && next_colon != std::string::npos - && next_colon < next_slash) { - return std::string(url, b, next_colon - b); - } - if (next_slash == std::string::npos) { - if (next_colon != std::string::npos) { - return std::string(url, next_colon - b); - } else { - next_slash = url.size(); - } - } - return std::string(url, b, next_slash - b); -} - -// Gets the path portion of an url. -// e.g http://www.foo.com/path -// returns /path -static std::string GetUrlPath(const std::string& url) { - size_t b = url.find("//"); - if (b == std::string::npos) - b = 0; - else - b += 2; - b = url.find("/", b); - if (b == std::string::npos) - return "/"; - - size_t e = url.find("#", b+1); - if (e != std::string::npos) - return std::string(url, b, (e - b)); - return std::string(url, b); -} - -} // namespace UrlUtilities +struct UrlUtilities { + // Gets the host from an url, strips the port number as well if the url + // has one. + // For example: calling GetUrlHost(www.foo.com:8080/boo) returns www.foo.com + static std::string GetUrlHost(const std::string& url); + + // Get the host + path portion of an url + // e.g http://www.foo.com/path + // returns www.foo.com/path + static std::string GetUrlHostPath(const std::string& url); + + // Gets the path portion of an url. + // e.g http://www.foo.com/path + // returns /path + static std::string GetUrlPath(const std::string& url); + + // Unescape a url, converting all %XX to the the actual char 0xXX. + // For example, this will convert "foo%21bar" to "foo!bar". 
+ static std::string Unescape(const std::string& escaped_url); +}; } // namespace net #endif // NET_TOOLS_DUMP_CACHE_URL_UTILITIES_H_ - diff --git a/net/tools/dump_cache/url_utilities_unittest.cc b/net/tools/dump_cache/url_utilities_unittest.cc new file mode 100644 index 0000000..3e3f122 --- /dev/null +++ b/net/tools/dump_cache/url_utilities_unittest.cc @@ -0,0 +1,113 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "net/tools/dump_cache/url_utilities.h" + +#include <string> + +#include "base/string_util.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace net { + +TEST(UrlUtilitiesTest, GetUrlHost) { + EXPECT_EQ("www.foo.com", + UrlUtilities::GetUrlHost("http://www.foo.com")); + EXPECT_EQ("www.foo.com", + UrlUtilities::GetUrlHost("http://www.foo.com:80")); + EXPECT_EQ("www.foo.com", + UrlUtilities::GetUrlHost("http://www.foo.com:80/")); + EXPECT_EQ("www.foo.com", + UrlUtilities::GetUrlHost("http://www.foo.com/news")); + EXPECT_EQ("www.foo.com", + UrlUtilities::GetUrlHost("www.foo.com:80/news?q=hello")); + EXPECT_EQ("www.foo.com", + UrlUtilities::GetUrlHost("www.foo.com/news?q=a:b")); + EXPECT_EQ("www.foo.com", + UrlUtilities::GetUrlHost("www.foo.com:80")); +} + +TEST(UrlUtilitiesTest, GetUrlHostPath) { + EXPECT_EQ("www.foo.com", + UrlUtilities::GetUrlHostPath("http://www.foo.com")); + EXPECT_EQ("www.foo.com:80", + UrlUtilities::GetUrlHostPath("http://www.foo.com:80")); + EXPECT_EQ("www.foo.com:80/", + UrlUtilities::GetUrlHostPath("http://www.foo.com:80/")); + EXPECT_EQ("www.foo.com/news", + UrlUtilities::GetUrlHostPath("http://www.foo.com/news")); + EXPECT_EQ("www.foo.com:80/news?q=hello", + UrlUtilities::GetUrlHostPath("www.foo.com:80/news?q=hello")); + EXPECT_EQ("www.foo.com/news?q=a:b", + UrlUtilities::GetUrlHostPath("www.foo.com/news?q=a:b")); + EXPECT_EQ("www.foo.com:80", + 
UrlUtilities::GetUrlHostPath("www.foo.com:80")); +} + +TEST(UrlUtilitiesTest, GetUrlPath) { + EXPECT_EQ("/", + UrlUtilities::GetUrlPath("http://www.foo.com")); + EXPECT_EQ("/", + UrlUtilities::GetUrlPath("http://www.foo.com:80")); + EXPECT_EQ("/", + UrlUtilities::GetUrlPath("http://www.foo.com:80/")); + EXPECT_EQ("/news", + UrlUtilities::GetUrlPath("http://www.foo.com/news")); + EXPECT_EQ("/news?q=hello", + UrlUtilities::GetUrlPath("www.foo.com:80/news?q=hello")); + EXPECT_EQ("/news?q=a:b", + UrlUtilities::GetUrlPath("www.foo.com/news?q=a:b")); + EXPECT_EQ("/", + UrlUtilities::GetUrlPath("www.foo.com:80")); +} + +TEST(UrlUtilitiesTest, Unescape) { + // Basic examples are left alone. + EXPECT_EQ("http://www.foo.com", + UrlUtilities::Unescape("http://www.foo.com")); + EXPECT_EQ("www.foo.com:80/news?q=hello", + UrlUtilities::Unescape("www.foo.com:80/news?q=hello")); + + // All chars can be unescaped. + EXPECT_EQ("~`!@#$%^&*()_-+={[}]|\\:;\"'<,>.?/", + UrlUtilities::Unescape("%7E%60%21%40%23%24%25%5E%26%2A%28%29%5F%2D" + "%2B%3D%7B%5B%7D%5D%7C%5C%3A%3B%22%27%3C%2C" + "%3E%2E%3F%2F")); + for (int c = 0; c < 256; ++c) { + std::string unescaped_char(1, implicit_cast<unsigned char>(c)); + std::string escaped_char = StringPrintf("%%%02X", c); + EXPECT_EQ(unescaped_char, UrlUtilities::Unescape(escaped_char)) + << "escaped_char = " << escaped_char; + escaped_char = StringPrintf("%%%02x", c); + EXPECT_EQ(unescaped_char, UrlUtilities::Unescape(escaped_char)) + << "escaped_char = " << escaped_char; + } + + // All non-% chars are left alone. + EXPECT_EQ("~`!@#$^&*()_-+={[}]|\\:;\"'<,>.?/", + UrlUtilities::Unescape("~`!@#$^&*()_-+={[}]|\\:;\"'<,>.?/")); + for (int c = 0; c < 256; ++c) { + if (c != '%') { + std::string just_char(1, implicit_cast<unsigned char>(c)); + EXPECT_EQ(just_char, UrlUtilities::Unescape(just_char)); + } + } + + // Some examples to unescape. + EXPECT_EQ("Hello, world!", UrlUtilities::Unescape("Hello%2C world%21")); + + // Not actually escapes. 
+ EXPECT_EQ("%", UrlUtilities::Unescape("%")); + EXPECT_EQ("%www", UrlUtilities::Unescape("%www")); + EXPECT_EQ("%foo", UrlUtilities::Unescape("%foo")); + EXPECT_EQ("%1", UrlUtilities::Unescape("%1")); + EXPECT_EQ("%1x", UrlUtilities::Unescape("%1x")); + EXPECT_EQ("%%", UrlUtilities::Unescape("%%")); + // Escapes following non-escapes. + EXPECT_EQ("%!", UrlUtilities::Unescape("%%21")); + EXPECT_EQ("%2!", UrlUtilities::Unescape("%2%21")); +} + +} // namespace net + diff --git a/net/tools/flip_server/flip_in_mem_edsm_server.cc b/net/tools/flip_server/flip_in_mem_edsm_server.cc index 974994d..6a1da08 100644 --- a/net/tools/flip_server/flip_in_mem_edsm_server.cc +++ b/net/tools/flip_server/flip_in_mem_edsm_server.cc @@ -23,6 +23,8 @@ #include "net/spdy/spdy_frame_builder.h" #include "net/spdy/spdy_framer.h" #include "net/spdy/spdy_protocol.h" +#include "net/tools/dump_cache/url_to_filename_encoder.h" +#include "net/tools/dump_cache/url_utilities.h" #include "net/tools/flip_server/balsa_enums.h" #include "net/tools/flip_server/balsa_frame.h" #include "net/tools/flip_server/balsa_headers.h" @@ -34,8 +36,6 @@ #include "net/tools/flip_server/ring_buffer.h" #include "net/tools/flip_server/simple_buffer.h" #include "net/tools/flip_server/split.h" -#include "net/tools/flip_server/url_to_filename_encoder.h" -#include "net/tools/flip_server/url_utilities.h" //////////////////////////////////////////////////////////////////////////////// @@ -2290,4 +2290,3 @@ int main(int argc, char**argv) { } return 0; } - diff --git a/net/tools/flip_server/url_to_filename_encoder.h b/net/tools/flip_server/url_to_filename_encoder.h deleted file mode 100644 index 429da25..0000000 --- a/net/tools/flip_server/url_to_filename_encoder.h +++ /dev/null @@ -1,128 +0,0 @@ -// Copyright (c) 2009 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#ifndef NET_TOOLS_FLIP_SERVER_URL_TO_FILE_ENCODER_H__ -#define NET_TOOLS_FLIP_SERVER_URL_TO_FILE_ENCODER_H__ -#pragma once - -#include <string> -#include "net/tools/flip_server/url_utilities.h" - -namespace net { - -// Helper class for converting a URL into a filename. -class UrlToFilenameEncoder { - public: - // Given a |url| and a |base_path|, returns a string which represents this - // |url|. - static std::string Encode(const std::string& url, std::string base_path) { - std::string clean_url(url); - if (clean_url.length() && clean_url[clean_url.length()-1] == '/') - clean_url.append("index.html"); - - std::string host = UrlUtilities::GetUrlHost(clean_url); - std::string filename(base_path); - filename = filename.append(host + "/"); - - std::string url_filename = UrlUtilities::GetUrlPath(clean_url); - // Strip the leading '/' - if (url_filename[0] == '/') - url_filename = url_filename.substr(1); - - // replace '/' with '\' - ConvertToSlashes(url_filename); - - // strip double slashes ("\\") - StripDoubleSlashes(url_filename); - - // Save path as filesystem-safe characters - url_filename = Escape(url_filename); - filename = filename.append(url_filename); - -#ifndef WIN32 - // Last step - convert to native slashes! - const std::string slash("/"); - const std::string backslash("\\"); - ReplaceAll(filename, backslash, slash); -#endif - - return filename; - } - - private: - static const unsigned int kMaximumSubdirectoryLength = 128; - - - // Escape the given input |path| and chop any individual components - // of the path which are greater than kMaximumSubdirectoryLength characters - // into two chunks. - static std::string Escape(const std::string& path) { - std::string output; - - // Note: We also chop paths into medium sized 'chunks'. - // This is due to the incompetence of the windows - // filesystem, which still hasn't figured out how - // to deal with long filenames. 
- unsigned int last_slash = 0; - for (size_t index = 0; index < path.length(); index++) { - char ch = path[index]; - if (ch == 0x5C) - last_slash = index; - if ((ch == 0x2D) || // hyphen - (ch == 0x5C) || (ch == 0x5F) || // backslash, underscore - ((0x30 <= ch) && (ch <= 0x39)) || // Digits [0-9] - ((0x41 <= ch) && (ch <= 0x5A)) || // Uppercase [A-Z] - ((0x61 <= ch) && (ch <= 0x7A))) { // Lowercase [a-z] - output.append(&path[index],1); - } else { - char encoded[3]; - encoded[0] = 'x'; - encoded[1] = ch / 16; - encoded[1] += (encoded[1] >= 10) ? 'A' - 10 : '0'; - encoded[2] = ch % 16; - encoded[2] += (encoded[2] >= 10) ? 'A' - 10 : '0'; - output.append(encoded, 3); - } - if (index - last_slash > kMaximumSubdirectoryLength) { -#ifdef WIN32 - char slash = '\\'; -#else - char slash = '/'; -#endif - output.append(&slash, 1); - last_slash = index; - } - } - return output; - } - - // Replace all instances of |from| within |str| as |to|. - static void ReplaceAll(std::string& str, const std::string& from, - const std::string& to) { - std::string::size_type pos(0); - while ((pos = str.find(from, pos)) != std::string::npos) { - str.replace(pos, from.size(), to); - pos += from.size(); - } - } - - // Replace all instances of "/" with "\" in |path|. - static void ConvertToSlashes(std::string& path) { - const std::string slash("/"); - const std::string backslash("\\"); - ReplaceAll(path, slash, backslash); - } - - // Replace all instances of "\\" with "%5C%5C" in |path|. 
- static void StripDoubleSlashes(std::string& path) { - const std::string doubleslash("\\\\"); - const std::string escaped_doubleslash("%5C%5C"); - ReplaceAll(path, doubleslash, escaped_doubleslash); - } -}; - -} // namespace net - -#endif // NET_TOOLS_FLIP_SERVER_URL_TO_FILE_ENCODER_H__ - diff --git a/net/tools/flip_server/url_utilities.h b/net/tools/flip_server/url_utilities.h deleted file mode 100644 index 39d0372..0000000 --- a/net/tools/flip_server/url_utilities.h +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright (c) 2009 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef NET_TOOLS_FLIP_SERVER_URL_UTILITIES_H__ -#define NET_TOOLS_FLIP_SERVER_URL_UTILITIES_H__ -#pragma once - -#include <string> - -namespace net { - -struct UrlUtilities { - // Get the host from an url - static std::string GetUrlHost(const std::string& url) { - size_t b = url.find("//"); - if (b == std::string::npos) - b = 0; - else - b += 2; - size_t next_slash = url.find_first_of('/', b); - size_t next_colon = url.find_first_of(':', b); - if (next_slash != std::string::npos - && next_colon != std::string::npos - && next_colon < next_slash) { - return std::string(url, b, next_colon - b); - } - if (next_slash == std::string::npos) { - if (next_colon != std::string::npos) { - return std::string(url, next_colon - b); - } else { - next_slash = url.size(); - } - } - return std::string(url, b, next_slash - b); - } - - // Get the host + path portion of an url - // e.g http://www.foo.com/path - // returns www.foo.com/path - static std::string GetUrlHostPath(const std::string& url) { - size_t b = url.find("//"); - if (b == std::string::npos) - b = 0; - else - b += 2; - return std::string(url, b); - } - - // Get the path portion of an url - // e.g http://www.foo.com/path - // returns /path - static std::string GetUrlPath(const std::string& url) { - size_t b = url.find("//"); - if (b == 
std::string::npos) - b = 0; - else - b += 2; - b = url.find("/", b+1); - if (b == std::string::npos) - return "/"; - - return std::string(url, b); - } -}; - -} // namespace net - -#endif // NET_TOOLS_FLIP_SERVER_URL_UTILITIES_H__ - |