diff options
author | michaeln@google.com <michaeln@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-08-06 21:35:45 +0000 |
---|---|---|
committer | michaeln@google.com <michaeln@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-08-06 21:35:45 +0000 |
commit | 9b26746efa2127fe8ec019e02970a69db17e5115 (patch) | |
tree | dd051086101521a297130a9fdba7a1838467a376 /webkit/appcache | |
parent | 1f74cfc185bb4d47532994a206a901bc708d3ff6 (diff) | |
download | chromium_src-9b26746efa2127fe8ec019e02970a69db17e5115.zip chromium_src-9b26746efa2127fe8ec019e02970a69db17e5115.tar.gz chromium_src-9b26746efa2127fe8ec019e02970a69db17e5115.tar.bz2 |
Port of WebKit's appcache manifest parser code.
Added unittests for manifest parser to test_shell_tests.
This is a clone of jennb's CL here.
http://codereview.chromium.org/160608
TBR=jennb
BUG=none
TEST=manifest_parser_unittest.cc
Review URL: http://codereview.chromium.org/165072
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@22673 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'webkit/appcache')
-rw-r--r-- | webkit/appcache/manifest_parser.cc | 337 | ||||
-rw-r--r-- | webkit/appcache/manifest_parser.h | 33 | ||||
-rw-r--r-- | webkit/appcache/manifest_parser_unittest.cc | 294 |
3 files changed, 501 insertions, 163 deletions
diff --git a/webkit/appcache/manifest_parser.cc b/webkit/appcache/manifest_parser.cc index ef0f96d..b5e8cb8 100644 --- a/webkit/appcache/manifest_parser.cc +++ b/webkit/appcache/manifest_parser.cc @@ -29,156 +29,197 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include "config.h" -#include "ManifestParser.h" - -#if ENABLE(OFFLINE_WEB_APPLICATIONS) - -#include "CharacterNames.h" -#include "KURL.h" -#include "TextResourceDecoder.h" - -using namespace std; - -namespace WebCore { - -enum Mode { Explicit, Fallback, OnlineWhitelist, Unknown }; - -bool parseManifest(const KURL& manifestURL, const char* data, int length, Manifest& manifest) -{ - ASSERT(manifest.explicitURLs.isEmpty()); - ASSERT(manifest.onlineWhitelistedURLs.isEmpty()); - ASSERT(manifest.fallbackURLs.isEmpty()); - - Mode mode = Explicit; - - RefPtr<TextResourceDecoder> decoder = TextResourceDecoder::create("text/cache-manifest", "UTF-8"); - String s = decoder->decode(data, length); - s += decoder->flush(); - - // Look for the magic signature: "^\xFEFF?CACHE MANIFEST[ \t]?" (the BOM is removed by TextResourceDecoder). - // Example: "CACHE MANIFEST #comment" is a valid signature. - // Example: "CACHE MANIFEST;V2" is not. - if (!s.startsWith("CACHE MANIFEST")) - return false; - - const UChar* end = s.characters() + s.length(); - const UChar* p = s.characters() + 14; // "CACHE MANIFEST" is 14 characters. - - if (p < end && *p != ' ' && *p != '\t' && *p != '\n' && *p != '\r') - return false; - - // Skip to the end of the line. 
+#include "manifest_parser.h" + +#include "base/logging.h" +#include "base/string_util.h" +#include "googleurl/src/gurl.h" + +namespace appcache { + +enum Mode { + kExplicit, + kFallback, + kOnlineWhitelist, + kUnknown, +}; + +bool ParseManifest(const GURL& manifest_url, const char* data, int length, + Manifest& manifest) { + static const std::wstring kSignature(L"CACHE MANIFEST"); + + DCHECK(manifest.explicit_urls.empty()); + DCHECK(manifest.online_whitelisted_urls.empty()); + DCHECK(manifest.fallback_urls.empty()); + + Mode mode = kExplicit; + + std::wstring data_string; + // TODO(jennb): cannot do UTF8ToWide(data, length, &data_string); + // until UTF8ToWide uses 0xFFFD Unicode replacement character. + CodepageToWide(std::string(data, length), "UTF-8", + OnStringUtilConversionError::SUBSTITUTE, &data_string); + const wchar_t* p = data_string.c_str(); + const wchar_t* end = p + data_string.length(); + + // Look for the magic signature: "^\xFEFF?CACHE MANIFEST[ \t]?" + // Example: "CACHE MANIFEST #comment" is a valid signature. + // Example: "CACHE MANIFEST;V2" is not. + + // When the input data starts with a UTF-8 Byte-Order-Mark + // (0xEF, 0xBB, 0xBF), the UTF8ToWide() function converts it to a + // Unicode BOM (U+FEFF). Skip a converted Unicode BOM if it exists. + int bom_offset = 0; + if (!data_string.empty() && data_string[0] == 0xFEFF) { + bom_offset = 1; + ++p; + } + + if (p >= end || + data_string.compare(bom_offset, kSignature.length(), kSignature)) { + return false; + } + + p += kSignature.length(); // Skip past "CACHE MANIFEST" + + // Character after "CACHE MANIFEST" must be whitespace. + if (p < end && *p != ' ' && *p != '\t' && *p != '\n' && *p != '\r') + return false; + + // Skip to the end of the line. 
+ while (p < end && *p != '\r' && *p != '\n') + ++p; + + while (1) { + // Skip whitespace + while (p < end && (*p == '\n' || *p == '\r' || *p == ' ' || *p == '\t')) + ++p; + + if (p == end) + break; + + const wchar_t* line_start = p; + + // Find the end of the line while (p < end && *p != '\r' && *p != '\n') - p++; - - while (1) { - // Skip whitespace - while (p < end && (*p == '\n' || *p == '\r' || *p == ' ' || *p == '\t')) - p++; - - if (p == end) - break; - - const UChar* lineStart = p; - - // Find the end of the line - while (p < end && *p != '\r' && *p != '\n') - p++; - - // Check if we have a comment - if (*lineStart == '#') - continue; - - // Get rid of trailing whitespace - const UChar* tmp = p - 1; - while (tmp > lineStart && (*tmp == ' ' || *tmp == '\t')) - tmp--; - - String line(lineStart, tmp - lineStart + 1); - - if (line == "CACHE:") - mode = Explicit; - else if (line == "FALLBACK:") - mode = Fallback; - else if (line == "NETWORK:") - mode = OnlineWhitelist; - else if (line.endsWith(":")) - mode = Unknown; - else if (mode == Unknown) - continue; - else if (mode == Explicit || mode == OnlineWhitelist) { - const UChar* p = line.characters(); - const UChar* lineEnd = p + line.length(); - - // Look for whitespace separating the URL from subsequent ignored tokens. 
- while (p < lineEnd && *p != '\t' && *p != ' ') - p++; - - KURL url(manifestURL, String(line.characters(), p - line.characters())); - - if (!url.isValid()) - continue; - - if (url.hasRef()) - url.setRef(String()); - - if (!equalIgnoringCase(url.protocol(), manifestURL.protocol())) - continue; - - if (mode == Explicit) - manifest.explicitURLs.add(url.string()); - else - manifest.onlineWhitelistedURLs.append(url); - - } else if (mode == Fallback) { - const UChar* p = line.characters(); - const UChar* lineEnd = p + line.length(); - - // Look for whitespace separating the two URLs - while (p < lineEnd && *p != '\t' && *p != ' ') - p++; - - if (p == lineEnd) { - // There was no whitespace separating the URLs. - continue; - } - - KURL namespaceURL(manifestURL, String(line.characters(), p - line.characters())); - if (!namespaceURL.isValid()) - continue; - if (namespaceURL.hasRef()) - namespaceURL.setRef(String()); - - if (!protocolHostAndPortAreEqual(manifestURL, namespaceURL)) - continue; - - // Skip whitespace separating fallback namespace from URL. - while (p < lineEnd && (*p == '\t' || *p == ' ')) - p++; - - // Look for whitespace separating the URL from subsequent ignored tokens. 
- const UChar* fallbackStart = p; - while (p < lineEnd && *p != '\t' && *p != ' ') - p++; - - KURL fallbackURL(manifestURL, String(fallbackStart, p - fallbackStart)); - if (!fallbackURL.isValid()) - continue; - if (fallbackURL.hasRef()) - fallbackURL.setRef(String()); - - if (!protocolHostAndPortAreEqual(manifestURL, fallbackURL)) - continue; - - manifest.fallbackURLs.append(make_pair(namespaceURL, fallbackURL)); - } else - ASSERT_NOT_REACHED(); + ++p; + + // Check if we have a comment + if (*line_start == '#') + continue; + + // Get rid of trailing whitespace + const wchar_t* tmp = p - 1; + while (tmp > line_start && (*tmp == ' ' || *tmp == '\t')) + --tmp; + + std::wstring line(line_start, tmp - line_start + 1); + + if (line == L"CACHE:") { + mode = kExplicit; + } else if (line == L"FALLBACK:") { + mode = kFallback; + } else if (line == L"NETWORK:") { + mode = kOnlineWhitelist; + } else if (*(line.end() - 1) == ':') { + mode = kUnknown; + } else if (mode == kUnknown) { + continue; + } else if (mode == kExplicit || mode == kOnlineWhitelist) { + const wchar_t *line_p = line.c_str(); + const wchar_t *line_end = line_p + line.length(); + + // Look for whitespace separating the URL from subsequent ignored tokens. + while (line_p < line_end && *line_p != '\t' && *line_p != ' ') + ++line_p; + + string16 url16; + WideToUTF16(line.c_str(), line_p - line.c_str(), &url16); + GURL url = manifest_url.Resolve(url16); + if (!url.is_valid()) + continue; + if (url.has_ref()) { + GURL::Replacements replacements; + replacements.ClearRef(); + url = url.ReplaceComponents(replacements); + } + + // Scheme component must be the same as the manifest URL's. 
+ if (url.scheme() != manifest_url.scheme()) { + continue; + } + + if (mode == kExplicit) { + manifest.explicit_urls.insert(url.spec()); + } else { + manifest.online_whitelisted_urls.push_back(url); + } + } else if (mode == kFallback) { + const wchar_t* line_p = line.c_str(); + const wchar_t* line_end = line_p + line.length(); + + // Look for whitespace separating the two URLs + while (line_p < line_end && *line_p != '\t' && *line_p != ' ') + ++line_p; + + if (line_p == line_end) { + // There was no whitespace separating the URLs. + continue; + } + + string16 namespace_url16; + WideToUTF16(line.c_str(), line_p - line.c_str(), &namespace_url16); + GURL namespace_url = manifest_url.Resolve(namespace_url16); + if (!namespace_url.is_valid()) + continue; + if (namespace_url.has_ref()) { + GURL::Replacements replacements; + replacements.ClearRef(); + namespace_url = namespace_url.ReplaceComponents(replacements); + } + + // Fallback namespace URL must have the same scheme, host and port + // as the manifest's URL. + if (manifest_url.GetOrigin() != namespace_url.GetOrigin()) { + continue; + } + + // Skip whitespace separating fallback namespace from URL. + while (line_p < line_end && (*line_p == '\t' || *line_p == ' ')) + ++line_p; + + // Look for whitespace separating the URL from subsequent ignored tokens. + const wchar_t* fallback_start = line_p; + while (line_p < line_end && *line_p != '\t' && *line_p != ' ') + ++line_p; + + string16 fallback_url16; + WideToUTF16(fallback_start, line_p - fallback_start, &fallback_url16); + GURL fallback_url = manifest_url.Resolve(fallback_url16); + if (!fallback_url.is_valid()) + continue; + if (fallback_url.has_ref()) { + GURL::Replacements replacements; + replacements.ClearRef(); + fallback_url = fallback_url.ReplaceComponents(replacements); + } + + // Fallback entry URL must have the same scheme, host and port + // as the manifest's URL. 
+ if (manifest_url.GetOrigin() != fallback_url.GetOrigin()) { + continue; + } + + // Store regardless of duplicate namespace URL. Only first match + // will ever be used. + manifest.fallback_urls.push_back( + std::make_pair(namespace_url, fallback_url)); + } else { + NOTREACHED(); } + } - return true; -} - + return true; } -#endif // ENABLE(OFFLINE_WEB_APPLICATIONS) +} // namespace appcache diff --git a/webkit/appcache/manifest_parser.h b/webkit/appcache/manifest_parser.h index 5ccd075..3bff98a 100644 --- a/webkit/appcache/manifest_parser.h +++ b/webkit/appcache/manifest_parser.h @@ -29,27 +29,30 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef ManifestParser_h -#define ManifestParser_h +#ifndef WEBKIT_APPCACHE_MANIFEST_PARSER_H_ +#define WEBKIT_APPCACHE_MANIFEST_PARSER_H_ -#if ENABLE(OFFLINE_WEB_APPLICATIONS) +#include <string> +#include <vector> -#include "ApplicationCache.h" +#include "base/hash_tables.h" -namespace WebCore { +class GURL; - class KURL; +namespace appcache { - struct Manifest { - Vector<KURL> onlineWhitelistedURLs; - HashSet<String> explicitURLs; - FallbackURLVector fallbackURLs; - }; +typedef std::vector<std::pair<GURL, GURL> > FallbackUrlVector; - bool parseManifest(const KURL& manifestURL, const char* data, int length, Manifest&); +// TODO(jennb): spec changed since webkit implementation. Update in next CL. 
+struct Manifest { + std::vector<GURL> online_whitelisted_urls; + base::hash_set<std::string> explicit_urls; + FallbackUrlVector fallback_urls; +}; -} +bool ParseManifest(const GURL& manifest_url, const char* data, int length, + Manifest& manifest); -#endif // ENABLE(OFFLINE_WEB_APPLICATIONS) +} // namespace appcache -#endif // ManifestParser_h +#endif // WEBKIT_APPCACHE_MANIFEST_PARSER_H_ diff --git a/webkit/appcache/manifest_parser_unittest.cc b/webkit/appcache/manifest_parser_unittest.cc new file mode 100644 index 0000000..07721ab --- /dev/null +++ b/webkit/appcache/manifest_parser_unittest.cc @@ -0,0 +1,294 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include <string> + +#include "googleurl/src/gurl.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "webkit/appcache/manifest_parser.h" + +using appcache::FallbackUrlVector; +using appcache::Manifest; +using appcache::ParseManifest; + +namespace { + +class ManifestParserTest : public testing::Test { +}; + +} // namespace + +TEST(ManifestParserTest, NoData) { + GURL url; + Manifest manifest; + EXPECT_FALSE(ParseManifest(url, "", 0, manifest)); + EXPECT_FALSE(ParseManifest(url, "CACHE MANIFEST\r", 0, manifest)); // 0 len +} + +TEST(ManifestParserTest, CheckSignature) { + GURL url; + Manifest manifest; + + const std::string kBadSignatures[] = { + "foo", + "CACHE MANIFEST;V2\r", // not followed by whitespace + "CACHE MANIFEST#bad\r", // no whitespace before comment + "cache manifest ", // wrong case + "#CACHE MANIFEST\r", // comment + "xCACHE MANIFEST\n", // bad first char + " CACHE MANIFEST\r", // begins with whitespace + "\xEF\xBE\xBF" "CACHE MANIFEST\r", // bad UTF-8 BOM value + }; + + for (size_t i = 0; i < arraysize(kBadSignatures); ++i) { + const std::string bad = kBadSignatures[i]; + EXPECT_FALSE(ParseManifest(url, bad.c_str(), bad.length(), manifest)); + } + + const 
std::string kGoodSignatures[] = { + "CACHE MANIFEST", + "CACHE MANIFEST ", + "CACHE MANIFEST\r", + "CACHE MANIFEST\n", + "CACHE MANIFEST\r\n", + "CACHE MANIFEST\t# ignore me\r", + "CACHE MANIFEST ignore\r\n", + "\xEF\xBB\xBF" "CACHE MANIFEST \r\n", // BOM present + }; + + for (size_t i = 0; i < arraysize(kGoodSignatures); ++i) { + const std::string good = kGoodSignatures[i]; + EXPECT_TRUE(ParseManifest(url, good.c_str(), good.length(), manifest)); + } +} + +TEST(ManifestParserTest, NoManifestUrl) { + Manifest manifest; + const std::string kData("CACHE MANIFEST\r" + "relative/tobase.com\r" + "http://absolute.com/addme.com"); + const GURL kUrl = GURL::EmptyGURL(); + EXPECT_TRUE(ParseManifest(kUrl, kData.c_str(), kData.length(), manifest)); + EXPECT_TRUE(manifest.explicit_urls.empty()); + EXPECT_TRUE(manifest.online_whitelisted_urls.empty()); + EXPECT_TRUE(manifest.fallback_urls.empty()); +} + +TEST(ManifestParserTest, ExplicitUrls) { + Manifest manifest; + const GURL kUrl("http://www.foo.com"); + const std::string kData("CACHE MANIFEST\r" + "relative/one\r" + "# some comment\r" + "http://www.foo.com/two#strip\r\n" + "NETWORK:\r" + " \t CACHE:\r" + "HTTP://www.diff.com/three\r" + "FALLBACK:\r" + " \t # another comment with leading whitespace\n" + "IGNORE:\r" + "http://www.foo.com/ignore\r" + "CACHE: \r" + "garbage:#!@\r" + "https://www.foo.com/diffscheme \t \r" + " \t relative/four#stripme\n\r"); + + EXPECT_TRUE(ParseManifest(kUrl, kData.c_str(), kData.length(), manifest)); + EXPECT_TRUE(manifest.online_whitelisted_urls.empty()); + EXPECT_TRUE(manifest.fallback_urls.empty()); + + base::hash_set<std::string> urls = manifest.explicit_urls; + const size_t kExpected = 4; + ASSERT_EQ(kExpected, urls.size()); + EXPECT_TRUE(urls.find("http://www.foo.com/relative/one") != urls.end()); + EXPECT_TRUE(urls.find("http://www.foo.com/two") != urls.end()); + EXPECT_TRUE(urls.find("http://www.diff.com/three") != urls.end()); + EXPECT_TRUE(urls.find("http://www.foo.com/relative/four") 
!= urls.end()); +} + +TEST(ManifestParserTest, WhitelistUrls) { + Manifest manifest; + const GURL kUrl("http://www.bar.com"); + const std::string kData("CACHE MANIFEST\r" + "NETWORK:\r" + "relative/one\r" + "# a comment\r" + "http://www.bar.com/two\r" + "HTTP://www.diff.com/three#strip\n\r" + "FALLBACK:\r" + "garbage\r" + "UNKNOWN:\r" + "http://www.bar.com/ignore\r" + "CACHE:\r" + "NETWORK:\r" + "https://www.wrongscheme.com\n" + "relative/four#stripref \t \r" + "http://www.five.com\r\n"); + + EXPECT_TRUE(ParseManifest(kUrl, kData.c_str(), kData.length(), manifest)); + EXPECT_TRUE(manifest.explicit_urls.empty()); + EXPECT_TRUE(manifest.fallback_urls.empty()); + + std::vector<GURL> online = manifest.online_whitelisted_urls; + const size_t kExpected = 5; + ASSERT_EQ(kExpected, online.size()); + EXPECT_EQ(GURL("http://www.bar.com/relative/one"), online[0]); + EXPECT_EQ(GURL("http://www.bar.com/two"), online[1]); + EXPECT_EQ(GURL("http://www.diff.com/three"), online[2]); + EXPECT_EQ(GURL("http://www.bar.com/relative/four"), online[3]); + EXPECT_EQ(GURL("http://www.five.com"), online[4]); +} + +TEST(ManifestParserTest, FallbackUrls) { + Manifest manifest; + const GURL kUrl("http://glorp.com"); + const std::string kData("CACHE MANIFEST\r" + "# a comment\r" + "CACHE:\r" + "NETWORK:\r" + "UNKNOWN:\r" + "FALLBACK:\r" + "relative/one \t \t http://glorp.com/onefb \t \r" + "https://glorp.com/wrong http://glorp.com/wrongfb\r" + "http://glorp.com/two#strip relative/twofb\r" + "HTTP://glorp.com/three relative/threefb#strip\n" + "http://glorp.com/three http://glorp.com/three-dup\r" + "http://glorp.com/solo \t \r\n" + "http://diff.com/ignore http://glorp.com/wronghost\r" + "http://glorp.com/wronghost http://diff.com/ohwell\r" + "relative/badscheme ftp://glorp.com/ignored\r" + "garbage\r\n" + "CACHE:\r" + "# only fallback urls in this test\r" + "FALLBACK:\n" + "relative/four#strip relative/fourfb#strip\r" + "http://www.glorp.com/notsame relative/skipped\r"); + + 
EXPECT_TRUE(ParseManifest(kUrl, kData.c_str(), kData.length(), manifest)); + EXPECT_TRUE(manifest.explicit_urls.empty()); + EXPECT_TRUE(manifest.online_whitelisted_urls.empty()); + + FallbackUrlVector fallbacks = manifest.fallback_urls; + const size_t kExpected = 5; + ASSERT_EQ(kExpected, fallbacks.size()); + EXPECT_EQ(GURL("http://glorp.com/relative/one"), + fallbacks[0].first); + EXPECT_EQ(GURL("http://glorp.com/onefb"), + fallbacks[0].second); + EXPECT_EQ(GURL("http://glorp.com/two"), + fallbacks[1].first); + EXPECT_EQ(GURL("http://glorp.com/relative/twofb"), + fallbacks[1].second); + EXPECT_EQ(GURL("http://glorp.com/three"), + fallbacks[2].first); + EXPECT_EQ(GURL("http://glorp.com/relative/threefb"), + fallbacks[2].second); + EXPECT_EQ(GURL("http://glorp.com/three"), // duplicates are stored + fallbacks[3].first); + EXPECT_EQ(GURL("http://glorp.com/three-dup"), + fallbacks[3].second); + EXPECT_EQ(GURL("http://glorp.com/relative/four"), + fallbacks[4].first); + EXPECT_EQ(GURL("http://glorp.com/relative/fourfb"), + fallbacks[4].second); +} + +TEST(ManifestParserTest, FallbackUrlsWithPort) { + Manifest manifest; + const GURL kUrl("http://www.portme.com:1234"); + const std::string kData("CACHE MANIFEST\r" + "FALLBACK:\r" + "http://www.portme.com:1234/one relative/onefb\r" + "HTTP://www.portme.com:9876/wrong http://www.portme.com:1234/ignore\r" + "http://www.portme.com:1234/stillwrong http://www.portme.com:42/boo\r" + "relative/two relative/twofb\r" + "http://www.portme.com:1234/three HTTP://www.portme.com:1234/threefb\r" + "http://www.portme.com/noport http://www.portme.com:1234/skipped\r" + "http://www.portme.com:1234/skipme http://www.portme.com/noport\r"); + + EXPECT_TRUE(ParseManifest(kUrl, kData.c_str(), kData.length(), manifest)); + EXPECT_TRUE(manifest.explicit_urls.empty()); + EXPECT_TRUE(manifest.online_whitelisted_urls.empty()); + + FallbackUrlVector fallbacks = manifest.fallback_urls; + const size_t kExpected = 3; + ASSERT_EQ(kExpected, 
fallbacks.size()); + EXPECT_EQ(GURL("http://www.portme.com:1234/one"), + fallbacks[0].first); + EXPECT_EQ(GURL("http://www.portme.com:1234/relative/onefb"), + fallbacks[0].second); + EXPECT_EQ(GURL("http://www.portme.com:1234/relative/two"), + fallbacks[1].first); + EXPECT_EQ(GURL("http://www.portme.com:1234/relative/twofb"), + fallbacks[1].second); + EXPECT_EQ(GURL("http://www.portme.com:1234/three"), + fallbacks[2].first); + EXPECT_EQ(GURL("http://www.portme.com:1234/threefb"), + fallbacks[2].second); +} + +TEST(ManifestParserTest, ComboUrls) { + Manifest manifest; + const GURL kUrl("http://combo.com:42"); + const std::string kData("CACHE MANIFEST\r" + "relative/explicit-1\r" + "# some comment\r" + "http://combo.com:99/explicit-2#strip\r" + "NETWORK:\r" + "http://combo.com/whitelist-1\r" + "HTTP://www.diff.com/whitelist-2#strip\r" + "CACHE:\n\r" + "http://www.diff.com/explicit-3\r" + "FALLBACK:\r" + "http://combo.com:42/fallback-1 http://combo.com:42/fallback-1b\r" + "relative/fallback-2 relative/fallback-2b\r" + "UNKNOWN:\r\n" + "http://combo.com/ignoreme\r" + "relative/still-ignored\r" + "NETWORK:\r\n" + "relative/whitelist-3#strip\r" + "http://combo.com:99/whitelist-4\r"); + EXPECT_TRUE(ParseManifest(kUrl, kData.c_str(), kData.length(), manifest)); + + base::hash_set<std::string> urls = manifest.explicit_urls; + size_t expected = 3; + ASSERT_EQ(expected, urls.size()); + EXPECT_TRUE(urls.find("http://combo.com:42/relative/explicit-1") != + urls.end()); + EXPECT_TRUE(urls.find("http://combo.com:99/explicit-2") != urls.end()); + EXPECT_TRUE(urls.find("http://www.diff.com/explicit-3") != urls.end()); + + std::vector<GURL> online = manifest.online_whitelisted_urls; + expected = 4; + ASSERT_EQ(expected, online.size()); + EXPECT_EQ(GURL("http://combo.com/whitelist-1"), online[0]); + EXPECT_EQ(GURL("http://www.diff.com/whitelist-2"), online[1]); + EXPECT_EQ(GURL("http://combo.com:42/relative/whitelist-3"), online[2]); + 
EXPECT_EQ(GURL("http://combo.com:99/whitelist-4"), online[3]); + + FallbackUrlVector fallbacks = manifest.fallback_urls; + expected = 2; + ASSERT_EQ(expected, fallbacks.size()); + EXPECT_EQ(GURL("http://combo.com:42/fallback-1"), + fallbacks[0].first); + EXPECT_EQ(GURL("http://combo.com:42/fallback-1b"), + fallbacks[0].second); + EXPECT_EQ(GURL("http://combo.com:42/relative/fallback-2"), + fallbacks[1].first); + EXPECT_EQ(GURL("http://combo.com:42/relative/fallback-2b"), + fallbacks[1].second); +} + +TEST(ManifestParserTest, UnusualUtf8) { + Manifest manifest; + const GURL kUrl("http://bad.com"); + const std::string kData("CACHE MANIFEST\r" + "\xC0" "invalidutf8\r" + "nonbmp" "\xF1\x84\xAB\xBC\r"); + EXPECT_TRUE(ParseManifest(kUrl, kData.c_str(), kData.length(), manifest)); + + base::hash_set<std::string> urls = manifest.explicit_urls; + EXPECT_TRUE(urls.find("http://bad.com/%EF%BF%BDinvalidutf8") != urls.end()); + EXPECT_TRUE(urls.find("http://bad.com/nonbmp%F1%84%AB%BC") != urls.end()); +} |