summaryrefslogtreecommitdiffstats
path: root/webkit/appcache/manifest_parser.cc
diff options
context:
space:
mode:
authormichaeln@google.com <michaeln@google.com@0039d316-1c4b-4281-b951-d872f2087c98>2009-08-06 21:35:45 +0000
committermichaeln@google.com <michaeln@google.com@0039d316-1c4b-4281-b951-d872f2087c98>2009-08-06 21:35:45 +0000
commit9b26746efa2127fe8ec019e02970a69db17e5115 (patch)
treedd051086101521a297130a9fdba7a1838467a376 /webkit/appcache/manifest_parser.cc
parent1f74cfc185bb4d47532994a206a901bc708d3ff6 (diff)
downloadchromium_src-9b26746efa2127fe8ec019e02970a69db17e5115.zip
chromium_src-9b26746efa2127fe8ec019e02970a69db17e5115.tar.gz
chromium_src-9b26746efa2127fe8ec019e02970a69db17e5115.tar.bz2
Port of WebKit's appcache manifest parser code.
Added unittests for manifest parser to test_shell_tests. This is a clone of jennb's CL here. http://codereview.chromium.org/160608 TBR=jennb BUG=none TEST=manifest_parser_unittest.cc Review URL: http://codereview.chromium.org/165072 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@22673 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'webkit/appcache/manifest_parser.cc')
-rw-r--r--webkit/appcache/manifest_parser.cc337
1 files changed, 189 insertions, 148 deletions
diff --git a/webkit/appcache/manifest_parser.cc b/webkit/appcache/manifest_parser.cc
index ef0f96d..b5e8cb8 100644
--- a/webkit/appcache/manifest_parser.cc
+++ b/webkit/appcache/manifest_parser.cc
@@ -29,156 +29,197 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#include "config.h"
-#include "ManifestParser.h"
-
-#if ENABLE(OFFLINE_WEB_APPLICATIONS)
-
-#include "CharacterNames.h"
-#include "KURL.h"
-#include "TextResourceDecoder.h"
-
-using namespace std;
-
-namespace WebCore {
-
-enum Mode { Explicit, Fallback, OnlineWhitelist, Unknown };
-
-bool parseManifest(const KURL& manifestURL, const char* data, int length, Manifest& manifest)
-{
- ASSERT(manifest.explicitURLs.isEmpty());
- ASSERT(manifest.onlineWhitelistedURLs.isEmpty());
- ASSERT(manifest.fallbackURLs.isEmpty());
-
- Mode mode = Explicit;
-
- RefPtr<TextResourceDecoder> decoder = TextResourceDecoder::create("text/cache-manifest", "UTF-8");
- String s = decoder->decode(data, length);
- s += decoder->flush();
-
- // Look for the magic signature: "^\xFEFF?CACHE MANIFEST[ \t]?" (the BOM is removed by TextResourceDecoder).
- // Example: "CACHE MANIFEST #comment" is a valid signature.
- // Example: "CACHE MANIFEST;V2" is not.
- if (!s.startsWith("CACHE MANIFEST"))
- return false;
-
- const UChar* end = s.characters() + s.length();
- const UChar* p = s.characters() + 14; // "CACHE MANIFEST" is 14 characters.
-
- if (p < end && *p != ' ' && *p != '\t' && *p != '\n' && *p != '\r')
- return false;
-
- // Skip to the end of the line.
+#include "manifest_parser.h"
+
+#include "base/logging.h"
+#include "base/string_util.h"
+#include "googleurl/src/gurl.h"
+
+namespace appcache {
+
+enum Mode {  // Manifest section the parser is currently reading.
+ kExplicit,  // Initial mode; also selected by a "CACHE:" line.
+ kFallback,  // Selected by a "FALLBACK:" line.
+ kOnlineWhitelist,  // Selected by a "NETWORK:" line.
+ kUnknown,  // Selected by any other line ending in ':'; its entries are skipped.
+};
+
+// Parses |length| bytes at |data| as an application cache manifest.
+//
+// The bytes are converted from UTF-8 to a wide string using the SUBSTITUTE
+// error policy. The text must begin with "CACHE MANIFEST" (optionally
+// preceded by a BOM) followed by whitespace or end of input; otherwise
+// false is returned and |manifest| is left untouched.
+//
+// Every following non-comment line is interpreted according to the current
+// section ("CACHE:", "FALLBACK:", "NETWORK:", or an unknown "xxx:" header).
+// URLs are resolved against |manifest_url|, stripped of their ref component,
+// and appended to |manifest|. Lines that fail validation are skipped.
+// Returns true once the signature has been accepted.
+bool ParseManifest(const GURL& manifest_url, const char* data, int length,
+ Manifest& manifest) {
+ static const std::wstring kSignature(L"CACHE MANIFEST");
+
+ DCHECK(manifest.explicit_urls.empty());
+ DCHECK(manifest.online_whitelisted_urls.empty());
+ DCHECK(manifest.fallback_urls.empty());
+
+ Mode mode = kExplicit;
+
+ std::wstring data_string;
+ // TODO(jennb): cannot do UTF8ToWide(data, length, &data_string);
+ // until UTF8ToWide uses 0xFFFD Unicode replacement character.
+ CodepageToWide(std::string(data, length), "UTF-8",
+ OnStringUtilConversionError::SUBSTITUTE, &data_string);
+ const wchar_t* p = data_string.c_str();
+ const wchar_t* end = p + data_string.length();
+
+ // Look for the magic signature: "^\xFEFF?CACHE MANIFEST[ \t]?"
+ // Example: "CACHE MANIFEST #comment" is a valid signature.
+ // Example: "CACHE MANIFEST;V2" is not.
+
+ // When the input data starts with a UTF-8 Byte-Order-Mark
+ // (0xEF, 0xBB, 0xBF), the conversion above turns it into a
+ // Unicode BOM (U+FEFF). Skip a converted Unicode BOM if it exists.
+ int bom_offset = 0;
+ if (!data_string.empty() && data_string[0] == 0xFEFF) {
+ bom_offset = 1;
+ ++p;
+ }
+
+ if (p >= end ||
+ data_string.compare(bom_offset, kSignature.length(), kSignature)) {
+ return false;
+ }
+
+ p += kSignature.length(); // Skip past "CACHE MANIFEST"
+
+ // Character after "CACHE MANIFEST" must be whitespace.
+ if (p < end && *p != ' ' && *p != '\t' && *p != '\n' && *p != '\r')
+ return false;
+
+ // Skip to the end of the line.
+ while (p < end && *p != '\r' && *p != '\n')
+ ++p;
+
+ while (1) {
+ // Skip whitespace
+ while (p < end && (*p == '\n' || *p == '\r' || *p == ' ' || *p == '\t'))
+ ++p;
+
+ if (p == end)
+ break;
+
+ const wchar_t* line_start = p;
+
+ // Find the end of the line
while (p < end && *p != '\r' && *p != '\n')
- p++;
-
- while (1) {
- // Skip whitespace
- while (p < end && (*p == '\n' || *p == '\r' || *p == ' ' || *p == '\t'))
- p++;
-
- if (p == end)
- break;
-
- const UChar* lineStart = p;
-
- // Find the end of the line
- while (p < end && *p != '\r' && *p != '\n')
- p++;
-
- // Check if we have a comment
- if (*lineStart == '#')
- continue;
-
- // Get rid of trailing whitespace
- const UChar* tmp = p - 1;
- while (tmp > lineStart && (*tmp == ' ' || *tmp == '\t'))
- tmp--;
-
- String line(lineStart, tmp - lineStart + 1);
-
- if (line == "CACHE:")
- mode = Explicit;
- else if (line == "FALLBACK:")
- mode = Fallback;
- else if (line == "NETWORK:")
- mode = OnlineWhitelist;
- else if (line.endsWith(":"))
- mode = Unknown;
- else if (mode == Unknown)
- continue;
- else if (mode == Explicit || mode == OnlineWhitelist) {
- const UChar* p = line.characters();
- const UChar* lineEnd = p + line.length();
-
- // Look for whitespace separating the URL from subsequent ignored tokens.
- while (p < lineEnd && *p != '\t' && *p != ' ')
- p++;
-
- KURL url(manifestURL, String(line.characters(), p - line.characters()));
-
- if (!url.isValid())
- continue;
-
- if (url.hasRef())
- url.setRef(String());
-
- if (!equalIgnoringCase(url.protocol(), manifestURL.protocol()))
- continue;
-
- if (mode == Explicit)
- manifest.explicitURLs.add(url.string());
- else
- manifest.onlineWhitelistedURLs.append(url);
-
- } else if (mode == Fallback) {
- const UChar* p = line.characters();
- const UChar* lineEnd = p + line.length();
-
- // Look for whitespace separating the two URLs
- while (p < lineEnd && *p != '\t' && *p != ' ')
- p++;
-
- if (p == lineEnd) {
- // There was no whitespace separating the URLs.
- continue;
- }
-
- KURL namespaceURL(manifestURL, String(line.characters(), p - line.characters()));
- if (!namespaceURL.isValid())
- continue;
- if (namespaceURL.hasRef())
- namespaceURL.setRef(String());
-
- if (!protocolHostAndPortAreEqual(manifestURL, namespaceURL))
- continue;
-
- // Skip whitespace separating fallback namespace from URL.
- while (p < lineEnd && (*p == '\t' || *p == ' '))
- p++;
-
- // Look for whitespace separating the URL from subsequent ignored tokens.
- const UChar* fallbackStart = p;
- while (p < lineEnd && *p != '\t' && *p != ' ')
- p++;
-
- KURL fallbackURL(manifestURL, String(fallbackStart, p - fallbackStart));
- if (!fallbackURL.isValid())
- continue;
- if (fallbackURL.hasRef())
- fallbackURL.setRef(String());
-
- if (!protocolHostAndPortAreEqual(manifestURL, fallbackURL))
- continue;
-
- manifest.fallbackURLs.append(make_pair(namespaceURL, fallbackURL));
- } else
- ASSERT_NOT_REACHED();
+ ++p;
+
+ // Check if we have a comment
+ if (*line_start == '#')
+ continue;
+
+ // Get rid of trailing whitespace
+ const wchar_t* tmp = p - 1;
+ while (tmp > line_start && (*tmp == ' ' || *tmp == '\t'))
+ --tmp;
+
+ // |line| is never empty: line_start points at a non-whitespace character
+ // and the trimming loop above never moves before it.
+ std::wstring line(line_start, tmp - line_start + 1);
+
+ if (line == L"CACHE:") {
+ mode = kExplicit;
+ } else if (line == L"FALLBACK:") {
+ mode = kFallback;
+ } else if (line == L"NETWORK:") {
+ mode = kOnlineWhitelist;
+ } else if (*(line.end() - 1) == ':') {
+ mode = kUnknown;
+ } else if (mode == kUnknown) {
+ continue;
+ } else if (mode == kExplicit || mode == kOnlineWhitelist) {
+ const wchar_t* line_p = line.c_str();
+ const wchar_t* line_end = line_p + line.length();
+
+ // Look for whitespace separating the URL from subsequent ignored tokens.
+ // Both tests must inspect |line_p|; the outer |p| rests on the line
+ // terminator and would never match a space.
+ while (line_p < line_end && *line_p != '\t' && *line_p != ' ')
+ ++line_p;
+
+ string16 url16;
+ WideToUTF16(line.c_str(), line_p - line.c_str(), &url16);
+ GURL url = manifest_url.Resolve(url16);
+ if (!url.is_valid())
+ continue;
+ if (url.has_ref()) {
+ GURL::Replacements replacements;
+ replacements.ClearRef();
+ url = url.ReplaceComponents(replacements);
+ }
+
+ // Scheme component must be the same as the manifest URL's.
+ if (url.scheme() != manifest_url.scheme()) {
+ continue;
+ }
+
+ if (mode == kExplicit) {
+ manifest.explicit_urls.insert(url.spec());
+ } else {
+ manifest.online_whitelisted_urls.push_back(url);
+ }
+ } else if (mode == kFallback) {
+ const wchar_t* line_p = line.c_str();
+ const wchar_t* line_end = line_p + line.length();
+
+ // Look for whitespace separating the two URLs
+ while (line_p < line_end && *line_p != '\t' && *line_p != ' ')
+ ++line_p;
+
+ if (line_p == line_end) {
+ // There was no whitespace separating the URLs.
+ continue;
+ }
+
+ string16 namespace_url16;
+ WideToUTF16(line.c_str(), line_p - line.c_str(), &namespace_url16);
+ GURL namespace_url = manifest_url.Resolve(namespace_url16);
+ if (!namespace_url.is_valid())
+ continue;
+ if (namespace_url.has_ref()) {
+ GURL::Replacements replacements;
+ replacements.ClearRef();
+ namespace_url = namespace_url.ReplaceComponents(replacements);
+ }
+
+ // Fallback namespace URL must have the same scheme, host and port
+ // as the manifest's URL.
+ if (manifest_url.GetOrigin() != namespace_url.GetOrigin()) {
+ continue;
+ }
+
+ // Skip whitespace separating fallback namespace from URL.
+ while (line_p < line_end && (*line_p == '\t' || *line_p == ' '))
+ ++line_p;
+
+ // Look for whitespace separating the URL from subsequent ignored tokens.
+ const wchar_t* fallback_start = line_p;
+ while (line_p < line_end && *line_p != '\t' && *line_p != ' ')
+ ++line_p;
+
+ string16 fallback_url16;
+ WideToUTF16(fallback_start, line_p - fallback_start, &fallback_url16);
+ GURL fallback_url = manifest_url.Resolve(fallback_url16);
+ if (!fallback_url.is_valid())
+ continue;
+ if (fallback_url.has_ref()) {
+ GURL::Replacements replacements;
+ replacements.ClearRef();
+ fallback_url = fallback_url.ReplaceComponents(replacements);
+ }
+
+ // Fallback entry URL must have the same scheme, host and port
+ // as the manifest's URL.
+ if (manifest_url.GetOrigin() != fallback_url.GetOrigin()) {
+ continue;
+ }
+
+ // Store regardless of duplicate namespace URL. Only first match
+ // will ever be used.
+ manifest.fallback_urls.push_back(
+ std::make_pair(namespace_url, fallback_url));
+ } else {
+ NOTREACHED();
}
+ }
- return true;
-}
-
+ return true;
}
-#endif // ENABLE(OFFLINE_WEB_APPLICATIONS)
+} // namespace appcache