// Copyright 2015 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "content/common/cross_site_document_classifier.h" #include "base/command_line.h" #include "base/lazy_instance.h" #include "base/logging.h" #include "base/macros.h" #include "base/metrics/histogram.h" #include "base/strings/string_util.h" #include "content/public/common/content_switches.h" #include "content/public/common/resource_response_info.h" #include "net/base/registry_controlled_domains/registry_controlled_domain.h" #include "net/http/http_response_headers.h" using base::StringPiece; namespace content { namespace { // MIME types const char kTextHtml[] = "text/html"; const char kTextXml[] = "text/xml"; const char kAppRssXml[] = "application/rss+xml"; const char kAppXml[] = "application/xml"; const char kAppJson[] = "application/json"; const char kTextJson[] = "text/json"; const char kTextXjson[] = "text/x-json"; const char kTextPlain[] = "text/plain"; bool MatchesSignature(StringPiece data, const StringPiece signatures[], size_t arr_size) { size_t offset = data.find_first_not_of(" \t\r\n"); // There is no not-whitespace character in this document. if (offset == base::StringPiece::npos) return false; data.remove_prefix(offset); for (size_t sig_index = 0; sig_index < arr_size; ++sig_index) { if (base::StartsWith(data, signatures[sig_index], base::CompareCase::INSENSITIVE_ASCII)) return true; } return false; } } // namespace CrossSiteDocumentMimeType CrossSiteDocumentClassifier::GetCanonicalMimeType( const std::string& mime_type) { if (base::LowerCaseEqualsASCII(mime_type, kTextHtml)) { return CROSS_SITE_DOCUMENT_MIME_TYPE_HTML; } if (base::LowerCaseEqualsASCII(mime_type, kTextPlain)) { return CROSS_SITE_DOCUMENT_MIME_TYPE_PLAIN; } if (base::LowerCaseEqualsASCII(mime_type, kAppJson) || base::LowerCaseEqualsASCII(mime_type, kTextJson) || base::LowerCaseEqualsASCII(mime_type, kTextXjson)) { return CROSS_SITE_DOCUMENT_MIME_TYPE_JSON; } if (base::LowerCaseEqualsASCII(mime_type, kTextXml) || base::LowerCaseEqualsASCII(mime_type, kAppRssXml) || base::LowerCaseEqualsASCII(mime_type, kAppXml)) { return CROSS_SITE_DOCUMENT_MIME_TYPE_XML; } return CROSS_SITE_DOCUMENT_MIME_TYPE_OTHERS; } bool CrossSiteDocumentClassifier::IsBlockableScheme(const GURL& url) { // We exclude ftp:// from here. FTP doesn't provide a Content-Type // header which our policy depends on, so we cannot protect any // document from FTP servers. return url.SchemeIs(url::kHttpScheme) || url.SchemeIs(url::kHttpsScheme); } bool CrossSiteDocumentClassifier::IsSameSite(const GURL& frame_origin, const GURL& response_url) { if (!frame_origin.is_valid() || !response_url.is_valid()) return false; if (frame_origin.scheme() != response_url.scheme()) return false; // SameDomainOrHost() extracts the effective domains (public suffix plus one) // from the two URLs and compare them. return net::registry_controlled_domains::SameDomainOrHost( frame_origin, response_url, net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES); } // We don't use Webkit's existing CORS policy implementation since // their policy works in terms of origins, not sites. For example, // when frame is sub.a.com and it is not allowed to access a document // with sub1.a.com. But under Site Isolation, it's allowed. bool CrossSiteDocumentClassifier::IsValidCorsHeaderSet( const GURL& frame_origin, const GURL& website_origin, const std::string& access_control_origin) { // Many websites are sending back "\"*\"" instead of "*". This is // non-standard practice, and not supported by Chrome. Refer to // CrossOriginAccessControl::passesAccessControlCheck(). // TODO(dsjang): * is not allowed for the response from a request // with cookies. This allows for more than what the renderer will // eventually be able to receive, so we won't see illegal cross-site // documents allowed by this. We have to find a way to see if this // response is from a cookie-tagged request or not in the future. if (access_control_origin == "*") return true; // TODO(dsjang): The CORS spec only treats a fully specified URL, except for // "*", but many websites are using just a domain for access_control_origin, // and this is blocked by Webkit's CORS logic here : // CrossOriginAccessControl::passesAccessControlCheck(). GURL is set // is_valid() to false when it is created from a URL containing * in the // domain part. GURL cors_origin(access_control_origin); return IsSameSite(frame_origin, cors_origin); } // This function is a slight modification of |net::SniffForHTML|. bool CrossSiteDocumentClassifier::SniffForHTML(StringPiece data) { // The content sniffer used by Chrome and Firefox are using " and do SniffForHTML after that. If we can find the // comment's end, we start HTML sniffing from there again. static const char kEndComment[] = "-->"; size_t offset = data.find(kEndComment); if (offset == base::StringPiece::npos) break; // Proceed to the index next to the ending comment (-->). data.remove_prefix(offset + strlen(kEndComment)); } return false; } bool CrossSiteDocumentClassifier::SniffForXML(base::StringPiece data) { // TODO(dsjang): Chrome's mime_sniffer is using strncasecmp() for // this signature. However, XML is case-sensitive. Don't we have to // be more lenient only to block documents starting with the exact // string