// Copyright 2015 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "content/child/site_isolation_stats_gatherer.h" #include #include #include "base/macros.h" #include "base/metrics/histogram.h" #include "base/strings/string_piece.h" #include "base/strings/string_util.h" #include "content/public/common/resource_response_info.h" #include "net/http/http_response_headers.h" namespace content { namespace { // The gathering of UMA stats for site isolation is deactivated by default, and // only activated in renderer processes. static bool g_stats_gathering_enabled = false; bool IsRenderableStatusCode(int status_code) { // Chrome only uses the content of a response with one of these status codes // for CSS/JavaScript. For images, Chrome just ignores status code. const int renderable_status_code[] = { 200, 201, 202, 203, 206, 300, 301, 302, 303, 305, 306, 307}; for (size_t i = 0; i < arraysize(renderable_status_code); ++i) { if (renderable_status_code[i] == status_code) return true; } return false; } void IncrementHistogramCount(const std::string& name) { // The default value of min, max, bucket_count are copied from histogram.h. base::HistogramBase* histogram_pointer = base::Histogram::FactoryGet( name, 1, 100000, 50, base::HistogramBase::kUmaTargetedHistogramFlag); histogram_pointer->Add(1); } void IncrementHistogramEnum(const std::string& name, uint32_t sample, uint32_t boundary_value) { // The default value of min, max, bucket_count are copied from histogram.h. base::HistogramBase* histogram_pointer = base::LinearHistogram::FactoryGet( name, 1, boundary_value, boundary_value + 1, base::HistogramBase::kUmaTargetedHistogramFlag); histogram_pointer->Add(sample); } void HistogramCountBlockedResponse( const std::string& bucket_prefix, const scoped_ptr& resp_data, bool nosniff_block) { std::string block_label(nosniff_block ? ".NoSniffBlocked" : ".Blocked"); IncrementHistogramCount(bucket_prefix + block_label); // The content is blocked if it is sniffed as HTML/JSON/XML. When // the blocked response is with an error status code, it is not // disruptive for the following reasons : 1) the blocked content is // not a binary object (such as an image) since it is sniffed as // text; 2) then, this blocking only breaks the renderer behavior // only if it is either JavaScript or CSS. However, the renderer // doesn't use the contents of JS/CSS with unaffected status code // (e.g, 404). 3) the renderer is expected not to use the cross-site // document content for purposes other than JS/CSS (e.g, XHR). bool renderable_status_code = IsRenderableStatusCode(resp_data->http_status_code); if (renderable_status_code) { IncrementHistogramEnum( bucket_prefix + block_label + ".RenderableStatusCode2", resp_data->resource_type, RESOURCE_TYPE_LAST_TYPE); } else { IncrementHistogramCount(bucket_prefix + block_label + ".NonRenderableStatusCode"); } } void HistogramCountNotBlockedResponse(const std::string& bucket_prefix, bool sniffed_as_js) { IncrementHistogramCount(bucket_prefix + ".NotBlocked"); if (sniffed_as_js) IncrementHistogramCount(bucket_prefix + ".NotBlocked.MaybeJS"); } } // namespace SiteIsolationResponseMetaData::SiteIsolationResponseMetaData() { } void SiteIsolationStatsGatherer::SetEnabled(bool enabled) { g_stats_gathering_enabled = enabled; } scoped_ptr SiteIsolationStatsGatherer::OnReceivedResponse( const GURL& frame_origin, const GURL& response_url, ResourceType resource_type, int origin_pid, const ResourceResponseInfo& info) { if (!g_stats_gathering_enabled) return nullptr; // if |origin_pid| is non-zero, it means that this response is for a plugin // spawned from this renderer process. We exclude responses for plugins for // now, but eventually, we're going to make plugin processes directly talk to // the browser process so that we don't apply cross-site document blocking to // them. if (origin_pid) return nullptr; UMA_HISTOGRAM_COUNTS("SiteIsolation.AllResponses", 1); // See if this is for navigation. If it is, don't block it, under the // assumption that we will put it in an appropriate process. if (IsResourceTypeFrame(resource_type)) return nullptr; if (!CrossSiteDocumentClassifier::IsBlockableScheme(response_url)) return nullptr; if (CrossSiteDocumentClassifier::IsSameSite(frame_origin, response_url)) return nullptr; CrossSiteDocumentMimeType canonical_mime_type = CrossSiteDocumentClassifier::GetCanonicalMimeType(info.mime_type); if (canonical_mime_type == CROSS_SITE_DOCUMENT_MIME_TYPE_OTHERS) return nullptr; // Every CORS request should have the Access-Control-Allow-Origin header even // if it is preceded by a pre-flight request. Therefore, if this is a CORS // request, it has this header. response.httpHeaderField() internally uses // case-insensitive matching for the header name. std::string access_control_origin; // We can use a case-insensitive header name for EnumerateHeader(). info.headers->EnumerateHeader(NULL, "access-control-allow-origin", &access_control_origin); if (CrossSiteDocumentClassifier::IsValidCorsHeaderSet( frame_origin, response_url, access_control_origin)) return nullptr; // Real XSD data collection starts from here. std::string no_sniff; info.headers->EnumerateHeader(NULL, "x-content-type-options", &no_sniff); scoped_ptr resp_data( new SiteIsolationResponseMetaData); resp_data->frame_origin = frame_origin.spec(); resp_data->response_url = response_url; resp_data->resource_type = resource_type; resp_data->canonical_mime_type = canonical_mime_type; resp_data->http_status_code = info.headers->response_code(); resp_data->no_sniff = base::LowerCaseEqualsASCII(no_sniff, "nosniff"); return resp_data; } bool SiteIsolationStatsGatherer::OnReceivedFirstChunk( const scoped_ptr& resp_data, const char* raw_data, int raw_length) { if (!g_stats_gathering_enabled) return false; DCHECK(resp_data.get()); base::StringPiece data(raw_data, raw_length); // Record the length of the first received chunk of data to see if it's enough // for sniffing. UMA_HISTOGRAM_COUNTS("SiteIsolation.XSD.DataLength", raw_length); // Record the number of cross-site document responses with a specific mime // type (text/html, text/xml, etc). UMA_HISTOGRAM_ENUMERATION("SiteIsolation.XSD.MimeType", resp_data->canonical_mime_type, CROSS_SITE_DOCUMENT_MIME_TYPE_MAX); // Store the result of cross-site document blocking analysis. bool would_block = false; bool sniffed_as_js = SniffForJS(data); // Record the number of responses whose content is sniffed for what its mime // type claims it to be. For example, we apply a HTML sniffer for a document // tagged with text/html here. Whenever this check becomes true, we'll block // the response. if (resp_data->canonical_mime_type != CROSS_SITE_DOCUMENT_MIME_TYPE_PLAIN) { std::string bucket_prefix; bool sniffed_as_target_document = false; if (resp_data->canonical_mime_type == CROSS_SITE_DOCUMENT_MIME_TYPE_HTML) { bucket_prefix = "SiteIsolation.XSD.HTML"; sniffed_as_target_document = CrossSiteDocumentClassifier::SniffForHTML(data); } else if (resp_data->canonical_mime_type == CROSS_SITE_DOCUMENT_MIME_TYPE_XML) { bucket_prefix = "SiteIsolation.XSD.XML"; sniffed_as_target_document = CrossSiteDocumentClassifier::SniffForXML(data); } else if (resp_data->canonical_mime_type == CROSS_SITE_DOCUMENT_MIME_TYPE_JSON) { bucket_prefix = "SiteIsolation.XSD.JSON"; sniffed_as_target_document = CrossSiteDocumentClassifier::SniffForJSON(data); } else { NOTREACHED() << "Not a blockable mime type: " << resp_data->canonical_mime_type; } if (sniffed_as_target_document) { would_block = true; HistogramCountBlockedResponse(bucket_prefix, resp_data, false); } else { if (resp_data->no_sniff) { would_block = true; HistogramCountBlockedResponse(bucket_prefix, resp_data, true); } else { HistogramCountNotBlockedResponse(bucket_prefix, sniffed_as_js); } } } else { // This block is for plain text documents. We apply our HTML, XML, // and JSON sniffer to a text document in the order, and block it // if any of them succeeds in sniffing. std::string bucket_prefix; if (CrossSiteDocumentClassifier::SniffForHTML(data)) bucket_prefix = "SiteIsolation.XSD.Plain.HTML"; else if (CrossSiteDocumentClassifier::SniffForXML(data)) bucket_prefix = "SiteIsolation.XSD.Plain.XML"; else if (CrossSiteDocumentClassifier::SniffForJSON(data)) bucket_prefix = "SiteIsolation.XSD.Plain.JSON"; if (bucket_prefix.size() > 0) { would_block = true; HistogramCountBlockedResponse(bucket_prefix, resp_data, false); } else if (resp_data->no_sniff) { would_block = true; HistogramCountBlockedResponse("SiteIsolation.XSD.Plain", resp_data, true); } else { HistogramCountNotBlockedResponse("SiteIsolation.XSD.Plain", sniffed_as_js); } } return would_block; } bool SiteIsolationStatsGatherer::SniffForJS(base::StringPiece data) { // The purpose of this function is to try to see if there's any possibility // that this data can be JavaScript (superset of JS). Search for "var " for JS // detection. This is a real hack and should only be used for stats gathering. return data.find("var ") != base::StringPiece::npos; } } // namespace content