// Copyright (c) 2010 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // // Implementation of the MalwareDetails class. #include "chrome/browser/safe_browsing/malware_details.h" #include "base/lazy_instance.h" #include "chrome/browser/safe_browsing/safe_browsing_service.h" #include "chrome/browser/safe_browsing/report.pb.h" #include "chrome/common/safebrowsing_messages.h" #include "content/browser/browser_thread.h" #include "content/browser/renderer_host/render_view_host.h" #include "content/browser/tab_contents/navigation_entry.h" #include "content/browser/tab_contents/tab_contents.h" using safe_browsing::ClientMalwareReportRequest; // Keep in sync with KMaxNodes in renderer/safe_browsing/malware_dom_details static const uint32 kMaxDomNodes = 500; // static MalwareDetailsFactory* MalwareDetails::factory_ = NULL; // The default MalwareDetailsFactory. Global, made a singleton so we // don't leak it. class MalwareDetailsFactoryImpl : public MalwareDetailsFactory { public: MalwareDetails* CreateMalwareDetails( TabContents* tab_contents, const SafeBrowsingService::UnsafeResource& unsafe_resource) { return new MalwareDetails(tab_contents, unsafe_resource); } private: friend struct base::DefaultLazyInstanceTraits< MalwareDetailsFactoryImpl>; MalwareDetailsFactoryImpl() { } DISALLOW_COPY_AND_ASSIGN(MalwareDetailsFactoryImpl); }; static base::LazyInstance g_malware_details_factory_impl(base::LINKER_INITIALIZED); // Create a MalwareDetails for the given tab. /* static */ MalwareDetails* MalwareDetails::NewMalwareDetails( TabContents* tab_contents, const SafeBrowsingService::UnsafeResource& resource) { // Set up the factory if this has not been done already (tests do that // before this method is called). if (!factory_) factory_ = g_malware_details_factory_impl.Pointer(); return factory_->CreateMalwareDetails(tab_contents, resource); } // Create a MalwareDetails for the given tab. Runs in the UI thread. MalwareDetails::MalwareDetails( TabContents* tab_contents, const SafeBrowsingService::UnsafeResource& resource) : TabContentsObserver(tab_contents), resource_(resource) { StartCollection(); } MalwareDetails::~MalwareDetails() {} bool MalwareDetails::OnMessageReceived(const IPC::Message& message) { bool handled = true; IPC_BEGIN_MESSAGE_MAP(MalwareDetails, message) IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_MalwareDOMDetails, OnReceivedMalwareDOMDetails) IPC_MESSAGE_UNHANDLED(handled = false) IPC_END_MESSAGE_MAP() return handled; } bool MalwareDetails::IsPublicUrl(const GURL& url) const { return url.SchemeIs("http"); // TODO(panayiotis): also skip internal urls. } // Looks for a Resource for the given url in resources_. If found, it // updates |resource|. Otherwise, it creates a new message, adds it to // resources_ and updates |resource| to point to it. ClientMalwareReportRequest::Resource* MalwareDetails::FindOrCreateResource( const GURL& url) { ResourceMap::iterator it = resources_.find(url.spec()); if (it != resources_.end()) { return it->second.get(); } // Create the resource for |url|. int id = resources_.size(); linked_ptr new_resource( new ClientMalwareReportRequest::Resource()); new_resource->set_url(url.spec()); new_resource->set_id(id); resources_[url.spec()] = new_resource; return new_resource.get(); } void MalwareDetails::AddUrl(const GURL& url, const GURL& parent, const std::string& tagname, const std::vector* children) { if (!IsPublicUrl(url)) return; // Find (or create) the resource for the url. ClientMalwareReportRequest::Resource* url_resource = FindOrCreateResource(url); if (!tagname.empty()) { url_resource->set_tag_name(tagname); } if (!parent.is_empty() && IsPublicUrl(parent)) { // Add the resource for the parent. ClientMalwareReportRequest::Resource* parent_resource = FindOrCreateResource(parent); // Update the parent-child relation url_resource->set_parent_id(parent_resource->id()); } if (children) { for (std::vector::const_iterator it = children->begin(); it != children->end(); it++) { ClientMalwareReportRequest::Resource* child_resource = FindOrCreateResource(*it); url_resource->add_child_ids(child_resource->id()); } } } void MalwareDetails::StartCollection() { DVLOG(1) << "Starting to compute malware details."; report_.reset(new ClientMalwareReportRequest()); if (IsPublicUrl(resource_.url)) { report_->set_malware_url(resource_.url.spec()); } GURL page_url = tab_contents()->GetURL(); if (IsPublicUrl(page_url)) { report_->set_page_url(page_url.spec()); } GURL referrer_url; NavigationEntry* nav_entry = tab_contents()->controller().GetActiveEntry(); if (nav_entry) { referrer_url = nav_entry->referrer(); if (IsPublicUrl(referrer_url)) { report_->set_referrer_url(referrer_url.spec()); } } // Add the nodes, starting from the page url. AddUrl(page_url, GURL(), "", NULL); // Add the resource_url and its original url, if non-empty and different. if (!resource_.original_url.is_empty() && resource_.url != resource_.original_url) { // Add original_url, as the parent of resource_url. AddUrl(resource_.original_url, GURL(), "", NULL); AddUrl(resource_.url, resource_.original_url, "", NULL); } else { AddUrl(resource_.url, GURL(), "", NULL); } // Add the redirect urls, if non-empty. The redirect urls do not include the // original url, but include the unsafe url which is the last one of the // redirect urls chain GURL parent_url; // Set the original url as the parent of the first redirect url if it's not // empty. if (!resource_.original_url.is_empty()) { parent_url = resource_.original_url; } // Set the previous redirect url as the parent of the next one for (unsigned int i = 0; i < resource_.redirect_urls.size(); ++i) { AddUrl(resource_.redirect_urls[i], parent_url, "", NULL); parent_url = resource_.redirect_urls[i]; } // Add the referrer url. if (nav_entry && !referrer_url.is_empty()) { AddUrl(referrer_url, GURL(), "", NULL); } // Get URLs of frames, scripts etc from the DOM. // OnReceivedMalwareDOMDetails will be called when the renderer replies. tab_contents()->render_view_host()->GetMalwareDOMDetails(); } // When the renderer is done, this is called. void MalwareDetails::OnReceivedMalwareDOMDetails( const std::vector& params) { // Schedule this in IO thread, so it doesn't conflict with future users // of our data structures (eg GetSerializedReport). BrowserThread::PostTask( BrowserThread::IO, FROM_HERE, NewRunnableMethod( this, &MalwareDetails::AddDOMDetails, params)); } void MalwareDetails::AddDOMDetails( const std::vector& params) { DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); // Add the urls from the DOM to |resources_|. The renderer could be // sending bogus messages, so limit the number of nodes we accept. DVLOG(1) << "Nodes from the DOM: " << params.size(); for (uint32 i = 0; i < params.size() && i < kMaxDomNodes; ++i) { SafeBrowsingHostMsg_MalwareDOMDetails_Node node = params[i]; DVLOG(1) << node.url << ", " << node.tag_name << ", " << node.parent; AddUrl(node.url, node.parent, node.tag_name, &(node.children)); } } // Called from the SB Service on the IO thread, after the user has // closed the tab, or clicked proceed or goback. Since the user needs // to take an action, we expect this to be called after // OnReceivedMalwareDOMDetails in most cases. If not, we don't include // the DOM data in our report. const std::string* MalwareDetails::GetSerializedReport() { DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); // The |report_| protocol buffer is now generated: We add all the // urls in our |resources_| maps. for (ResourceMap::const_iterator it = resources_.begin(); it != resources_.end(); it++) { ClientMalwareReportRequest::Resource* pb_resource = report_->add_resources(); pb_resource->CopyFrom(*(it->second)); } scoped_ptr request_data(new std::string()); if (!report_->SerializeToString(request_data.get())) { DLOG(ERROR) << "Unable to serialize the malware report."; } return request_data.release(); }