diff options
author | panayiotis@google.com <panayiotis@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-01-13 23:46:02 +0000 |
---|---|---|
committer | panayiotis@google.com <panayiotis@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-01-13 23:46:02 +0000 |
commit | 29852c22221c64a330be04c48b9eced2d82b1dd8 (patch) | |
tree | ceb47bc5558df2791b951e49283294784d621196 /chrome/browser/safe_browsing | |
parent | e6956ee8c5f023f65f4471f922002c2a273996d6 (diff) | |
download | chromium_src-29852c22221c64a330be04c48b9eced2d82b1dd8.zip chromium_src-29852c22221c64a330be04c48b9eced2d82b1dd8.tar.gz chromium_src-29852c22221c64a330be04c48b9eced2d82b1dd8.tar.bz2 |
Add a unique ID for each URL in the malware details protocol buffer. This allows us to save some space in the protocol buffer by not repeating the urls of parents and children. Also, change some string fields to bytes to avoid the implicit utf-8 conversion when not applicable.
BUG=60831
TEST=relevant unit_tests, browser_tests
Review URL: http://codereview.chromium.org/6208003
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@71384 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/safe_browsing')
-rw-r--r-- | chrome/browser/safe_browsing/malware_details.cc | 64 | ||||
-rw-r--r-- | chrome/browser/safe_browsing/malware_details.h | 18 | ||||
-rw-r--r-- | chrome/browser/safe_browsing/malware_details_unittest.cc | 93 | ||||
-rw-r--r-- | chrome/browser/safe_browsing/report.proto | 47 |
4 files changed, 130 insertions, 92 deletions
diff --git a/chrome/browser/safe_browsing/malware_details.cc b/chrome/browser/safe_browsing/malware_details.cc index b4860a2..9cc8233 100644 --- a/chrome/browser/safe_browsing/malware_details.cc +++ b/chrome/browser/safe_browsing/malware_details.cc @@ -11,6 +11,8 @@ #include "chrome/browser/tab_contents/tab_contents.h" #include "chrome/browser/safe_browsing/report.pb.h" +using safe_browsing::ClientMalwareReportRequest; + // Create a MalwareDetails for the given tab. Runs in the UI thread. MalwareDetails::MalwareDetails( TabContents* tab_contents, @@ -26,21 +28,46 @@ bool MalwareDetails::IsPublicUrl(const GURL& url) const { return url.SchemeIs("http"); // TODO(panayiotis): also skip internal urls. } -void MalwareDetails::AddNode(const std::string& url, +// Looks for a Resource for the given url in resources_. If found, it +// updates |resource|. Otherwise, it creates a new message, adds it to +// resources_ and updates |resource| to point to it. +ClientMalwareReportRequest::Resource* MalwareDetails::FindOrCreateResource( + const std::string& url) { + ResourceMap::iterator it = resources_.find(url); + if (it != resources_.end()) { + return it->second.get(); + } + + // Create the resource for |url|. + int id = resources_.size(); + linked_ptr<ClientMalwareReportRequest::Resource> new_resource( + new ClientMalwareReportRequest::Resource()); + new_resource->set_url(url); + new_resource->set_id(id); + resources_[url] = new_resource; + return new_resource.get(); +} + +void MalwareDetails::AddUrl(const std::string& url, const std::string& parent) { if (!IsPublicUrl(GURL(url))) return; - linked_ptr<safe_browsing::ClientMalwareReportRequest::Resource> resource( - new safe_browsing::ClientMalwareReportRequest::Resource()); - resource->set_url(url); - if (!parent.empty() && IsPublicUrl(GURL(parent))) - resource->set_parent(parent); - urls_[url] = resource; + + // Find (or create) the resource for the url. + ClientMalwareReportRequest::Resource* url_resource = + FindOrCreateResource(url); + if (!parent.empty() && IsPublicUrl(GURL(parent))) { + // Add the resource for the parent. + ClientMalwareReportRequest::Resource* parent_resource = + FindOrCreateResource(parent); + // Update the parent-child relation + url_resource->set_parent_id(parent_resource->id()); + } } void MalwareDetails::StartCollection() { DVLOG(1) << "Starting to compute malware details."; - report_.reset(new safe_browsing::ClientMalwareReportRequest()); + report_.reset(new ClientMalwareReportRequest()); if (IsPublicUrl(resource_.url)) { report_->set_malware_url(resource_.url.spec()); @@ -61,28 +88,29 @@ void MalwareDetails::StartCollection() { } // Add the nodes, starting from the page url. - AddNode(page_url.spec(), ""); + AddUrl(page_url.spec(), ""); // Add the resource_url and its original url, if non-empty and different. if (!resource_.original_url.spec().empty() && resource_.url != resource_.original_url) { // Add original_url, as the parent of resource_url. - AddNode(resource_.original_url.spec(), ""); - AddNode(resource_.url.spec(), resource_.original_url.spec()); + AddUrl(resource_.original_url.spec(), ""); + AddUrl(resource_.url.spec(), resource_.original_url.spec()); } else { - AddNode(resource_.url.spec(), ""); + AddUrl(resource_.url.spec(), ""); } // Add the referrer url. if (nav_entry && !referrer_url.spec().empty()) { - AddNode(referrer_url.spec(), ""); + AddUrl(referrer_url.spec(), ""); } - // Add all the urls in our |urls_| map to the |report_| protobuf. - for (ResourceMap::const_iterator it = urls_.begin(); - it != urls_.end(); it++) { - safe_browsing::ClientMalwareReportRequest::Resource* pb_resource = - report_->add_nodes(); + // The |report_| protocol buffer is now generated: We add all the + // urls in our |resources_| maps. + for (ResourceMap::const_iterator it = resources_.begin(); + it != resources_.end(); it++) { + ClientMalwareReportRequest::Resource* pb_resource = + report_->add_resources(); pb_resource->CopyFrom(*(it->second)); } } diff --git a/chrome/browser/safe_browsing/malware_details.h b/chrome/browser/safe_browsing/malware_details.h index 7d21dc4..bbe63e1 100644 --- a/chrome/browser/safe_browsing/malware_details.h +++ b/chrome/browser/safe_browsing/malware_details.h @@ -36,6 +36,7 @@ class MalwareDetails : public base::RefCountedThreadSafe<MalwareDetails> { private: friend class base::RefCountedThreadSafe<MalwareDetails>; + // Maps a URL to its Resource. typedef base::hash_map< std::string, linked_ptr<safe_browsing::ClientMalwareReportRequest::Resource> > @@ -47,17 +48,24 @@ class MalwareDetails : public base::RefCountedThreadSafe<MalwareDetails> { // Whether the url is "public" so we can add it to the report. bool IsPublicUrl(const GURL& url) const; - // Adds a node to |urls_|. |parent| can be empty. - void AddNode(const std::string& url, const std::string& parent); + // Finds an existing Resource for the given url, or creates a new + // one if not found, and adds it to |resources_|. Returns the + // found/created resource. + safe_browsing::ClientMalwareReportRequest::Resource* FindOrCreateResource( + const std::string& url); + + // Adds a Resource to resources_ with the given parent + // relationship. |parent| can be empty. + void AddUrl(const std::string& url, const std::string& parent); ~MalwareDetails(); TabContents* tab_contents_; const SafeBrowsingService::UnsafeResource resource_; - // The urls that we collect. We first add them into this map and then - // generate a protocol buffer from it. - ResourceMap urls_; + // For every Url we collect we create a Resource message. We keep + // them in a map so we can avoid duplicates. + ResourceMap resources_; // The report protocol buffer. scoped_ptr<safe_browsing::ClientMalwareReportRequest> report_; diff --git a/chrome/browser/safe_browsing/malware_details_unittest.cc b/chrome/browser/safe_browsing/malware_details_unittest.cc index b6136c8..ea42fe4 100644 --- a/chrome/browser/safe_browsing/malware_details_unittest.cc +++ b/chrome/browser/safe_browsing/malware_details_unittest.cc @@ -18,6 +18,8 @@ static const char* kLandingURL = "http://www.landingpage.com/"; static const char* kMalwareURL = "http://www.malware.com/"; static const char* kHttpsURL = "https://www.url.com/"; +using safe_browsing::ClientMalwareReportRequest; + class MalwareDetailsTest : public RenderViewHostTestHarness { public: MalwareDetailsTest() @@ -30,9 +32,9 @@ class MalwareDetailsTest : public RenderViewHostTestHarness { } static bool ResourceLessThan( - const safe_browsing::ClientMalwareReportRequest::Resource* lhs, - const safe_browsing::ClientMalwareReportRequest::Resource* rhs) { - return lhs->url() < rhs->url(); + const ClientMalwareReportRequest::Resource* lhs, + const ClientMalwareReportRequest::Resource* rhs) { + return lhs->id() < rhs->id(); } protected: @@ -47,38 +49,36 @@ class MalwareDetailsTest : public RenderViewHostTestHarness { resource->render_view_id = contents_->render_view_host()->routing_id(); } - void VerifyResults( - const safe_browsing::ClientMalwareReportRequest& report_pb, - const safe_browsing::ClientMalwareReportRequest& expected_pb) { + void VerifyResults(const ClientMalwareReportRequest& report_pb, + const ClientMalwareReportRequest& expected_pb) { EXPECT_EQ(expected_pb.malware_url(), report_pb.malware_url()); EXPECT_EQ(expected_pb.page_url(), report_pb.page_url()); EXPECT_EQ(expected_pb.referrer_url(), report_pb.referrer_url()); - ASSERT_EQ(expected_pb.nodes_size(), report_pb.nodes_size()); - // Sort the nodes, to make the test deterministic - std::vector<const safe_browsing::ClientMalwareReportRequest::Resource*> - nodes; - for (int i = 0; i < report_pb.nodes_size(); ++i) { - const safe_browsing::ClientMalwareReportRequest::Resource& resource = - report_pb.nodes(i); - nodes.push_back(&resource); + ASSERT_EQ(expected_pb.resources_size(), report_pb.resources_size()); + // Sort the resources, to make the test deterministic + std::vector<const ClientMalwareReportRequest::Resource*> resources; + for (int i = 0; i < report_pb.resources_size(); ++i) { + const ClientMalwareReportRequest::Resource& resource = + report_pb.resources(i); + resources.push_back(&resource); } - std::sort(nodes.begin(), nodes.end(), + std::sort(resources.begin(), resources.end(), &MalwareDetailsTest::ResourceLessThan); - std::vector<const safe_browsing::ClientMalwareReportRequest::Resource*> - expected; - for (int i = 0; i < report_pb.nodes_size(); ++i) { - const safe_browsing::ClientMalwareReportRequest::Resource& resource = - expected_pb.nodes(i); + std::vector<const ClientMalwareReportRequest::Resource*> expected; + for (int i = 0; i < report_pb.resources_size(); ++i) { + const ClientMalwareReportRequest::Resource& resource = + expected_pb.resources(i); expected.push_back(&resource); } std::sort(expected.begin(), expected.end(), &MalwareDetailsTest::ResourceLessThan); for (uint32 i = 0; i < expected.size(); ++i) { - EXPECT_EQ(expected[i]->url(), nodes[i]->url()); - EXPECT_EQ(expected[i]->parent(), nodes[i]->parent()); + EXPECT_EQ(expected[i]->id(), resources[i]->id()); + EXPECT_EQ(expected[i]->url(), resources[i]->url()); + EXPECT_EQ(expected[i]->parent_id(), resources[i]->parent_id()); } } @@ -98,19 +98,20 @@ TEST_F(MalwareDetailsTest, MalwareSubResource) { contents(), resource); scoped_ptr<const std::string> serialized(report->GetSerializedReport()); - safe_browsing::ClientMalwareReportRequest actual; + ClientMalwareReportRequest actual; actual.ParseFromString(*serialized); - safe_browsing::ClientMalwareReportRequest expected; + ClientMalwareReportRequest expected; expected.set_malware_url(kMalwareURL); expected.set_page_url(kLandingURL); expected.set_referrer_url(""); - safe_browsing::ClientMalwareReportRequest::Resource* node = - expected.add_nodes(); - node->set_url(kLandingURL); - node = expected.add_nodes(); - node->set_url(kMalwareURL); + ClientMalwareReportRequest::Resource* pb_resource = expected.add_resources(); + pb_resource->set_id(0); + pb_resource->set_url(kLandingURL); + pb_resource = expected.add_resources(); + pb_resource->set_id(1); + pb_resource->set_url(kMalwareURL); VerifyResults(actual, expected); } @@ -128,25 +129,28 @@ TEST_F(MalwareDetailsTest, MalwareSubResourceWithOriginalUrl) { contents(), resource); scoped_ptr<const std::string> serialized(report->GetSerializedReport()); - safe_browsing::ClientMalwareReportRequest actual; + ClientMalwareReportRequest actual; actual.ParseFromString(*serialized); - safe_browsing::ClientMalwareReportRequest expected; + ClientMalwareReportRequest expected; expected.set_malware_url(kMalwareURL); expected.set_page_url(kLandingURL); expected.set_referrer_url(""); - safe_browsing::ClientMalwareReportRequest::Resource* node = - expected.add_nodes(); - node->set_url(kLandingURL); + ClientMalwareReportRequest::Resource* pb_resource = expected.add_resources(); + pb_resource->set_id(0); + pb_resource->set_url(kLandingURL); - // Malware url should have originalurl as parent. - node = expected.add_nodes(); - node->set_url(kMalwareURL); - node->set_parent(kOriginalLandingURL); + pb_resource = expected.add_resources(); + pb_resource->set_id(1); + pb_resource->set_url(kOriginalLandingURL); - node = expected.add_nodes(); - node->set_url(kOriginalLandingURL); + pb_resource = expected.add_resources(); + pb_resource->set_id(2); + pb_resource->set_url(kMalwareURL); + // The Resource for kMmalwareUrl should have the Resource for + // kOriginalLandingURL (with id 1) as parent. + pb_resource->set_parent_id(1); VerifyResults(actual, expected); } @@ -160,16 +164,15 @@ TEST_F(MalwareDetailsTest, NotPublicUrl) { contents(), resource); scoped_ptr<const std::string> serialized(report->GetSerializedReport()); - safe_browsing::ClientMalwareReportRequest actual; + ClientMalwareReportRequest actual; actual.ParseFromString(*serialized); - safe_browsing::ClientMalwareReportRequest expected; + ClientMalwareReportRequest expected; expected.set_malware_url(kMalwareURL); // No page_url expected.set_referrer_url(""); - safe_browsing::ClientMalwareReportRequest::Resource* node = - expected.add_nodes(); - node->set_url(kMalwareURL); // Only one node + ClientMalwareReportRequest::Resource* pb_resource = expected.add_resources(); + pb_resource->set_url(kMalwareURL); // Only one resource VerifyResults(actual, expected); } diff --git a/chrome/browser/safe_browsing/report.proto b/chrome/browser/safe_browsing/report.proto index ea4cfda..719c918 100644 --- a/chrome/browser/safe_browsing/report.proto +++ b/chrome/browser/safe_browsing/report.proto @@ -23,61 +23,60 @@ package safe_browsing; message ClientMalwareReportRequest { message HTTPHeader { - required string name = 1; - optional string value = 2; + required bytes name = 1; + optional bytes value = 2; } message HTTPRequest { message FirstLine { - optional string verb = 1; // Also known as method, eg "GET" - optional string uri = 2; - optional string version = 3; + optional bytes verb = 1; // Also known as method, eg "GET" + optional bytes uri = 2; + optional bytes version = 3; } optional FirstLine firstline = 1; repeated HTTPHeader headers = 2; - optional string body = 3; + optional bytes body = 3; // bodydigest and bodylength can be useful if the report does not // contain the body itself. - optional string bodydigest = 4; + optional bytes bodydigest = 4; optional int32 bodylength = 5; } message HTTPResponse { message FirstLine { optional int32 code = 1; - optional string reason = 2; - optional string version = 3; + optional bytes reason = 2; + optional bytes version = 3; } optional FirstLine firstline = 1; repeated HTTPHeader headers = 2; - optional string body = 3; + optional bytes body = 3; // bodydigest and bodylength can be useful if the report does not // contain the body itself. - optional string bodydigest = 4; + optional bytes bodydigest = 4; optional int32 bodylength = 5; - optional string remote_ip = 6; + optional bytes remote_ip = 6; } message Resource { - optional string url = 1; + required int32 id = 1; + optional string url = 2; + optional HTTPRequest request = 3; + optional HTTPResponse response = 4; - // URL of the parent frame. - optional string parent = 2; - - // Tag that was used to include this resource, eg "iframe" - optional string tag_name = 3; - - optional HTTPRequest request = 4; - optional HTTPResponse response = 5; + optional int32 parent_id = 5; // Id of the parent, if known. // A list of children. The order of the children in this list is - // significant. The |parent| field for child nodes can be derived + // significant. The |parent_id| field for child nodes can be derived // from this, but this allows us to be more flexible. - repeated string children = 6; + repeated int32 child_ids = 6; + + // Tag that was used to include this resource, eg "iframe" + optional string tag_name = 7; } // URL of the resource that matches the safe browsing list. @@ -87,5 +86,5 @@ message ClientMalwareReportRequest { optional string page_url = 2; optional string referrer_url = 3; - repeated Resource nodes = 4; + repeated Resource resources = 4; } |