diff options
author | paulg@google.com <paulg@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-10-09 21:31:57 +0000 |
---|---|---|
committer | paulg@google.com <paulg@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-10-09 21:31:57 +0000 |
commit | 445d4cc80e88320e4bd86068c100520c9025fc63 (patch) | |
tree | 3d4fc71e16627dbd23be957651d5bb412b4440cc | |
parent | 28f180ecca4fda508a0f7b6c512051012d2036df (diff) | |
download | chromium_src-445d4cc80e88320e4bd86068c100520c9025fc63.zip chromium_src-445d4cc80e88320e4bd86068c100520c9025fc63.tar.gz chromium_src-445d4cc80e88320e4bd86068c100520c9025fc63.tar.bz2 |
Support zero size chunks in the current implementation.
This allows the SafeBrowsing servers to send us Add and Sub
chunks with no content so that when we report our update
status, the request size is decreased.
Our update status request contains a list of all chunks that
we have received from the service, and this can get fragmented
over time. For example, the part of the request containing our
phishing chunks might look like this:
goog-phish-shavar:a:1,3,5,7,9,11,13,15,17
By sending zero size chunks for the chunk numbers missing from
the above example, the report will now look like:
goog-phish-shavar:a:1-17
Given the large number of chunks and the rate of chunk expiry,
this change will reduce the client request size.
BUG= http://code.google.com/p/chromium/issues/detail?id=3262
Review URL: http://codereview.chromium.org/6369
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@3131 0039d316-1c4b-4281-b951-d872f2087c98
5 files changed, 282 insertions, 39 deletions
diff --git a/chrome/browser/safe_browsing/protocol_parser.cc b/chrome/browser/safe_browsing/protocol_parser.cc index b3a460b..c3e7b04 100644 --- a/chrome/browser/safe_browsing/protocol_parser.cc +++ b/chrome/browser/safe_browsing/protocol_parser.cc @@ -286,9 +286,11 @@ bool SafeBrowsingProtocolParser::ParseChunk(const char* data, chunks->back().chunk_number = chunk_number; if (cmd_parts[0] == "a") { + chunks->back().is_add = true; if (!ParseAddChunk(chunk_data, chunk_len, hash_len, &chunks->back().hosts)) return false; // Parse error. } else if (cmd_parts[0] == "s") { + chunks->back().is_add = false; if (!ParseSubChunk(chunk_data, chunk_len, hash_len, &chunks->back().hosts)) return false; // Parse error. } else { diff --git a/chrome/browser/safe_browsing/protocol_parser_unittest.cc b/chrome/browser/safe_browsing/protocol_parser_unittest.cc index f371b6b..b5799cd 100644 --- a/chrome/browser/safe_browsing/protocol_parser_unittest.cc +++ b/chrome/browser/safe_browsing/protocol_parser_unittest.cc @@ -551,6 +551,106 @@ TEST(SafeBrowsingProtocolParsingTest, TestReset) { EXPECT_TRUE(reset); } +// The SafeBrowsing service will occasionally send zero length chunks so that +// client requests will have longer contiguous chunk number ranges, and thus +// reduce the request size. +TEST(SafeBrowsingProtocolParsingTest, TestZeroSizeAddChunk) { + std::string add_chunk("a:1:4:0\n"); + SafeBrowsingProtocolParser parser; + bool re_key = false; + std::deque<SBChunk> chunks; + + bool result = parser.ParseChunk(add_chunk.data(), + static_cast<int>(add_chunk.length()), + "", "", &re_key, &chunks); + EXPECT_TRUE(result); + EXPECT_EQ(chunks.size(), static_cast<size_t>(1)); + EXPECT_EQ(chunks[0].chunk_number, 1); + EXPECT_EQ(chunks[0].hosts.size(), static_cast<size_t>(0)); + + safe_browsing_util::FreeChunks(&chunks); + + // Now test a zero size chunk in between normal chunks. + chunks.clear(); + std::string add_chunks("a:1:4:18\n1234\001abcd5678\001wxyz" + "a:2:4:0\n" + "a:3:4:9\ncafe\001beef"); + result = parser.ParseChunk(add_chunks.data(), + static_cast<int>(add_chunks.length()), + "", "", &re_key, &chunks); + EXPECT_TRUE(result); + EXPECT_EQ(chunks.size(), static_cast<size_t>(3)); + + // See that each chunk has the right content. + EXPECT_EQ(chunks[0].chunk_number, 1); + EXPECT_EQ(chunks[0].hosts.size(), static_cast<size_t>(2)); + EXPECT_EQ(chunks[0].hosts[0].host, 0x34333231); + EXPECT_EQ(chunks[0].hosts[0].entry->PrefixAt(0), 0x64636261); + EXPECT_EQ(chunks[0].hosts[1].host, 0x38373635); + EXPECT_EQ(chunks[0].hosts[1].entry->PrefixAt(0), 0x7a797877); + + EXPECT_EQ(chunks[1].chunk_number, 2); + EXPECT_EQ(chunks[1].hosts.size(), static_cast<size_t>(0)); + + EXPECT_EQ(chunks[2].chunk_number, 3); + EXPECT_EQ(chunks[2].hosts.size(), static_cast<size_t>(1)); + EXPECT_EQ(chunks[2].hosts[0].host, 0x65666163); + EXPECT_EQ(chunks[2].hosts[0].entry->PrefixAt(0), 0x66656562); + + safe_browsing_util::FreeChunks(&chunks); +} + +// Test parsing a zero sized sub chunk. +TEST(SafeBrowsingProtocolParsingTest, TestZeroSizeSubChunk) { + std::string sub_chunk("s:9:4:0\n"); + SafeBrowsingProtocolParser parser; + bool re_key = false; + std::deque<SBChunk> chunks; + + bool result = parser.ParseChunk(sub_chunk.data(), + static_cast<int>(sub_chunk.length()), + "", "", &re_key, &chunks); + EXPECT_TRUE(result); + EXPECT_EQ(chunks.size(), static_cast<size_t>(1)); + EXPECT_EQ(chunks[0].chunk_number, 9); + EXPECT_EQ(chunks[0].hosts.size(), static_cast<size_t>(0)); + + safe_browsing_util::FreeChunks(&chunks); + chunks.clear(); + + // Test parsing a zero sized sub chunk mixed in with content carrying chunks. + std::string sub_chunks("s:1:4:9\nabcdxwxyz" + "s:2:4:0\n" + "s:3:4:26\nefgh\0011234pqrscafe\0015678lmno"); + sub_chunks[12] = '\0'; + + result = parser.ParseChunk(sub_chunks.data(), + static_cast<int>(sub_chunks.length()), + "", "", &re_key, &chunks); + EXPECT_TRUE(result); + + EXPECT_EQ(chunks[0].chunk_number, 1); + EXPECT_EQ(chunks[0].hosts.size(), static_cast<size_t>(1)); + EXPECT_EQ(chunks[0].hosts[0].host, 0x64636261); + EXPECT_EQ(chunks[0].hosts[0].entry->prefix_count(), 0); + + EXPECT_EQ(chunks[1].chunk_number, 2); + EXPECT_EQ(chunks[1].hosts.size(), static_cast<size_t>(0)); + + EXPECT_EQ(chunks[2].chunk_number, 3); + EXPECT_EQ(chunks[2].hosts.size(), static_cast<size_t>(2)); + EXPECT_EQ(chunks[2].hosts[0].host, 0x68676665); + EXPECT_EQ(chunks[2].hosts[0].entry->prefix_count(), 1); + EXPECT_EQ(chunks[2].hosts[0].entry->PrefixAt(0), 0x73727170); + EXPECT_EQ(chunks[2].hosts[0].entry->ChunkIdAtPrefix(0), 0x31323334); + EXPECT_EQ(chunks[2].hosts[1].host, 0x65666163); + EXPECT_EQ(chunks[2].hosts[1].entry->prefix_count(), 1); + EXPECT_EQ(chunks[2].hosts[1].entry->PrefixAt(0), 0x6f6e6d6c); + EXPECT_EQ(chunks[2].hosts[1].entry->ChunkIdAtPrefix(0), 0x35363738); + + safe_browsing_util::FreeChunks(&chunks); +} + TEST(SafeBrowsingProtocolParsingTest, TestVerifyUpdateMac) { SafeBrowsingProtocolParser parser; diff --git a/chrome/browser/safe_browsing/safe_browsing_database_impl.cc b/chrome/browser/safe_browsing/safe_browsing_database_impl.cc index 3dc123b..498ebb3 100644 --- a/chrome/browser/safe_browsing/safe_browsing_database_impl.cc +++ b/chrome/browser/safe_browsing/safe_browsing_database_impl.cc @@ -489,6 +489,7 @@ void SafeBrowsingDatabaseImpl::InsertChunks(const std::string& list_name, std::deque<SBChunk>::iterator i = chunks->begin(); for (; i != chunks->end(); ++i) { SBChunk& chunk = (*i); + chunk.list_id = list_id; std::deque<SBChunkHost>::iterator j = chunk.hosts.begin(); for (; j != chunk.hosts.end(); ++j) { j->entry->set_list_id(list_id); @@ -510,8 +511,7 @@ bool SafeBrowsingDatabaseImpl::ProcessChunks() { while (!pending_chunks_.empty()) { std::deque<SBChunk>* chunks = pending_chunks_.front(); bool done = false; - // The entries in one chunk are all either adds or subs. - if (chunks->front().hosts.front().entry->IsAdd()) { + if (chunks->front().is_add) { done = ProcessAddChunks(chunks); } else { done = ProcessSubChunks(chunks); @@ -553,7 +553,7 @@ bool SafeBrowsingDatabaseImpl::ProcessAddChunks(std::deque<SBChunk>* chunks) { Time before = Time::Now(); while (!chunks->empty()) { SBChunk& chunk = chunks->front(); - int list_id = chunk.hosts.front().entry->list_id(); + int list_id = chunk.list_id; int chunk_id = chunk.chunk_number; // The server can give us a chunk that we already have because it's part of @@ -594,7 +594,7 @@ bool SafeBrowsingDatabaseImpl::ProcessSubChunks(std::deque<SBChunk>* chunks) { Time before = Time::Now(); while (!chunks->empty()) { SBChunk& chunk = chunks->front(); - int list_id = chunk.hosts.front().entry->list_id(); + int list_id = chunk.list_id; int chunk_id = chunk.chunk_number; if (!ChunkExists(list_id, SUB_CHUNK, chunk_id)) { diff --git a/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc b/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc index 1f3f954..fb14fc8 100644 --- a/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc +++ b/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc @@ -17,51 +17,71 @@ #include "googleurl/src/gurl.h" #include "testing/gtest/include/gtest/gtest.h" +static const wchar_t kSafeBrowsingTestDatabase[] = L"SafeBrowsingTestDatabase"; + namespace { SBPrefix Sha256Prefix(const std::string& str) { SBPrefix hash; base::SHA256HashString(str, &hash, sizeof(hash)); return hash; } -} -// Helper function to do an AddDel or SubDel command. -void DelChunk(SafeBrowsingDatabase* db, - const std::string& list, - int chunk_id, - bool is_sub_del) { - std::vector<SBChunkDelete>* deletes = new std::vector<SBChunkDelete>; - SBChunkDelete chunk_delete; - chunk_delete.list_name = list; - chunk_delete.is_sub_del = is_sub_del; - chunk_delete.chunk_del.push_back(ChunkRange(chunk_id)); - deletes->push_back(chunk_delete); - db->DeleteChunks(deletes); -} + // Helper function to do an AddDel or SubDel command. + void DelChunk(SafeBrowsingDatabase* db, + const std::string& list, + int chunk_id, + bool is_sub_del) { + std::vector<SBChunkDelete>* deletes = new std::vector<SBChunkDelete>; + SBChunkDelete chunk_delete; + chunk_delete.list_name = list; + chunk_delete.is_sub_del = is_sub_del; + chunk_delete.chunk_del.push_back(ChunkRange(chunk_id)); + deletes->push_back(chunk_delete); + db->DeleteChunks(deletes); + } -void AddDelChunk(SafeBrowsingDatabase* db, - const std::string& list, - int chunk_id) { - DelChunk(db, list, chunk_id, false); -} + void AddDelChunk(SafeBrowsingDatabase* db, + const std::string& list, + int chunk_id) { + DelChunk(db, list, chunk_id, false); + } -void SubDelChunk(SafeBrowsingDatabase* db, - const std::string& list, - int chunk_id) { - DelChunk(db, list, chunk_id, true); -} + void SubDelChunk(SafeBrowsingDatabase* db, + const std::string& list, + int chunk_id) { + DelChunk(db, list, chunk_id, true); + } + + // Common database test set up code. + std::wstring GetTestDatabaseName() { + std::wstring filename; + PathService::Get(base::DIR_TEMP, &filename); + filename.push_back(file_util::kPathSeparator); + filename.append(kSafeBrowsingTestDatabase); + return filename; + } + + SafeBrowsingDatabase* SetupTestDatabase() { + std::wstring filename = GetTestDatabaseName(); + DeleteFile(filename.c_str()); // In case it existed from a previous run. + + SafeBrowsingDatabase* database = SafeBrowsingDatabase::Create(); + database->SetSynchronous(); + EXPECT_TRUE(database->Init(filename, NULL)); + + return database; + } + + void TearDownTestDatabase(SafeBrowsingDatabase* database) { + DeleteFile(GetTestDatabaseName().c_str()); + delete database; + } + +} // namespace // Checks database reading and writing. TEST(SafeBrowsingDatabase, Database) { - std::wstring filename; - PathService::Get(base::DIR_TEMP, &filename); - filename.push_back(file_util::kPathSeparator); - filename.append(L"SafeBrowsingTestDatabase"); - DeleteFile(filename.c_str()); // In case it existed from a previous run. - - SafeBrowsingDatabase* database = SafeBrowsingDatabase::Create(); - database->SetSynchronous(); - EXPECT_TRUE(database->Init(filename, NULL)); + SafeBrowsingDatabase* database = SetupTestDatabase(); // Add a simple chunk with one hostkey. SBChunkHost host; @@ -175,6 +195,7 @@ TEST(SafeBrowsingDatabase, Database) { host.entry->SetChunkIdAtPrefix(0, 2); host.entry->SetPrefixAt(0, Sha256Prefix("www.evil.com/notevil1.html")); + chunk.is_add = false; chunk.chunk_number = 4; chunk.hosts.clear(); chunk.hosts.push_back(host); @@ -243,6 +264,7 @@ TEST(SafeBrowsingDatabase, Database) { host.entry->set_chunk_id(1); host.entry->SetPrefixAt(0, Sha256Prefix("www.redherring.com/index.html")); + chunk.is_add = true; chunk.chunk_number = 44; chunk.hosts.clear(); chunk.hosts.push_back(host); @@ -274,6 +296,7 @@ TEST(SafeBrowsingDatabase, Database) { host.entry->SetPrefixAt(1, Sha256Prefix("www.notevilanymore.com/good.html")); host.entry->SetChunkIdAtPrefix(1, 10); + chunk.is_add = false; chunk.chunk_number = 5; chunk.hosts.clear(); chunk.hosts.push_back(host); @@ -293,6 +316,7 @@ TEST(SafeBrowsingDatabase, Database) { host.entry->SetPrefixAt(0, Sha256Prefix("www.notevilanymore.com/index.html")); host.entry->SetPrefixAt(1, Sha256Prefix("www.notevilanymore.com/good.html")); + chunk.is_add = true; chunk.chunk_number = 10; chunk.hosts.clear(); chunk.hosts.push_back(host); @@ -310,9 +334,124 @@ TEST(SafeBrowsingDatabase, Database) { GURL("http://www.notevilanymore.com/good.html"), &matching_list, &prefix_hits, &full_hashes, now)); - DeleteFile(filename.c_str()); // Clean up. + TearDownTestDatabase(database); +} - delete database; + +// Test adding zero length chunks to the database. +TEST(SafeBrowsingDatabase, ZeroSizeChunk) { + SafeBrowsingDatabase* database = SetupTestDatabase(); + + // Populate with a couple of normal chunks. + SBChunkHost host; + host.host = Sha256Prefix("www.test.com/"); + host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 2); + host.entry->SetPrefixAt(0, Sha256Prefix("www.test.com/test1.html")); + host.entry->SetPrefixAt(1, Sha256Prefix("www.test.com/test2.html")); + host.entry->set_chunk_id(1); + + SBChunk chunk; + chunk.is_add = true; + chunk.chunk_number = 1; + chunk.hosts.push_back(host); + + std::deque<SBChunk>* chunks = new std::deque<SBChunk>; + chunks->push_back(chunk); + + host.host = Sha256Prefix("www.random.com/"); + host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 2); + host.entry->SetPrefixAt(0, Sha256Prefix("www.random.com/random1.html")); + host.entry->SetPrefixAt(1, Sha256Prefix("www.random.com/random2.html")); + host.entry->set_chunk_id(10); + chunk.chunk_number = 10; + chunk.hosts.clear(); + chunk.hosts.push_back(host); + chunks->push_back(chunk); + + database->InsertChunks("goog-malware", chunks); + + // Add an empty ADD and SUB chunk. + std::vector<SBListChunkRanges> list_chunks_empty; + database->GetListsInfo(&list_chunks_empty); + EXPECT_EQ(list_chunks_empty[0].adds, "1,10"); + + SBChunk empty_chunk; + empty_chunk.chunk_number = 19; + empty_chunk.is_add = true; + chunks = new std::deque<SBChunk>; + chunks->push_back(empty_chunk); + database->InsertChunks("goog-malware", chunks); + chunks = new std::deque<SBChunk>; + empty_chunk.chunk_number = 7; + empty_chunk.is_add = false; + chunks->push_back(empty_chunk); + database->InsertChunks("goog-malware", chunks); + + list_chunks_empty.clear(); + database->GetListsInfo(&list_chunks_empty); + EXPECT_EQ(list_chunks_empty[0].adds, "1,10,19"); + EXPECT_EQ(list_chunks_empty[0].subs, "7"); + + // Add an empty chunk along with a couple that contain data. This should + // result in the chunk range being reduced in size. + host.host = Sha256Prefix("www.notempty.com/"); + host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 1); + host.entry->SetPrefixAt(0, Sha256Prefix("www.notempty.com/full1.html")); + host.entry->set_chunk_id(20); + empty_chunk.chunk_number = 20; + empty_chunk.is_add = true; + empty_chunk.hosts.clear(); + empty_chunk.hosts.push_back(host); + chunks = new std::deque<SBChunk>; + chunks->push_back(empty_chunk); + + empty_chunk.chunk_number = 21; + empty_chunk.is_add = true; + empty_chunk.hosts.clear(); + chunks->push_back(empty_chunk); + + host.host = Sha256Prefix("www.notempty.com/"); + host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 1); + host.entry->SetPrefixAt(0, Sha256Prefix("www.notempty.com/full2.html")); + host.entry->set_chunk_id(22); + empty_chunk.hosts.clear(); + empty_chunk.hosts.push_back(host); + empty_chunk.chunk_number = 22; + empty_chunk.is_add = true; + chunks->push_back(empty_chunk); + + database->InsertChunks("goog-malware", chunks); + + const Time now = Time::Now(); + std::vector<SBFullHashResult> full_hashes; + std::vector<SBPrefix> prefix_hits; + std::string matching_list; + EXPECT_TRUE(database->ContainsUrl(GURL("http://www.notempty.com/full1.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); + EXPECT_TRUE(database->ContainsUrl(GURL("http://www.notempty.com/full2.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); + + list_chunks_empty.clear(); + database->GetListsInfo(&list_chunks_empty); + EXPECT_EQ(list_chunks_empty[0].adds, "1,10,19-22"); + EXPECT_EQ(list_chunks_empty[0].subs, "7"); + + // Handle AddDel and SubDel commands for empty chunks. + AddDelChunk(database, "goog-malware", 21); + list_chunks_empty.clear(); + database->GetListsInfo(&list_chunks_empty); + EXPECT_EQ(list_chunks_empty[0].adds, "1,10,19-20,22"); + EXPECT_EQ(list_chunks_empty[0].subs, "7"); + + SubDelChunk(database, "goog-malware", 7); + list_chunks_empty.clear(); + database->GetListsInfo(&list_chunks_empty); + EXPECT_EQ(list_chunks_empty[0].adds, "1,10,19-20,22"); + EXPECT_EQ(list_chunks_empty[0].subs, ""); + + TearDownTestDatabase(database); } void PrintStat(const wchar_t* name) { diff --git a/chrome/browser/safe_browsing/safe_browsing_util.h b/chrome/browser/safe_browsing/safe_browsing_util.h index 1e40323..ec2f43d 100644 --- a/chrome/browser/safe_browsing/safe_browsing_util.h +++ b/chrome/browser/safe_browsing/safe_browsing_util.h @@ -57,6 +57,8 @@ struct SBChunkHost { // Container for an add/sub chunk. struct SBChunk { int chunk_number; + int list_id; + bool is_add; std::deque<SBChunkHost> hosts; }; |