summaryrefslogtreecommitdiffstats
path: root/chrome/browser
diff options
context:
space:
mode:
authorpaulg@google.com <paulg@google.com@0039d316-1c4b-4281-b951-d872f2087c98>2008-10-08 19:34:13 +0000
committerpaulg@google.com <paulg@google.com@0039d316-1c4b-4281-b951-d872f2087c98>2008-10-08 19:34:13 +0000
commitcf13cfb4b44dc30a8d3dde60e6d900a7cacb29d2 (patch)
tree5c73d284fdf61b73dd39453df7b8ff0c9409f3e1 /chrome/browser
parent7d0d8af5e43ae5b599f80208606a821b1f0c5482 (diff)
downloadchromium_src-cf13cfb4b44dc30a8d3dde60e6d900a7cacb29d2.zip
chromium_src-cf13cfb4b44dc30a8d3dde60e6d900a7cacb29d2.tar.gz
chromium_src-cf13cfb4b44dc30a8d3dde60e6d900a7cacb29d2.tar.bz2
Fixes to the experimental bloom filter storage code.
Update the unittest to handle the bloom filter storage implementation and fix a test scenario. This change does not affect the current working implementation.

Review URL: http://codereview.chromium.org/6513

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@3031 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser')
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_database_bloom.cc81
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_database_bloom.h19
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_database_unittest.cc15
3 files changed, 69 insertions, 46 deletions
diff --git a/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc b/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc
index 676f858..ff8244f 100644
--- a/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc
+++ b/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc
@@ -74,6 +74,8 @@ bool SafeBrowsingDatabaseBloom::Init(const std::wstring& filename,
load_filter = true;
}
+ CreateChunkCaches();
+
bloom_filter_filename_ = BloomFilterFilename(filename_);
if (load_filter) {
@@ -98,8 +100,6 @@ bool SafeBrowsingDatabaseBloom::Open() {
statement_cache_.reset(new SqliteStatementCache(db_));
- CreateChunkCaches();
-
return true;
}
@@ -199,6 +199,8 @@ bool SafeBrowsingDatabaseBloom::CreateTables() {
// The SafeBrowsing service assumes this operation is synchronous.
bool SafeBrowsingDatabaseBloom::ResetDatabase() {
hash_cache_.clear();
+ add_chunk_cache_.clear();
+ sub_chunk_cache_.clear();
prefix_miss_cache_.clear();
bool rv = Close();
@@ -241,13 +243,18 @@ bool SafeBrowsingDatabaseBloom::ContainsUrl(
std::vector<SBFullHashResult>* full_hits,
Time last_update) {
+ // Clear the results first.
+ matching_list->clear();
+ prefix_hits->clear();
+ full_hits->clear();
+
std::vector<std::string> hosts;
if (url.HostIsIPAddress()) {
hosts.push_back(url.host());
} else {
safe_browsing_util::GenerateHostsToCheck(url, &hosts);
if (hosts.size() == 0)
- return false; // things like about:blank
+ return false; // things like about:blank
}
std::vector<std::string> paths;
safe_browsing_util::GeneratePathsToCheck(url, &paths);
@@ -260,7 +267,7 @@ bool SafeBrowsingDatabaseBloom::ContainsUrl(
SBFullHash full_hash;
// TODO(erikkay): maybe we should only do the first 32 bits initially,
// and then fall back to the full hash if there's a hit.
- base::SHA256HashString(hosts[i] + paths[j], &full_hash,
+ base::SHA256HashString(hosts[i] + paths[j], &full_hash,
sizeof(SBFullHash));
SBPrefix prefix;
memcpy(&prefix, &full_hash, sizeof(SBPrefix));
@@ -297,7 +304,7 @@ void SafeBrowsingDatabaseBloom::InsertChunks(const std::string& list_name,
// database lookups, we need a reasonably current bloom filter at startup.
// I think we need some way to indicate that the bloom filter is out of date
// and needs to be rebuilt, but we shouldn't delete it.
- //DeleteBloomFilter();
+ // DeleteBloomFilter();
int list_id = GetListID(list_name);
std::deque<SBChunk>::iterator i = chunks->begin();
@@ -362,7 +369,7 @@ void SafeBrowsingDatabaseBloom::ProcessAddChunks(std::deque<SBChunk>* chunks) {
entry->Destroy();
chunk.hosts.pop_front();
}
- int encoded = EncodedChunkId(chunk_id, list_id);
+ int encoded = EncodeChunkId(chunk_id, list_id);
add_chunk_cache_.insert(encoded);
}
@@ -376,7 +383,7 @@ void SafeBrowsingDatabaseBloom::AddEntry(SBPrefix host, SBEntry* entry) {
// TODO(erikkay)
return;
}
- int encoded = EncodedChunkId(entry->chunk_id(), entry->list_id());
+ int encoded = EncodeChunkId(entry->chunk_id(), entry->list_id());
int count = entry->prefix_count();
if (count == 0) {
AddPrefix(host, encoded);
@@ -417,8 +424,8 @@ void SafeBrowsingDatabaseBloom::AddSub(
return;
}
- int encoded = EncodedChunkId(chunk_id, entry->list_id());
- int encoded_add = EncodedChunkId(entry->chunk_id(), entry->list_id());
+ int encoded = EncodeChunkId(chunk_id, entry->list_id());
+ int encoded_add = EncodeChunkId(entry->chunk_id(), entry->list_id());
int count = entry->prefix_count();
if (count == 0) {
AddSubPrefix(host, encoded, encoded_add);
@@ -430,11 +437,11 @@ void SafeBrowsingDatabaseBloom::AddSub(
}
}
-void SafeBrowsingDatabaseBloom::AddSubPrefix(SBPrefix prefix,
+void SafeBrowsingDatabaseBloom::AddSubPrefix(SBPrefix prefix,
int encoded_chunk,
int encoded_add_chunk) {
STATS_COUNTER(L"SB.PrefixSub", 1);
- std::string sql =
+ std::string sql =
"INSERT INTO sub_prefix (chunk, add_chunk, prefix) VALUES (?,?,?)";
SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, sql.c_str());
if (!statement.is_valid()) {
@@ -453,19 +460,22 @@ void SafeBrowsingDatabaseBloom::AddSubPrefix(SBPrefix prefix,
}
}
-// Encode the list id in the lower bit of the chunk.
-static inline int EncodeChunkId(int chunk, int list_id) {
- list_id--;
- DCHECK(list_id == 0 || list_id == 1);
- chunk = chunk << 1;
- chunk |= list_id;
- return chunk;
-}
+// TODO(paulg): Look for a less expensive way to maintain add_count_.
+int SafeBrowsingDatabaseBloom::GetAddPrefixCount() {
+ SQLITE_UNIQUE_STATEMENT(count, *statement_cache_,
+ "SELECT count(*) FROM add_prefix");
+ if (!count.is_valid()) {
+ NOTREACHED();
+ return 0;
+ }
+ int rv = count->step();
+ int add_count = 0;
+ if (rv == SQLITE_ROW)
+ add_count = count->column_int(0);
+ else if (rv == SQLITE_CORRUPT)
+ HandleCorruptDatabase();
-// Split an encoded chunk id and return the original chunk id and list id.
-static inline void DecodeChunkId(int encoded, int* chunk, int* list_id) {
- *list_id = 1 + (encoded & 0x1);
- *chunk = encoded >> 1;
+ return add_count;
}
// TODO(erikkay) - this is too slow
@@ -495,17 +505,7 @@ void SafeBrowsingDatabaseBloom::CreateChunkCaches() {
rv = subs->step();
}
- SQLITE_UNIQUE_STATEMENT(count, *statement_cache_,
- "SELECT count(*) FROM add_prefix");
- if (!count.is_valid()) {
- NOTREACHED();
- return;
- }
- rv = count->step();
- if (rv == SQLITE_ROW)
- add_count_ = count->column_int(0);
- else if (rv == SQLITE_CORRUPT)
- HandleCorruptDatabase();
+ add_count_ = GetAddPrefixCount();
}
void SafeBrowsingDatabaseBloom::ProcessSubChunks(std::deque<SBChunk>* chunks) {
@@ -527,7 +527,7 @@ void SafeBrowsingDatabaseBloom::ProcessSubChunks(std::deque<SBChunk>* chunks) {
chunk.hosts.pop_front();
}
- int encoded = EncodedChunkId(chunk_id, list_id);
+ int encoded = EncodeChunkId(chunk_id, list_id);
sub_chunk_cache_.insert(encoded);
}
@@ -578,7 +578,7 @@ void SafeBrowsingDatabaseBloom::AddDel(int list_id,
return;
}
- int encoded = EncodedChunkId(add_chunk_id, list_id);
+ int encoded = EncodeChunkId(add_chunk_id, list_id);
statement->bind_int(0, encoded);
int rv = statement->step();
if (rv == SQLITE_CORRUPT) {
@@ -606,7 +606,7 @@ void SafeBrowsingDatabaseBloom::SubDel(int list_id,
return;
}
- int encoded = EncodedChunkId(sub_chunk_id, list_id);
+ int encoded = EncodeChunkId(sub_chunk_id, list_id);
statement->bind_int(0, encoded);
int rv = statement->step();
if (rv == SQLITE_CORRUPT) {
@@ -633,7 +633,7 @@ bool SafeBrowsingDatabaseBloom::ChunkExists(int list_id,
ChunkType type,
int chunk_id) {
STATS_COUNTER(L"SB.ChunkSelect", 1);
- int encoded = EncodedChunkId(chunk_id, list_id);
+ int encoded = EncodeChunkId(chunk_id, list_id);
bool ret;
if (type == ADD_CHUNK)
ret = add_chunk_cache_.count(encoded) > 0;
@@ -789,6 +789,7 @@ static int pair_compare(const void* arg1, const void* arg2) {
void SafeBrowsingDatabaseBloom::BuildBloomFilter() {
Time before = Time::Now();
+ add_count_ = GetAddPrefixCount();
scoped_array<SBPair> adds_array(new SBPair[add_count_]);
SBPair* adds = adds_array.get();
@@ -872,8 +873,8 @@ void SafeBrowsingDatabaseBloom::BuildBloomFilter() {
while (add - adds < add_count_) {
if (add->chunk_id != 0) {
filter->Insert(add->prefix);
- insert->bind_int(0, add->prefix);
- insert->bind_int(1, add->chunk_id);
+ insert->bind_int(0, add->chunk_id);
+ insert->bind_int(1, add->prefix);
rv = insert->step();
if (rv == SQLITE_CORRUPT) {
HandleCorruptDatabase();
diff --git a/chrome/browser/safe_browsing/safe_browsing_database_bloom.h b/chrome/browser/safe_browsing/safe_browsing_database_bloom.h
index 9b3c0b3..e991333 100644
--- a/chrome/browser/safe_browsing/safe_browsing_database_bloom.h
+++ b/chrome/browser/safe_browsing/safe_browsing_database_bloom.h
@@ -168,15 +168,28 @@ class SafeBrowsingDatabaseBloom : public SafeBrowsingDatabase {
// flag. This method should be called periodically inside of busy disk loops.
void WaitAfterResume();
- //
void AddEntry(SBPrefix host, SBEntry* entry);
void AddPrefix(SBPrefix prefix, int encoded_chunk);
void AddSub(int chunk, SBPrefix host, SBEntry* entry);
void AddSubPrefix(SBPrefix prefix, int encoded_chunk, int encoded_add_chunk);
void ProcessPendingSubs();
- int EncodedChunkId(int chunk, int list_id);
- void DecodeChunkId(int encoded, int* chunk, int* list_id);
void CreateChunkCaches();
+ int GetAddPrefixCount();
+
+ // Encode the list id in the lower bit of the chunk.
+ static inline int EncodeChunkId(int chunk, int list_id) {
+ list_id--;
+ DCHECK(list_id == 0 || list_id == 1);
+ chunk = chunk << 1;
+ chunk |= list_id;
+ return chunk;
+ }
+
+ // Split an encoded chunk id and return the original chunk id and list id.
+ static inline void DecodeChunkId(int encoded, int* chunk, int* list_id) {
+ *list_id = 1 + (encoded & 0x1);
+ *chunk = encoded >> 1;
+ }
// The database connection.
sqlite3* db_;
diff --git a/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc b/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc
index 542de7e..1f3f954 100644
--- a/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc
+++ b/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc
@@ -82,7 +82,7 @@ TEST(SafeBrowsingDatabase, Database) {
// Add another chunk with two different hostkeys.
host.host = Sha256Prefix("www.evil.com/");
host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 2);
- host.entry->set_chunk_id(1);
+ host.entry->set_chunk_id(2);
host.entry->SetPrefixAt(0, Sha256Prefix("www.evil.com/notevil1.html"));
host.entry->SetPrefixAt(1, Sha256Prefix("www.evil.com/notevil2.html"));
@@ -215,6 +215,8 @@ TEST(SafeBrowsingDatabase, Database) {
// Test removing all the prefixes from an add chunk.
AddDelChunk(database, "goog-malware", 2);
+ database->UpdateFinished();
+
EXPECT_FALSE(database->ContainsUrl(GURL("http://www.evil.com/notevil2.html"),
&matching_list, &prefix_hits,
&full_hashes, now));
@@ -235,7 +237,7 @@ TEST(SafeBrowsingDatabase, Database) {
// The adddel command exposed a bug in the transaction code where any
// transaction after it would fail. Add a dummy entry and remove it to
- // make sure the transcation work fine.
+ // make sure the transaction works fine.
host.host = Sha256Prefix("www.redherring.com/");
host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 1);
host.entry->set_chunk_id(1);
@@ -255,6 +257,8 @@ TEST(SafeBrowsingDatabase, Database) {
// Test the subdel command.
SubDelChunk(database, "goog-malware", 4);
+ database->UpdateFinished();
+
database->GetListsInfo(&lists);
EXPECT_EQ(lists.size(), 1);
EXPECT_EQ(lists[0].name, "goog-malware");
@@ -263,8 +267,12 @@ TEST(SafeBrowsingDatabase, Database) {
// Test a sub command coming in before the add.
host.host = Sha256Prefix("www.notevilanymore.com/");
- host.entry = SBEntry::Create(SBEntry::SUB_PREFIX, 0);
+ host.entry = SBEntry::Create(SBEntry::SUB_PREFIX, 2);
host.entry->set_chunk_id(10);
+ host.entry->SetPrefixAt(0, Sha256Prefix("www.notevilanymore.com/index.html"));
+ host.entry->SetChunkIdAtPrefix(0, 10);
+ host.entry->SetPrefixAt(1, Sha256Prefix("www.notevilanymore.com/good.html"));
+ host.entry->SetChunkIdAtPrefix(1, 10);
chunk.chunk_number = 5;
chunk.hosts.clear();
@@ -273,6 +281,7 @@ TEST(SafeBrowsingDatabase, Database) {
chunks = new std::deque<SBChunk>;
chunks->push_back(chunk);
database->InsertChunks("goog-malware", chunks);
+ database->UpdateFinished();
EXPECT_FALSE(database->ContainsUrl(
GURL("http://www.notevilanymore.com/index.html"),