summaryrefslogtreecommitdiffstats
path: root/chrome/browser/safe_browsing
diff options
context:
space:
mode:
author initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98> 2008-07-26 23:55:29 +0000
committer initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98> 2008-07-26 23:55:29 +0000
commit 09911bf300f1a419907a9412154760efd0b7abc3 (patch)
tree f131325fb4e2ad12c6d3504ab75b16dd92facfed /chrome/browser/safe_browsing
parent 586acc5fe142f498261f52c66862fa417c3d52d2 (diff)
download chromium_src-09911bf300f1a419907a9412154760efd0b7abc3.zip
chromium_src-09911bf300f1a419907a9412154760efd0b7abc3.tar.gz
chromium_src-09911bf300f1a419907a9412154760efd0b7abc3.tar.bz2
Add chrome to the repository.
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@15 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/safe_browsing')
-rw-r--r--chrome/browser/safe_browsing/bloom_filter.cc86
-rw-r--r--chrome/browser/safe_browsing/bloom_filter.h58
-rw-r--r--chrome/browser/safe_browsing/bloom_filter_unittest.cc103
-rw-r--r--chrome/browser/safe_browsing/chunk_range.cc122
-rw-r--r--chrome/browser/safe_browsing/chunk_range.h89
-rw-r--r--chrome/browser/safe_browsing/chunk_range_unittest.cc202
-rw-r--r--chrome/browser/safe_browsing/database_perftest.cc565
-rw-r--r--chrome/browser/safe_browsing/protocol_manager.cc607
-rw-r--r--chrome/browser/safe_browsing/protocol_manager.h232
-rw-r--r--chrome/browser/safe_browsing/protocol_manager_unittest.cc158
-rw-r--r--chrome/browser/safe_browsing/protocol_parser.cc496
-rw-r--r--chrome/browser/safe_browsing/protocol_parser.h153
-rw-r--r--chrome/browser/safe_browsing/protocol_parser_unittest.cc654
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_blocking_page.cc361
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_blocking_page.h135
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_database.cc1283
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_database.h322
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_database_unittest.cc652
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_service.cc567
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_service.h279
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_util.cc630
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_util.h324
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_util_unittest.cc260
23 files changed, 8338 insertions, 0 deletions
diff --git a/chrome/browser/safe_browsing/bloom_filter.cc b/chrome/browser/safe_browsing/bloom_filter.cc
new file mode 100644
index 0000000..a3e8699
--- /dev/null
+++ b/chrome/browser/safe_browsing/bloom_filter.cc
@@ -0,0 +1,86 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "chrome/browser/safe_browsing/bloom_filter.h"
+
+#include <windows.h>
+
+
+// Creates an empty filter with every bit cleared.  bit_size / 8 + 1 bytes are
+// allocated, so for a non-negative |bit_size| the filter holds at least that
+// many bits.  The initializers follow the declaration order of the members
+// (byte_size_, bit_size_, data_), which the later initializers rely on.
+BloomFilter::BloomFilter(int bit_size)
+    : byte_size_(bit_size / 8 + 1),
+      bit_size_(byte_size_ * 8),
+      data_(new char[byte_size_]) {
+  ZeroMemory(data_.get(), byte_size_);
+}
+
+// Wraps an existing serialized filter of |size| bytes.  |data| is handed to
+// data_, so this object owns the buffer from here on.
+// NOTE(review): presumably |data| must come from new[] to match how
+// scoped_array releases it -- confirm against base/scoped_ptr.h.
+BloomFilter::BloomFilter(char* data, int size)
+    : byte_size_(size),
+      bit_size_(byte_size_ * 8),
+      data_(data) {
+}
+
+// Nothing to do explicitly; the buffer is held by the data_ member.
+BloomFilter::~BloomFilter() {}
+
+// Records |hash_int| in the filter by setting four bits.  Each bit position is
+// one of the four byte rotations of the 32-bit hash, taken modulo the number
+// of bits in the filter (bit_size_ must therefore be non-zero).
+void BloomFilter::Insert(int hash_int) {
+  // Copy the bits of the signed hash into an unsigned value so the rotation
+  // and modulo below operate on unsigned quantities.
+  uint32 rotated;
+  memcpy(&rotated, &hash_int, sizeof(rotated));
+
+  for (int round = 0; round < 4; ++round) {
+    rotated = RotateLeft(rotated);
+    const uint32 bit_index = rotated % bit_size_;
+    const int byte_offset = bit_index / 8;
+    const int bit_in_byte = bit_index % 8;
+    data_.get()[byte_offset] |= 1 << bit_in_byte;
+  }
+}
+
+// Tests whether |hash_int| may have been inserted.  Returns false as soon as
+// one of the four bits that Insert() would have set is found clear; returns
+// true only if all four are set.
+bool BloomFilter::Exists(int hash_int) const {
+  // Same derivation of bit positions as Insert(): four byte rotations of the
+  // unsigned hash, each reduced modulo the filter's bit count.
+  uint32 rotated;
+  memcpy(&rotated, &hash_int, sizeof(rotated));
+
+  for (int round = 0; round < 4; ++round) {
+    rotated = RotateLeft(rotated);
+    const uint32 bit_index = rotated % bit_size_;
+    const int byte_offset = bit_index / 8;
+    const int bit_in_byte = bit_index % 8;
+    const char cell = data_.get()[byte_offset];
+    const bool is_set = (cell & (1 << bit_in_byte)) != 0;
+    if (!is_set)
+      return false;
+  }
+
+  return true;
+}
+
+// Rotates |hash| left by one byte: the top 8 bits wrap around and become the
+// low 8 bits.
+uint32 BloomFilter::RotateLeft(uint32 hash) {
+  return (hash << 8) | (hash >> 24);
+}
diff --git a/chrome/browser/safe_browsing/bloom_filter.h b/chrome/browser/safe_browsing/bloom_filter.h
new file mode 100644
index 0000000..67b05dc
--- /dev/null
+++ b/chrome/browser/safe_browsing/bloom_filter.h
@@ -0,0 +1,58 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// A simple bloom filter. It's currently limited to four hashing functions,
+// which are calculated from the item's hash.
+
+#ifndef CHROME_BROWSER_SAFE_BROWSING_BLOOM_FILTER_H__
+#define CHROME_BROWSER_SAFE_BROWSING_BLOOM_FILTER_H__
+
+#include "base/scoped_ptr.h"
+#include "base/basictypes.h"
+
+// A bit array plus the logic to set and test the four bits associated with a
+// 32-bit hash.  Exists() never reports false for a hash that was inserted,
+// but may report true for one that was not (a false positive).
+class BloomFilter {
+ public:
+  // Constructs an empty filter with the given size.  The implementation
+  // allocates bit_size / 8 + 1 bytes.  Marked explicit so that an int is
+  // never silently converted into a filter.
+  explicit BloomFilter(int bit_size);
+
+  // Constructs a filter from serialized data of |size| bytes.  This object
+  // owns the memory and will delete it on destruction.
+  BloomFilter(char* data, int size);
+  ~BloomFilter();
+
+  // Sets the four bits derived from |hash|.
+  void Insert(int hash);
+
+  // Returns true if all four bits derived from |hash| are set.
+  bool Exists(int hash) const;
+
+  // The raw filter bytes and their count.  Together they are the serialized
+  // form accepted by the (data, size) constructor.
+  const char* data() const { return data_.get(); }
+  int size() const { return byte_size_; }
+
+ private:
+  // Rotates |hash| left by 8 bits.
+  static uint32 RotateLeft(uint32 hash);
+
+  int byte_size_;  // size in bytes
+  int bit_size_;   // size in bits
+  scoped_array<char> data_;
+};
+
+#endif  // CHROME_BROWSER_SAFE_BROWSING_BLOOM_FILTER_H__
diff --git a/chrome/browser/safe_browsing/bloom_filter_unittest.cc b/chrome/browser/safe_browsing/bloom_filter_unittest.cc
new file mode 100644
index 0000000..fe0ad7c
--- /dev/null
+++ b/chrome/browser/safe_browsing/bloom_filter_unittest.cc
@@ -0,0 +1,103 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+
+#include "chrome/browser/safe_browsing/bloom_filter.h"
+
+#include <set>
+
+#include "base/logging.h"
+#include "base/string_util.h"
+#include "base/win_util.h"
+#include "chrome/common/rand_util.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace {
+
+// Produces a random value to use as a hash, obtained from
+// rand_util::RandIntSecure(0, kint32max).
+uint32 GenHash() {
+  uint32 hash = static_cast<uint32>(rand_util::RandIntSecure(0, kint32max));
+  return hash;
+}
+
+}  // namespace
+
+// Fills a filter with random hashes, round-trips it through its serialized
+// form, then checks that there are no false negatives and that the false
+// positive rate stays low.
+TEST(SafeBrowsing, BloomFilter) {
+  // rand_util isn't random enough on Win2K, see bug 1076619.
+  if (win_util::GetWinVersion() == win_util::WINVERSION_2000)
+    return;
+
+  // Use a small number for unit test so it's not slow.
+  const int count = 1000;  // 100000;
+
+  // Build up the bloom filter.
+  BloomFilter filter(count * 10);
+
+  typedef std::set<int> Values;
+  Values values;
+  for (int i = 0; i < count; ++i) {
+    uint32 value = GenHash();
+    values.insert(value);
+    filter.Insert(value);
+  }
+
+  // Check serialization works.  Ownership of |data_copy| passes to
+  // |filter_copy|, so it is not deleted here.
+  char* data_copy = new char[filter.size()];
+  memcpy(data_copy, filter.data(), filter.size());
+  BloomFilter filter_copy(data_copy, filter.size());
+
+  // Check no false negatives by ensuring that every item we inserted exists.
+  for (Values::iterator i = values.begin(); i != values.end(); ++i) {
+    EXPECT_TRUE(filter_copy.Exists(*i));
+  }
+
+  // Check false positive error rate by checking the same number of items that
+  // we inserted, but of different values, and calculating what percentage are
+  // "found".
+  int found_count = 0;
+  int checked = 0;
+  while (checked < count) {
+    uint32 value = GenHash();
+    if (values.find(value) != values.end())
+      continue;  // Skip values that really were inserted.
+
+    if (filter_copy.Exists(value))
+      ++found_count;
+
+    ++checked;
+  }
+
+  // The FP rate should be 1.2%: with about 10 bits per item and 4 bits set
+  // per item, (1 - e^(-4/10))^4 is roughly 0.012.  Keep a large margin of
+  // error because we don't want to fail this test because we happened to
+  // randomly pick a lot of FPs.  EXPECT_LT reports a normal test failure
+  // rather than aborting the whole test binary.
+  double fp_rate = found_count * 100.0 / count;
+  EXPECT_LT(fp_rate, 5.0);
+
+  LOG(INFO) << "For safe browsing bloom filter of size " << count <<
+      ", the FP rate was " << fp_rate << " %";
+}
diff --git a/chrome/browser/safe_browsing/chunk_range.cc b/chrome/browser/safe_browsing/chunk_range.cc
new file mode 100644
index 0000000..201504f
--- /dev/null
+++ b/chrome/browser/safe_browsing/chunk_range.cc
@@ -0,0 +1,122 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Implementation of ChunkRange class.
+
+#include "chrome/browser/safe_browsing/chunk_range.h"
+
+#include "base/logging.h"
+#include "base/string_util.h"
+
+// A range that covers only the chunk |start|.
+ChunkRange::ChunkRange(int start)
+    : start_(start),
+      stop_(start) {
+}
+
+// A range that covers |start| through |stop|.
+ChunkRange::ChunkRange(int start, int stop)
+    : start_(start),
+      stop_(stop) {
+}
+
+// Copies both endpoints of |rhs|.
+ChunkRange::ChunkRange(const ChunkRange& rhs)
+    : start_(rhs.start_),
+      stop_(rhs.stop_) {
+}
+
+// Helper functions -----------------------------------------------------------
+
+// Walks |chunks| and groups each maximal run of values in which every element
+// equals its predecessor or exceeds it by exactly one.  One ChunkRange per run
+// is appended to |ranges|; |ranges| is not cleared first.
+void ChunksToRanges(const std::vector<int>& chunks,
+                    std::vector<ChunkRange>* ranges) {
+  DCHECK(ranges);
+  const size_t count = chunks.size();
+  size_t first = 0;
+  while (first < count) {
+    // Extend |last| for as long as the following element is a duplicate of,
+    // or the immediate successor of, the current one.
+    size_t last = first;
+    while (last + 1 < count &&
+           (chunks[last] == chunks[last + 1] - 1 ||
+            chunks[last] == chunks[last + 1])) {
+      ++last;
+    }
+    ranges->push_back(ChunkRange(chunks[first], chunks[last]));
+    first = last + 1;
+  }
+}
+
+// Expands every range into its individual chunk numbers, start through stop
+// inclusive, and appends them to |chunks| in order.  |chunks| is not cleared
+// first.
+void RangesToChunks(const std::vector<ChunkRange>& ranges,
+                    std::vector<int>* chunks) {
+  DCHECK(chunks);
+  typedef std::vector<ChunkRange>::const_iterator RangeIter;
+  for (RangeIter it = ranges.begin(); it != ranges.end(); ++it) {
+    const int last = it->stop();
+    for (int chunk = it->start(); chunk <= last; ++chunk)
+      chunks->push_back(chunk);
+  }
+}
+
+// Writes |ranges| to |result| in protocol format: entries separated by commas,
+// a range whose start equals its stop written as a single number, and any
+// other range written as "start-stop".  Any previous contents of |result| are
+// discarded.
+void RangesToString(const std::vector<ChunkRange>& ranges,
+                    std::string* result) {
+  DCHECK(result);
+  result->clear();
+  for (std::vector<ChunkRange>::const_iterator it = ranges.begin();
+       it != ranges.end(); ++it) {
+    if (!result->empty())
+      result->append(",");
+    // Both forms go through StringPrintf.  That avoids a fixed-size buffer,
+    // which was too small for a negative 32-bit value, and the
+    // platform-specific _itoa_s.
+    if (it->start() == it->stop())
+      result->append(StringPrintf("%d", it->start()));
+    else
+      result->append(StringPrintf("%d-%d", it->start(), it->stop()));
+  }
+}
+
+// Parses a protocol-format chunk list such as "1-100,398,415" and appends the
+// resulting ranges to |ranges|.  Returns true on success.  If any part is
+// malformed, |ranges| is cleared (including anything it held on entry) and
+// false is returned.
+bool StringToRanges(const std::string& input,
+                    std::vector<ChunkRange>* ranges) {
+  DCHECK(ranges);
+
+  // Crack the string into chunk parts, then crack each part looking for a
+  // range.
+  std::vector<std::string> chunk_parts;
+  SplitString(input, ',', &chunk_parts);
+
+  for (size_t i = 0; i < chunk_parts.size(); ++i) {
+    std::vector<std::string> chunk_ranges;
+    SplitString(chunk_parts[i], '-', &chunk_ranges);
+
+    // A well-formed part is either "N" or "N-M".  Any other piece count (none
+    // at all, or extra separators as in "1-2-3") leaves start and stop at 0,
+    // so it is rejected below instead of being partially parsed.
+    int start = 0;
+    int stop = 0;
+    if (chunk_ranges.size() == 1 || chunk_ranges.size() == 2) {
+      start = atoi(chunk_ranges[0].c_str());
+      stop = start;
+      if (chunk_ranges.size() == 2)
+        stop = atoi(chunk_ranges[1].c_str());
+    }
+    if (start == 0 || stop == 0) {
+      // Wrong piece count or atoi error, since chunk numbers are guaranteed to
+      // never be 0.
+      ranges->clear();
+      return false;
+    }
+    ranges->push_back(ChunkRange(start, stop));
+  }
+
+  return true;
+}
\ No newline at end of file
diff --git a/chrome/browser/safe_browsing/chunk_range.h b/chrome/browser/safe_browsing/chunk_range.h
new file mode 100644
index 0000000..4f4567b
--- /dev/null
+++ b/chrome/browser/safe_browsing/chunk_range.h
@@ -0,0 +1,89 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Class for parsing lists of integers into ranges.
+//
+// The anti-phishing and anti-malware protocol sends ASCII strings of numbers
+// and ranges of numbers corresponding to chunks of whitelists and blacklists.
+// Clients of this protocol need to be able to convert back and forth between
+// this representation, and individual integer chunk numbers. The ChunkRange
+// class is a simple and compact mechanism for storing a continuous list of
+// chunk numbers.
+
+#ifndef CHROME_BROWSER_SAFE_BROWSING_CHUNK_RANGE_H__
+#define CHROME_BROWSER_SAFE_BROWSING_CHUNK_RANGE_H__
+
+#include <string>
+#include <vector>
+
+// ChunkRange ------------------------------------------------------------------
+// Each ChunkRange represents a continuous range of chunk numbers [start, stop].
+
+class ChunkRange {
+ public:
+  // Range holding the single chunk |start|.  Not marked explicit, so an int
+  // converts implicitly to a ChunkRange.
+  ChunkRange(int start);
+
+  // Range from |start| to |stop|.
+  ChunkRange(int start, int stop);
+
+  ChunkRange(const ChunkRange& rhs);
+
+  // Endpoint accessors.
+  int start() const { return start_; }
+  int stop() const { return stop_; }
+
+  // Two ranges compare equal when both endpoints match.
+  bool operator==(const ChunkRange& rhs) const {
+    return start_ == rhs.start_ && stop_ == rhs.stop_;
+  }
+
+ private:
+  int start_;  // First chunk number of the range.
+  int stop_;   // Last chunk number of the range.
+};
+
+
+// Helper functions ------------------------------------------------------------
+
+// Convert a series of chunk numbers into a more compact range representation.
+// The 'chunks' vector must be sorted in ascending order.
+// Repeated values are folded into the range that contains them.  Results are
+// appended to 'ranges', which is not cleared first.
+void ChunksToRanges(const std::vector<int>& chunks,
+ std::vector<ChunkRange>* ranges);
+
+// Convert a set of ranges into individual chunk numbers.
+// Every number from start() through stop() of each range is appended to
+// 'chunks', which is not cleared first.
+void RangesToChunks(const std::vector<ChunkRange>& ranges,
+ std::vector<int>* chunks);
+
+// Convert a series of chunk ranges into a string in protocol format.
+// 'result' is cleared first.  Entries are separated by commas, and a range
+// whose start equals its stop is written as a single number.
+void RangesToString(const std::vector<ChunkRange>& ranges,
+ std::string* result);
+
+// Returns 'true' if the string was successfully converted to ChunkRanges,
+// 'false' if the input was malformed.
+// The string must be in the form: "1-100,398,415,1138-2001,2019".
+// Parsed ranges are appended to 'ranges'.  On failure 'ranges' is cleared.
+// A number that parses as 0 is treated as malformed.
+bool StringToRanges(const std::string& input,
+ std::vector<ChunkRange>* ranges);
+
+
+#endif // CHROME_BROWSER_SAFE_BROWSING_CHUNK_RANGE_H__ \ No newline at end of file
diff --git a/chrome/browser/safe_browsing/chunk_range_unittest.cc b/chrome/browser/safe_browsing/chunk_range_unittest.cc
new file mode 100644
index 0000000..675de33
--- /dev/null
+++ b/chrome/browser/safe_browsing/chunk_range_unittest.cc
@@ -0,0 +1,202 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Test program to convert lists of integers into ranges, and vice versa.
+
+#include "base/logging.h"
+#include "chunk_range.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+// Checks the protocol string produced for multi-chunk ranges, for single
+// chunks, and for a mix of the two.
+TEST(SafeBrowsingChunkRangeTest, TestRangesToString) {
+  std::string formatted;
+  std::vector<ChunkRange> ranges;
+
+  // Only multi-chunk ranges.
+  ranges.push_back(ChunkRange(1, 10));
+  ranges.push_back(ChunkRange(15, 17));
+  ranges.push_back(ChunkRange(21, 410));
+  ranges.push_back(ChunkRange(991, 1000));
+  RangesToString(ranges, &formatted);
+  EXPECT_EQ(formatted, "1-10,15-17,21-410,991-1000");
+
+  // A range whose start equals its stop is written as one number.
+  ranges.clear();
+  ranges.push_back(ChunkRange(4, 4));
+  RangesToString(ranges, &formatted);
+  EXPECT_EQ(formatted, "4");
+
+  // Further single chunks are appended after the existing entry.
+  ranges.push_back(ChunkRange(7));
+  ranges.push_back(ChunkRange(9));
+  RangesToString(ranges, &formatted);
+  EXPECT_EQ(formatted, "4,7,9");
+
+  // Single chunks followed by a multi-chunk range.
+  ranges.push_back(ChunkRange(42, 99));
+  RangesToString(ranges, &formatted);
+  EXPECT_EQ(formatted, "4,7,9,42-99");
+}
+
+
+// Exercises ChunksToRanges with several shapes of input: a run plus a single
+// value, one long run, no runs at all, a single value, and duplicates.
+TEST(SafeBrowsingChunkRangeTest, TestChunksToRanges) {
+  std::vector<int> chunks;
+  std::vector<ChunkRange> ranges;
+
+  // Test one chunk range and one single value.
+  const int kRunAndSingle[] = { 1, 2, 3, 4, 7 };
+  chunks.assign(kRunAndSingle,
+                kRunAndSingle + sizeof(kRunAndSingle) / sizeof(int));
+  ChunksToRanges(chunks, &ranges);
+  EXPECT_EQ(ranges.size(), 2);
+  EXPECT_EQ(ranges[0].start(), 1);
+  EXPECT_EQ(ranges[0].stop(), 4);
+  EXPECT_EQ(ranges[1].start(), 7);
+  EXPECT_EQ(ranges[1].stop(), 7);
+
+  // Test all chunk numbers in one range.
+  ranges.clear();
+  const int kOneRun[] = { 3, 4, 5, 6, 7, 8, 9, 10 };
+  chunks.assign(kOneRun, kOneRun + sizeof(kOneRun) / sizeof(int));
+  ChunksToRanges(chunks, &ranges);
+  EXPECT_EQ(ranges.size(), 1);
+  EXPECT_EQ(ranges[0].start(), 3);
+  EXPECT_EQ(ranges[0].stop(), 10);
+
+  // Test no chunk numbers in contiguous ranges.
+  ranges.clear();
+  const int kNoRuns[] = { 3, 5, 7, 9, 11, 13, 15, 17 };
+  chunks.assign(kNoRuns, kNoRuns + sizeof(kNoRuns) / sizeof(int));
+  ChunksToRanges(chunks, &ranges);
+  EXPECT_EQ(ranges.size(), 8);
+
+  // Test a single chunk number.
+  ranges.clear();
+  chunks.assign(1, 17);
+  ChunksToRanges(chunks, &ranges);
+  EXPECT_EQ(ranges.size(), 1);
+  EXPECT_EQ(ranges[0].start(), 17);
+  EXPECT_EQ(ranges[0].stop(), 17);
+
+  // Test duplicates.
+  ranges.clear();
+  const int kDuplicates[] = { 1, 2, 2, 2, 3, 7, 7, 7, 7 };
+  chunks.assign(kDuplicates,
+                kDuplicates + sizeof(kDuplicates) / sizeof(int));
+  ChunksToRanges(chunks, &ranges);
+  EXPECT_EQ(ranges.size(), 2);
+  EXPECT_EQ(ranges[0].start(), 1);
+  EXPECT_EQ(ranges[0].stop(), 3);
+  EXPECT_EQ(ranges[1].start(), 7);
+  EXPECT_EQ(ranges[1].stop(), 7);
+}
+
+
+// Parses several protocol strings and checks the resulting ranges, finishing
+// with a non-numeric input that must be rejected.
+TEST(SafeBrowsingChunkRangeTest, TestStringToRanges) {
+  std::vector<ChunkRange> ranges;
+
+  // A mix of ranges and single chunk numbers.
+  EXPECT_TRUE(StringToRanges(std::string("1-100,398,415,1138-2001,2019"),
+                             &ranges));
+  EXPECT_EQ(ranges.size(), 5);
+  EXPECT_EQ(ranges[0].start(), 1);
+  EXPECT_EQ(ranges[0].stop(), 100);
+  EXPECT_EQ(ranges[1].start(), 398);
+  EXPECT_EQ(ranges[1].stop(), 398);
+  EXPECT_EQ(ranges[3].start(), 1138);
+  EXPECT_EQ(ranges[3].stop(), 2001);
+
+  // Only single chunk numbers.
+  ranges.clear();
+  EXPECT_TRUE(StringToRanges(std::string("1,2,3,4,5,6,7"), &ranges));
+  EXPECT_EQ(ranges.size(), 7);
+
+  // Exactly one range.
+  ranges.clear();
+  EXPECT_TRUE(StringToRanges(std::string("300-3001"), &ranges));
+  EXPECT_EQ(ranges.size(), 1);
+  EXPECT_EQ(ranges[0].start(), 300);
+  EXPECT_EQ(ranges[0].stop(), 3001);
+
+  // Exactly one chunk number.
+  ranges.clear();
+  EXPECT_TRUE(StringToRanges(std::string("17"), &ranges));
+  EXPECT_EQ(ranges.size(), 1);
+  EXPECT_EQ(ranges[0].start(), 17);
+  EXPECT_EQ(ranges[0].stop(), 17);
+
+  // Non-numeric input is malformed.
+  ranges.clear();
+  EXPECT_FALSE(StringToRanges(std::string("x-y"), &ranges));
+}
+
+
+// Expands one multi-chunk range and one single-chunk range and checks the
+// resulting chunk numbers in order.
+TEST(SafeBrowsingChunkRangeTest, TestRangesToChunks) {
+  std::vector<ChunkRange> ranges;
+  ranges.push_back(ChunkRange(1, 4));
+  ranges.push_back(ChunkRange(17));
+
+  std::vector<int> chunks;
+  RangesToChunks(ranges, &chunks);
+
+  const int kExpected[] = { 1, 2, 3, 4, 17 };
+  EXPECT_EQ(chunks.size(), 5);
+  for (size_t i = 0; i < 5; ++i)
+    EXPECT_EQ(chunks[i], kExpected[i]);
+}
\ No newline at end of file
diff --git a/chrome/browser/safe_browsing/database_perftest.cc b/chrome/browser/safe_browsing/database_perftest.cc
new file mode 100644
index 0000000..ee61c38
--- /dev/null
+++ b/chrome/browser/safe_browsing/database_perftest.cc
@@ -0,0 +1,565 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <set>
+
+#include "base/file_util.h"
+#include "base/logging.h"
+#include "base/path_service.h"
+#include "base/perftimer.h"
+#include "base/string_util.h"
+#include "chrome/browser/safe_browsing/safe_browsing_database.h"
+#include "chrome/common/chrome_paths.h"
+#include "chrome/common/sqlite_compiled_statement.h"
+#include "chrome/common/sqlite_utils.h"
+#include "chrome/test/test_file_util.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+// These tests are slow, especially the ones that create databases. So disable
+// them by default.
+//#define SAFE_BROWSING_DATABASE_TESTS_ENABLED
+#ifdef SAFE_BROWSING_DATABASE_TESTS_ENABLED
+
+namespace {
+
+// Base class for a safebrowsing test database. It owns the sqlite connection
+// and the compiled statement cache; derived classes implement different types
+// of tables so that their performance characteristics can be compared.
+class Database {
+ public:
+  Database() : db_(NULL) {
+  }
+
+  // Virtual because the test fixture deletes instances through a Database*
+  // (see SafeBrowsing::TearDown()); deleting a derived object through a base
+  // pointer with a non-virtual destructor is undefined behavior.
+  virtual ~Database() {
+    if (db_) {
+      statement_cache_.Cleanup();
+      sqlite3_close(db_);
+      db_ = NULL;
+    }
+  }
+
+  // Opens the sqlite file called |name| in the temp directory. When |create|
+  // is true any existing file is deleted first and CreateTable() is run on the
+  // new database; otherwise the existing file is evicted from the system cache
+  // and opened as is. Returns false if the database can't be opened or, when
+  // creating, if CreateTable() fails.
+  bool Init(const std::string& name, bool create) {
+    // get an empty file for the test DB
+    std::wstring filename;
+    PathService::Get(base::DIR_TEMP, &filename);
+    filename.push_back(file_util::kPathSeparator);
+    filename.append(ASCIIToWide(name));
+
+    if (create) {
+      DeleteFile(filename.c_str());
+    } else {
+      DLOG(INFO) << "evicting " << name << " ...";
+      file_util::EvictFileFromSystemCache(filename.c_str());
+      DLOG(INFO) << "... evicted";
+    }
+
+    if (sqlite3_open(WideToUTF8(filename).c_str(), &db_) != SQLITE_OK)
+      return false;
+
+    statement_cache_.set_db(db_);
+
+    if (!create)
+      return true;
+
+    return CreateTable();
+  }
+
+  // Creates the table(s) of this database type. Called by Init() when a new
+  // database is being created.
+  virtual bool CreateTable() = 0;
+
+  // Stores the |count| ints in |prefixes| for |host_key|.
+  virtual bool Add(int host_key, int* prefixes, int count) = 0;
+
+  // Reads the data stored for |host_key| into |prefixes|, a buffer of |size|
+  // bytes. In SimpleDatabase |*count| receives the number of bytes read, or -1
+  // when the host key isn't present.
+  virtual bool Read(int host_key, int* prefixes, int size, int* count) = 0;
+
+  // Returns the number of rows stored, or -1 on error.
+  virtual int Count() = 0;
+
+  // Returns a short name for this database type; the fixture appends it to
+  // the database file name.
+  virtual std::string GetDBSuffix() = 0;
+
+  sqlite3* db() { return db_; }
+
+ protected:
+  // The database connection.
+  sqlite3* db_;
+
+  // Cache of compiled statements for our database.
+  SqliteStatementCache statement_cache_;
+};
+
+// Stores every host key in a "hosts" table that has no key or index, with the
+// prefixes serialized into a blob column.
+class SimpleDatabase : public Database {
+ public:
+  // Creates the "hosts" table. Fails if the table already exists.
+  virtual bool CreateTable() {
+    if (DoesSqliteTableExist(db_, "hosts"))
+      return false;
+
+    return sqlite3_exec(db_, "CREATE TABLE hosts ("
+                        "host INTEGER,"
+                        "prefixes BLOB)",
+                        NULL, NULL, NULL) == SQLITE_OK;
+  }
+
+  // Writes a row for |host_key| whose blob holds the |count| ints found in
+  // |prefixes|. Returns false if the statement can't be compiled or executed.
+  virtual bool Add(int host_key, int* prefixes, int count) {
+    SQLITE_UNIQUE_STATEMENT(statement, statement_cache_,
+        "INSERT OR REPLACE INTO hosts"
+        "(host,prefixes)"
+        "VALUES (?,?)");
+    if (!statement.is_valid())
+      return false;
+
+    statement->bind_int(0, host_key);
+    statement->bind_blob(1, prefixes, count*sizeof(int));
+    return statement->step() == SQLITE_DONE;
+  }
+
+  // Looks up |host_key| and copies its prefix blob into |prefixes|, a buffer
+  // of |size| bytes. On success |*count| is the blob length in bytes, or -1
+  // when the host key isn't in the table (which is not an error). Returns
+  // false on a sqlite error or when the blob doesn't fit in |size| bytes.
+  virtual bool Read(int host_key, int* prefixes, int size, int* count) {
+    SQLITE_UNIQUE_STATEMENT(statement, statement_cache_,
+        "SELECT host, prefixes FROM hosts WHERE host=?");
+    if (!statement.is_valid())
+      return false;
+
+    statement->bind_int(0, host_key);
+
+    int rv = statement->step();
+    if (rv == SQLITE_DONE) {
+      // no hostkey found, not an error
+      *count = -1;
+      return true;
+    }
+
+    if (rv != SQLITE_ROW)
+      return false;
+
+    *count = statement->column_bytes(1);
+    if (*count > size)
+      return false;
+
+    // Column 1 is the prefixes blob whose size was measured above; column 0 is
+    // the host key and must not be used as the copy source.
+    memcpy(prefixes, statement->column_blob(1), *count);
+    return true;
+  }
+
+  // Returns the number of rows in the "hosts" table, or -1 (after recording a
+  // test failure) if the query can't be run.
+  int Count() {
+    SQLITE_UNIQUE_STATEMENT(statement, statement_cache_,
+        "SELECT COUNT(*) FROM hosts");
+    if (!statement.is_valid()) {
+      EXPECT_TRUE(false);
+      return -1;
+    }
+
+    if (statement->step() != SQLITE_ROW) {
+      EXPECT_TRUE(false);
+      return -1;
+    }
+
+    return statement->column_int(0);
+  }
+
+  std::string GetDBSuffix() {
+    return "Simple";
+  }
+};
+
+// Same as SimpleDatabase, except that the host column is the table's primary
+// key.
+class IndexedDatabase : public SimpleDatabase {
+ public:
+  // Creates the "hosts" table keyed on the host column.
+  virtual bool CreateTable() {
+    const char* const create_sql =
+        "CREATE TABLE hosts ("
+        "host INTEGER PRIMARY KEY,"
+        "prefixes BLOB)";
+    const int result = sqlite3_exec(db_, create_sql, NULL, NULL, NULL);
+    return result == SQLITE_OK;
+  }
+
+  // Suffix appended to the database file name for this table layout.
+  std::string GetDBSuffix() {
+    return std::string("Indexed");
+  }
+};
+
+// Variant whose table has an auto-incremented integer id as primary key and a
+// unique constraint on the host column.
+class IndexedWithIDDatabase : public SimpleDatabase {
+ public:
+  // Creates the "hosts" table with the additional id column.
+  virtual bool CreateTable() {
+    const char* const create_sql =
+        "CREATE TABLE hosts ("
+        "id INTEGER PRIMARY KEY AUTOINCREMENT,"
+        "host INTEGER UNIQUE,"
+        "prefixes BLOB)";
+    const int result = sqlite3_exec(db_, create_sql, NULL, NULL, NULL);
+    return result == SQLITE_OK;
+  }
+
+  // Writes a row for |host_key| holding the |count| ints in |prefixes|; the id
+  // column is passed as NULL. Returns false if the statement can't be compiled
+  // or executed.
+  virtual bool Add(int host_key, int* prefixes, int count) {
+    SQLITE_UNIQUE_STATEMENT(statement, statement_cache_,
+        "INSERT OR REPLACE INTO hosts"
+        "(id,host,prefixes)"
+        "VALUES (NULL,?,?)");
+    if (statement.is_valid()) {
+      statement->bind_int(0, host_key);
+      statement->bind_blob(1, prefixes, count * sizeof(int));
+      const int step_result = statement->step();
+      return step_result == SQLITE_DONE;
+    }
+    return false;
+  }
+
+  // Suffix appended to the database file name for this table layout.
+  std::string GetDBSuffix() {
+    return std::string("IndexedWithID");
+  }
+};
+
+}
+
+// Fixture for the raw sqlite table layout benchmarks. All of the work happens
+// in SetUp(), which decodes the operation, the table layout and the database
+// size from the name of the running test; the TEST_F bodies are empty.
+class SafeBrowsing: public testing::Test {
+ protected:
+  // Get the test parameters from the test case's name. Names look like
+  // "<Write|Read|Count>[Indexed|IndexedWithID]_<count><K|M>".
+  virtual void SetUp() {
+    logging::InitLogging(
+        NULL, logging::LOG_ONLY_TO_SYSTEM_DEBUG_LOG,
+        logging::LOCK_LOG_FILE,
+        logging::DELETE_OLD_LOG_FILE);
+
+    const testing::TestInfo* const test_info =
+        testing::UnitTest::GetInstance()->current_test_info();
+    std::string test_name = test_info->name();
+
+    // Which operation to time.
+    TestType type;
+    if (test_name.find("Write") != std::string::npos) {
+      type = WRITE;
+    } else if (test_name.find("Read") != std::string::npos) {
+      type = READ;
+    } else {
+      type = COUNT;
+    }
+
+    // Which table layout to use. "IndexedWithID" has to be tested first since
+    // "Indexed" is a substring of it.
+    if (test_name.find("IndexedWithID") != std::string::npos) {
+      db_ = new IndexedWithIDDatabase();
+    } else if (test_name.find("Indexed") != std::string::npos) {
+      db_ = new IndexedDatabase();
+    } else {
+      db_ = new SimpleDatabase();
+    }
+
+
+    // The last letter of the name scales the entry count.
+    char multiplier_letter = test_name[test_name.size() - 1];
+    int multiplier = 0;
+    if (multiplier_letter == 'K') {
+      multiplier = 1000;
+    } else if (multiplier_letter == 'M') {
+      multiplier = 1000000;
+    } else {
+      NOTREACHED();
+    }
+
+    // The entry count is the number that follows the last underscore.
+    size_t index = test_name.size() - 1;
+    while (index != 0 && test_name[index] != '_')
+      index--;
+
+    DCHECK(index);
+    const char* count_start = test_name.c_str() + ++index;
+    int count = atoi(count_start);
+    int size = count * multiplier;
+
+    // The file name is built from the size (e.g. "250K") and the table type,
+    // so the Write, Read and Count tests of one configuration share a file.
+    db_name_ = StringPrintf("TestSafeBrowsing");
+    db_name_.append(count_start);
+    db_name_.append(db_->GetDBSuffix());
+
+    ASSERT_TRUE(db_->Init(db_name_, type == WRITE));
+
+    if (type == WRITE) {
+      WriteEntries(size);
+    } else if (type == READ) {
+      ReadEntries(100);
+    } else {
+      CountEntries();
+    }
+  }
+
+  virtual void TearDown() {
+    delete db_;
+  }
+
+  // This writes the given number of entries to the database, each one under a
+  // random host key, inside a single transaction.
+  void WriteEntries(int count) {
+    // Zeroed so that the prefix stored with every host key has a defined
+    // value.
+    int prefixes[4] = { 0 };
+
+    SQLTransaction transaction(db_->db());
+    transaction.Begin();
+
+    for (int i = 0; i < count; i++) {
+      int hostkey;
+      rand_s((unsigned int*)&hostkey);
+      ASSERT_TRUE(db_->Add(hostkey, prefixes, 1));
+    }
+
+    transaction.Commit();
+  }
+
+  // Read the given number of entries from the database, using random host
+  // keys, and logs each lookup's time as well as the average.
+  void ReadEntries(int count) {
+    int prefixes[4];
+
+    int64 total_ms = 0;
+
+    for (int i = 0; i < count; ++i) {
+      int key;
+      rand_s((unsigned int*)&key);
+
+      PerfTimer timer;
+
+      int read;
+      ASSERT_TRUE(db_->Read(key, prefixes, sizeof(prefixes), &read));
+
+      int64 time_ms = timer.Elapsed().InMilliseconds();
+      total_ms += time_ms;
+      DLOG(INFO) << "Read in " << time_ms << " ms.";
+    }
+
+    DLOG(INFO) << db_name_ << " read " << count << " entries in average of " <<
+        total_ms/count << " ms.";
+  }
+
+  // Counts how many entries are in the database, which effectively does a full
+  // table scan.
+  void CountEntries() {
+    PerfTimer timer;
+
+    int count = db_->Count();
+
+    DLOG(INFO) << db_name_ << " counted " << count << " entries in " <<
+        timer.Elapsed().InMilliseconds() << " ms";
+  }
+
+  // The operation selected by the test's name.
+  enum TestType {
+    WRITE,
+    READ,
+    COUNT,
+  };
+
+ private:
+
+  // Database under test; created in SetUp() and deleted in TearDown().
+  Database* db_;
+
+  // Name of the database file inside the temp directory.
+  std::string db_name_;
+};
+
+// The bodies of the tests below are intentionally empty: SafeBrowsing::SetUp()
+// derives the operation (Write, Read or Count), the table layout (Indexed,
+// IndexedWithID, or the simple one when neither appears) and the number of
+// entries (e.g. 250K) from each test's name and does the work itself.
+// NOTE(review): a Read/Count test opens the file written by the Write test of
+// the same configuration, so presumably that Write test must have been run
+// beforehand - confirm.
+TEST_F(SafeBrowsing, Write_100K) {
+}
+
+TEST_F(SafeBrowsing, Read_100K) {
+}
+
+TEST_F(SafeBrowsing, WriteIndexed_100K) {
+}
+
+TEST_F(SafeBrowsing, ReadIndexed_100K) {
+}
+
+TEST_F(SafeBrowsing, WriteIndexed_250K) {
+}
+
+TEST_F(SafeBrowsing, ReadIndexed_250K) {
+}
+
+TEST_F(SafeBrowsing, WriteIndexed_500K) {
+}
+
+TEST_F(SafeBrowsing, ReadIndexed_500K) {
+}
+
+TEST_F(SafeBrowsing, ReadIndexedWithID_250K) {
+}
+
+TEST_F(SafeBrowsing, WriteIndexedWithID_250K) {
+}
+
+TEST_F(SafeBrowsing, ReadIndexedWithID_500K) {
+}
+
+TEST_F(SafeBrowsing, WriteIndexedWithID_500K) {
+}
+
+TEST_F(SafeBrowsing, CountIndexed_250K) {
+}
+
+TEST_F(SafeBrowsing, CountIndexed_500K) {
+}
+
+TEST_F(SafeBrowsing, CountIndexedWithID_250K) {
+}
+
+TEST_F(SafeBrowsing, CountIndexedWithID_500K) {
+}
+
+
+// Helper that times the real SafeBrowsingDatabase against a database file in
+// the temp directory: filling it up, reading from it with or without the bloom
+// filter, and rebuilding the bloom filter.
+class SafeBrowsingDatabaseTest {
+ public:
+  // |name| is the name of the database file inside the temp directory.
+  SafeBrowsingDatabaseTest(const std::wstring& name) {
+    logging::InitLogging(
+        NULL, logging::LOG_ONLY_TO_SYSTEM_DEBUG_LOG,
+        logging::LOCK_LOG_FILE,
+        logging::DELETE_OLD_LOG_FILE);
+
+    PathService::Get(base::DIR_TEMP, &filename_);
+    filename_.push_back(file_util::kPathSeparator);
+    filename_.append(name);
+  }
+
+  // Deletes any existing database file and inserts |size| host keys, in chunks
+  // of 100 random host keys carrying two fixed prefixes each. A remainder of
+  // fewer than 100 host keys is dropped by the integer division below.
+  void Create(int size) {
+    DeleteFile(filename_.c_str());
+
+    SafeBrowsingDatabase database;
+    database.set_synchronous();
+    EXPECT_TRUE(database.Init(filename_));
+
+    int chunk_id = 0;
+    int total_host_keys = size;
+    int host_keys_per_chunk = 100;
+
+    std::deque<SBChunk>* chunks = new std::deque<SBChunk>;
+
+    for (int i = 0; i < total_host_keys / host_keys_per_chunk; ++i) {
+      chunks->push_back(SBChunk());
+      chunks->back().chunk_number = ++chunk_id;
+
+      for (int j = 0; j < host_keys_per_chunk; ++j) {
+        SBChunkHost host;
+        rand_s((unsigned int*)&host.host);
+        host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 2);
+        host.entry->SetPrefixAt(0, 0x2425525);
+        host.entry->SetPrefixAt(1, 0x1536366);
+
+        chunks->back().hosts.push_back(host);
+      }
+    }
+
+    // NOTE(review): |chunks| is never deleted here; presumably InsertChunks()
+    // takes ownership of it - confirm.
+    database.InsertChunks("goog-malware", chunks);
+  }
+
+  // Looks up 500 random urls and logs the timings. When |use_bloom_filter| is
+  // true only the urls for which NeedToCheckUrl() returns true are looked up
+  // in the database.
+  void Read(bool use_bloom_filter) {
+    int keys_to_read = 500;
+    file_util::EvictFileFromSystemCache(filename_.c_str());
+
+    SafeBrowsingDatabase database;
+    database.set_synchronous();
+    EXPECT_TRUE(database.Init(filename_));
+
+    PerfTimer total_timer;
+    int64 db_ms = 0;
+    int keys_from_db = 0;
+    for (int i = 0; i < keys_to_read; ++i) {
+      int key;
+      rand_s((unsigned int*)&key);
+
+      std::string url = StringPrintf("http://www.%d.com/blah.html", key);
+
+      std::string matching_list;
+      std::vector<SBPrefix> prefix_hits;
+      GURL gurl(url);
+      if (!use_bloom_filter || database.NeedToCheckUrl(gurl)) {
+        PerfTimer timer;
+        database.ContainsUrl(gurl, &matching_list, &prefix_hits);
+
+        int64 time_ms = timer.Elapsed().InMilliseconds();
+
+        DLOG(INFO) << "Read from db in " << time_ms << " ms.";
+
+        db_ms += time_ms;
+        keys_from_db++;
+      }
+    }
+
+    int64 total_ms = total_timer.Elapsed().InMilliseconds();
+
+    // With the bloom filter every key may have been filtered out, in which
+    // case there is no average to compute (and dividing would crash).
+    const int64 average_db_ms = keys_from_db > 0 ? db_ms / keys_from_db : 0;
+
+    DLOG(INFO) << WideToASCII(file_util::GetFilenameFromPath(filename_)) <<
+        " read " << keys_to_read << " entries in " << total_ms << " ms. " <<
+        keys_from_db << " keys were read from the db, with average read taking " <<
+        average_db_ms << " ms";
+  }
+
+  // Deletes the bloom filter file and times SafeBrowsingDatabase::Init() on
+  // the existing database file.
+  // NOTE(review): presumably Init() rebuilds the filter when the file is
+  // missing, which is what is being measured - confirm.
+  void BuildBloomFilter() {
+    file_util::EvictFileFromSystemCache(filename_.c_str());
+    file_util::Delete(SafeBrowsingDatabase::BloomFilterFilename(filename_), false);
+
+    PerfTimer total_timer;
+
+    SafeBrowsingDatabase database;
+    database.set_synchronous();
+    EXPECT_TRUE(database.Init(filename_));
+
+    int64 total_ms = total_timer.Elapsed().InMilliseconds();
+
+    DLOG(INFO) << WideToASCII(file_util::GetFilenameFromPath(filename_)) <<
+        " built bloom filter in " << total_ms << " ms.";
+  }
+
+ private:
+  // Full path of the database file.
+  std::wstring filename_;
+};
+
+// NOTE(review): the read and bloom filter tests below open the database file
+// that the FillUp test with the same file name creates, so presumably the
+// matching FillUp test has to be run first - confirm.
+
+// Adds 100K host records.
+TEST(SafeBrowsingDatabase, FillUp100K) {
+ SafeBrowsingDatabaseTest db(L"SafeBrowsing100K");
+ db.Create(100000);
+}
+
+// Adds 250K host records.
+TEST(SafeBrowsingDatabase, FillUp250K) {
+ SafeBrowsingDatabaseTest db(L"SafeBrowsing250K");
+ db.Create(250000);
+}
+
+// Adds 500K host records.
+TEST(SafeBrowsingDatabase, FillUp500K) {
+ SafeBrowsingDatabaseTest db(L"SafeBrowsing500K");
+ db.Create(500000);
+}
+
+// Reads 500 entries and prints the timing.
+TEST(SafeBrowsingDatabase, ReadFrom250K) {
+ SafeBrowsingDatabaseTest db(L"SafeBrowsing250K");
+ db.Read(false);
+}
+
+// Same as above, against the 500K database file.
+TEST(SafeBrowsingDatabase, ReadFrom500K) {
+ SafeBrowsingDatabaseTest db(L"SafeBrowsing500K");
+ db.Read(false);
+}
+
+// Read 500 entries with a bloom filter and print the timing.
+TEST(SafeBrowsingDatabase, BloomReadFrom250K) {
+ SafeBrowsingDatabaseTest db(L"SafeBrowsing250K");
+ db.Read(true);
+}
+
+// Same as above, against the 500K database file.
+TEST(SafeBrowsingDatabase, BloomReadFrom500K) {
+ SafeBrowsingDatabaseTest db(L"SafeBrowsing500K");
+ db.Read(true);
+}
+
+// Test how long bloom filter creation takes.
+TEST(SafeBrowsingDatabase, BuildBloomFilter250K) {
+ SafeBrowsingDatabaseTest db(L"SafeBrowsing250K");
+ db.BuildBloomFilter();
+}
+
+// Same as above, against the 500K database file.
+TEST(SafeBrowsingDatabase, BuildBloomFilter500K) {
+ SafeBrowsingDatabaseTest db(L"SafeBrowsing500K");
+ db.BuildBloomFilter();
+}
+
+#endif \ No newline at end of file
diff --git a/chrome/browser/safe_browsing/protocol_manager.cc b/chrome/browser/safe_browsing/protocol_manager.cc
new file mode 100644
index 0000000..086ceb9
--- /dev/null
+++ b/chrome/browser/safe_browsing/protocol_manager.cc
@@ -0,0 +1,607 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "chrome/browser/safe_browsing/protocol_manager.h"
+
+#include "base/histogram.h"
+#include "base/logging.h"
+#include "base/message_loop.h"
+#include "base/string_util.h"
+#include "base/task.h"
+#include "base/timer.h"
+#include "chrome/browser/profile.h"
+#include "chrome/browser/safe_browsing/protocol_parser.h"
+#include "chrome/browser/safe_browsing/safe_browsing_database.h"
+#include "chrome/browser/safe_browsing/safe_browsing_service.h"
+#include "chrome/common/env_util.h"
+#include "chrome/common/env_vars.h"
+#include "chrome/common/rand_util.h"
+#include "chrome/common/stl_util-inl.h"
+#include "net/base/base64.h"
+#include "net/base/load_flags.h"
+
+
+// Maximum time, in seconds, from start up before we must issue an update query.
+static const int kSbTimerStartIntervalSec = 300;
+
+// The three URL templates below are expanded with StringPrintf(): %s receives
+// the client name (kSbClientName) and %d.%d the major and minor client version
+// (kSbClientMajorVersion, kSbClientMinorVersion).
+
+// Update URL for querying about the latest set of chunk updates.
+static const char* const kSbUpdateUrl =
+ "http://safebrowsing.clients.google.com/safebrowsing/downloads?client=%s&appver=%d.%d&pver=2.1";
+
+// GetHash request URL for retrieving full hashes.
+static const char* const kSbGetHashUrl =
+ "http://safebrowsing.clients.google.com/safebrowsing/gethash?client=%s&appver=%d.%d&pver=2.1";
+
+// New MAC client key requests URL.
+static const char* const kSbNewKeyUrl =
+ "https://sb-ssl.google.com/safebrowsing/newkey?client=%s&appver=%d.%d&pver=2.1";
+
+// Client name and version sent in every request URL.
+// TODO(paulg): Change these values when we get close to launch.
+static const char* const kSbClientName = "googleclient";
+static const int kSbClientMajorVersion = 1;
+static const int kSbClientMinorVersion = 0;
+
+// Maximum back off multiplier. GetNextBackOffTime() doubles the multiplier
+// after each use until it reaches this value.
+static const int kSbMaxBackOff = 8;
+
+
+// Periodic update task --------------------------------------------------------
+// Task run by the update timer (see ScheduleNextUpdate()); it asks the
+// protocol manager to start the next update.
+class SafeBrowsingProtocolUpdateTask : public Task {
+ public:
+ // |manager| is not owned; it is only called back from Run().
+ explicit SafeBrowsingProtocolUpdateTask(SafeBrowsingProtocolManager* manager)
+ : manager_(manager) {
+ }
+
+ // Forwards to SafeBrowsingProtocolManager::GetNextUpdate().
+ void Run() {
+ manager_->GetNextUpdate();
+ }
+
+ private:
+ // The manager to notify.
+ SafeBrowsingProtocolManager* manager_;
+};
+
+
+// SafeBrowsingProtocolManager implementation ----------------------------------
+
+// |client_key| and |wrapped_key| are the MAC keys to start with; when either
+// one is empty GetNextUpdate() requests new keys before the first update.
+// |notify_loop| is the message loop on which |sb_service| is told about new
+// MAC keys.
+SafeBrowsingProtocolManager::SafeBrowsingProtocolManager(
+ SafeBrowsingService* sb_service,
+ MessageLoop* notify_loop,
+ const std::string& client_key,
+ const std::string& wrapped_key)
+ : sb_service_(sb_service),
+ request_type_(NO_REQUEST),
+ update_error_count_(0),
+ gethash_error_count_(0),
+ update_back_off_mult_(1),
+ gethash_back_off_mult_(1),
+ next_update_sec_(-1),
+ update_state_(FIRST_REQUEST),
+ initial_request_(true),
+ chunk_pending_to_write_(false),
+ notify_loop_(notify_loop),
+ client_key_(client_key),
+ wrapped_key_(wrapped_key) {
+ // Set the backoff multiplier fuzz to a random value in the range (0, 1].
+ back_off_fuzz_ = static_cast<float>(rand_util::RandInt(1, INT_MAX)) / INT_MAX;
+
+ // The first update happens between 1 and 5 minutes after start up, i.e.
+ // between 60 and kSbTimerStartIntervalSec seconds.
+ next_update_sec_ = rand_util::RandInt(60, kSbTimerStartIntervalSec);
+}
+
+// Cancels the pending update timer and deletes the fetchers of the GetHash
+// requests that are still in flight.
+SafeBrowsingProtocolManager::~SafeBrowsingProtocolManager() {
+  // Stop the update timer, if one was ever scheduled.
+  if (update_timer_.get()) {
+    TimerManager* timer_manager = MessageLoop::current()->timer_manager();
+    timer_manager->StopTimer(update_timer_.get());
+  }
+
+  // The URLFetcher keys of |hash_requests_| are owned by this object.
+  STLDeleteContainerPairFirstPointers(hash_requests_.begin(),
+                                      hash_requests_.end());
+  hash_requests_.clear();
+}
+
+// Public API used by the SafeBrowsingService ----------------------------------
+
+// Issues a GetHash request for |prefixes| on behalf of |check|; the results
+// are handed to SafeBrowsingService::HandleGetHashResults() once the response
+// arrives (see OnURLFetchComplete()).
+//
+// We can only have one update or chunk request outstanding, but there may be
+// multiple GetHash requests pending since we don't want to serialize them and
+// slow down the user.
+void SafeBrowsingProtocolManager::GetFullHash(
+    SafeBrowsingService::SafeBrowsingCheck* check,
+    const std::vector<SBPrefix>& prefixes) {
+  // If we are in GetHash backoff, we need to check if we're past the next
+  // allowed time. If we are, we can proceed with the request. If not, we are
+  // required to return empty results (i.e. treat the page as safe).
+  if (gethash_error_count_ && Time::Now() <= next_gethash_time_) {
+    std::vector<SBFullHashResult> full_hashes;
+    sb_service_->HandleGetHashResults(check, full_hashes);
+    return;
+  }
+
+  std::string url = StringPrintf(kSbGetHashUrl,
+                                 kSbClientName,
+                                 kSbClientMajorVersion,
+                                 kSbClientMinorVersion);
+  if (!client_key_.empty()) {
+    url.append("&wrkey=");
+    url.append(wrapped_key_);
+  }
+
+  // The fetcher is deleted in OnURLFetchComplete(), or in the destructor if
+  // the response never arrives.
+  GURL gethash_url(url);
+  URLFetcher* fetcher = new URLFetcher(gethash_url, URLFetcher::POST, this);
+  hash_requests_[fetcher] = check;
+
+  std::string get_hash;
+  SafeBrowsingProtocolParser parser;
+  parser.FormatGetHash(prefixes, &get_hash);
+
+  fetcher->set_load_flags(net::LOAD_DISABLE_CACHE);
+  fetcher->set_request_context(Profile::GetDefaultRequestContext());
+  // Pass the string itself rather than get_hash.data(): converting through a
+  // const char* would cut the request body at the first NUL byte, and the
+  // body may contain raw prefix bytes.
+  fetcher->set_upload_data("text/plain", get_hash);
+  fetcher->Start();
+}
+
+// Starts the next update. On the first request of the session the MAC keys are
+// fetched first if we don't have both of them; otherwise an update request is
+// issued unless an update, chunk or key request is already outstanding.
+void SafeBrowsingProtocolManager::GetNextUpdate() {
+  const bool keys_missing = client_key_.empty() || wrapped_key_.empty();
+  if (initial_request_ && keys_missing) {
+    // The update is issued once the key response has been handled.
+    IssueKeyRequest();
+    return;
+  }
+
+  initial_request_ = false;
+
+  if (request_.get())
+    return;
+
+  IssueUpdateRequest();
+}
+
+// URLFetcher::Delegate implementation -----------------------------------------
+
+// All SafeBrowsing request responses are handled here. |source| is either one
+// of the pending GetHash fetchers or the single outstanding update / chunk /
+// key request; in both cases the fetcher is deleted when this method returns.
+// TODO(paulg): Clarify with the SafeBrowsing team whether a failed parse of a
+//              chunk should retry the download and parse of that chunk (and
+//              what back off / how many times to try), and if that affects the
+//              update back off. For now, a failed parse of the chunk means we
+//              drop it. This isn't so bad because the next UPDATE_REQUEST we
+//              do will report all the chunks we have. If that chunk is still
+//              required, the SafeBrowsing servers will tell us to get it again.
+void SafeBrowsingProtocolManager::OnURLFetchComplete(
+    const URLFetcher* source,
+    const GURL& url,
+    const URLRequestStatus& status,
+    int response_code,
+    const ResponseCookies& cookies,
+    const std::string& data) {
+  scoped_ptr<const URLFetcher> fetcher;
+  bool parsed_ok = true;
+  bool must_back_off = false;  // Reduce SafeBrowsing service query frequency.
+
+  HashRequests::iterator it = hash_requests_.find(source);
+  if (it != hash_requests_.end()) {
+    // GetHash response.
+    fetcher.reset(it->first);
+    SafeBrowsingService::SafeBrowsingCheck* check = it->second;
+    std::vector<SBFullHashResult> full_hashes;
+    if (response_code == 200 || response_code == 204) {
+      // A successful response ends any GetHash back off.
+      gethash_error_count_ = 0;
+      gethash_back_off_mult_ = 1;
+      bool re_key = false;
+      SafeBrowsingProtocolParser parser;
+      parsed_ok = parser.ParseGetHash(data.data(),
+                                      static_cast<int>(data.length()),
+                                      client_key_,
+                                      &re_key,
+                                      &full_hashes);
+      if (!parsed_ok) {
+        // If we fail to parse it, we must still inform the SafeBrowsingService
+        // so that it doesn't hold up the user's request indefinitely. Not sure
+        // what to do at that point though!
+        full_hashes.clear();
+      } else {
+        if (re_key)
+          HandleReKey();
+      }
+    } else if (response_code >= 300) {
+      HandleGetHashError();
+      SB_DLOG(INFO) << "SafeBrowsing GetHash request for: " << source->url()
+                    << ", failed with error: " << response_code;
+    }
+
+    // Call back the SafeBrowsingService with full_hashes, even if there was a
+    // parse error or an error response code (in which case full_hashes will be
+    // empty). We can't block the user regardless of the error status.
+    sb_service_->HandleGetHashResults(check, full_hashes);
+
+    hash_requests_.erase(it);
+  } else {
+    // Update, chunk or key response.
+    DCHECK(source == request_.get());
+    fetcher.reset(request_.release());
+
+    if (response_code == 200) {
+      // We have data from the SafeBrowsing service.
+      parsed_ok = HandleServiceResponse(source->url(),
+                                        data.data(),
+                                        static_cast<int>(data.length()));
+      if (!parsed_ok) {
+        SB_DLOG(INFO) << "SafeBrowsing request for: " << source->url()
+                      << ", failed parse.";
+      }
+
+      if (request_type_ == CHUNK_REQUEST) {
+        // A chunk that fails to parse drops the rest of the pending chunk
+        // urls as well (see the TODO above).
+        if (parsed_ok) {
+          chunk_request_urls_.pop_front();
+        } else {
+          chunk_request_urls_.clear();
+        }
+      } else if (request_type_ == GETKEY_REQUEST && initial_request_) {
+        // This is the first request we've made this session. Now that we have
+        // the keys, do the regular update request.
+        initial_request_ = false;
+        GetNextUpdate();
+        return;
+      }
+    } else if (response_code >= 300) {
+      // The SafeBrowsing service error: back off.
+      must_back_off = true;
+      if (request_type_ == CHUNK_REQUEST)
+        chunk_request_urls_.clear();
+      SB_DLOG(INFO) << "SafeBrowsing request for: " << source->url()
+                    << ", failed with error: " << response_code;
+    }
+  }
+
+  // Schedule a new update request if we've finished retrieving all the chunks
+  // from the previous update. We treat the update request and the chunk URLs it
+  // contains as an atomic unit as far as back off is concerned.
+  // NOTE(review): this is also reached after a GetHash response, with whatever
+  // |request_type_| was set last - confirm that rescheduling is intended then.
+  if (chunk_request_urls_.empty() &&
+      (request_type_ == CHUNK_REQUEST || request_type_ == UPDATE_REQUEST))
+    ScheduleNextUpdate(must_back_off);
+
+  // Get the next chunk if available.
+  IssueChunkRequest();
+}
+
+// Parses the |length| bytes at |data|, the body of the response to the
+// outstanding update, chunk or key request (selected by |request_type_|), and
+// acts on the result. |url| is the url that was fetched; for chunk requests
+// the list name is taken from its file name. Returns false if the response
+// can't be parsed or if no such request is outstanding.
+bool SafeBrowsingProtocolManager::HandleServiceResponse(const GURL& url,
+                                                        const char* data,
+                                                        int length) {
+  SafeBrowsingProtocolParser parser;
+
+  switch (request_type_) {
+    case UPDATE_REQUEST: {
+      int next_update_sec = -1;
+      bool re_key = false;
+      bool reset = false;
+      // Held in a scoped_ptr so that the vector is freed on every path that
+      // doesn't hand it to the service.
+      scoped_ptr<std::vector<SBChunkDelete> > chunk_deletes(
+          new std::vector<SBChunkDelete>);
+      std::vector<ChunkUrl> chunk_urls;
+      if (!parser.ParseUpdate(data, length, client_key_,
+                              &next_update_sec, &re_key,
+                              &reset, chunk_deletes.get(), &chunk_urls)) {
+        return false;
+      }
+
+      last_update_ = Time::Now();
+
+      if (update_state_ == FIRST_REQUEST)
+        update_state_ = SECOND_REQUEST;
+      else if (update_state_ == SECOND_REQUEST)
+        update_state_ = NORMAL_REQUEST;
+
+      // New time for the next update.
+      if (next_update_sec > 0) {
+        next_update_sec_ = next_update_sec;
+      } else if (update_state_ == SECOND_REQUEST) {
+        next_update_sec_ = rand_util::RandInt(15, 45) * 60;
+      }
+
+      // We need to request a new set of keys for MAC.
+      if (re_key)
+        HandleReKey();
+
+      // New chunks to download.
+      if (!chunk_urls.empty()) {
+        for (size_t i = 0; i < chunk_urls.size(); ++i)
+          chunk_request_urls_.push_back(chunk_urls[i]);
+      }
+
+      // Handle the case where the SafeBrowsing service tells us to dump our
+      // database.
+      if (reset) {
+        sb_service_->ResetDatabase();
+        return true;
+      }
+
+      // Chunks to delete from our storage.
+      // NOTE(review): the vector is released to HandleChunkDelete() on the
+      // assumption that the service takes ownership of it - confirm.
+      if (!chunk_deletes->empty())
+        sb_service_->HandleChunkDelete(chunk_deletes.release());
+
+      break;
+    }
+    case CHUNK_REQUEST: {
+      // Find list name from url.
+      std::string url_path = url.ExtractFileName();
+      if (url_path.empty())
+        return false;
+
+      std::string::size_type pos = url_path.find_first_of('_');
+      if (pos == std::string::npos)
+        return false;
+
+      const ChunkUrl chunk_url = chunk_request_urls_.front();
+      DCHECK(url.spec().find(chunk_url.url) != std::string::npos);
+
+      bool re_key = false;
+      std::deque<SBChunk>* chunks = new std::deque<SBChunk>;
+      if (!parser.ParseChunk(data, length,
+                             client_key_, chunk_url.mac,
+                             &re_key, chunks)) {
+#ifndef NDEBUG
+        // Encode exactly |length| bytes: the chunk data is binary, so passing
+        // |data| as a C string would stop at the first NUL byte.
+        std::string data_str;
+        data_str.assign(data, length);
+        std::string encoded_chunk;
+        Base64Encode(data_str, &encoded_chunk);
+        SB_DLOG(INFO) << "ParseChunk error for chunk: " << chunk_url.url
+                      << ", client_key: " << client_key_
+                      << ", wrapped_key: " << wrapped_key_
+                      << ", mac: " << chunk_url.mac
+                      << ", Base64Encode(data): " << encoded_chunk
+                      << ", length: " << length;
+#endif
+        safe_browsing_util::FreeChunks(chunks);
+        delete chunks;
+        return false;
+      }
+
+      if (re_key)
+        HandleReKey();
+
+      if (chunks->empty()) {
+        delete chunks;
+      } else {
+        // No further chunk is requested until OnChunkInserted() clears this
+        // flag.
+        chunk_pending_to_write_ = true;
+        std::string list_name(url_path, 0, pos);
+        sb_service_->HandleChunk(list_name, chunks);
+      }
+
+      break;
+    }
+    case GETKEY_REQUEST: {
+      std::string client_key, wrapped_key;
+      if (!parser.ParseNewKey(data, length, &client_key, &wrapped_key))
+        return false;
+
+      client_key_ = client_key;
+      wrapped_key_ = wrapped_key;
+      notify_loop_->PostTask(FROM_HERE, NewRunnableMethod(
+          sb_service_, &SafeBrowsingService::OnNewMacKeys, client_key_,
+          wrapped_key_));
+      break;
+    }
+
+    default:
+      return false;
+  }
+
+  return true;
+}
+
+// Schedules the first update, except when running headless.
+void SafeBrowsingProtocolManager::Initialize() {
+  // Don't want to hit the safe browsing servers on build/chrome bots.
+  const bool headless =
+      env_util::HasEnvironmentVariable(env_vars::kHeadless);
+  if (!headless)
+    ScheduleNextUpdate(false /* no back off */);
+}
+
+// (Re)starts the update timer. |back_off| is true when the previous request
+// failed and the delay has to follow the back off rules.
+void SafeBrowsingProtocolManager::ScheduleNextUpdate(bool back_off) {
+  DCHECK(next_update_sec_ > 0);
+
+  // The task run by the timer is created on first use and then reused.
+  if (update_task_.get() == NULL)
+    update_task_.reset(new SafeBrowsingProtocolUpdateTask(this));
+
+  // Cancel the timer that is currently pending, if any.
+  TimerManager* timer_manager = MessageLoop::current()->timer_manager();
+  if (update_timer_.get() != NULL)
+    timer_manager->StopTimer(update_timer_.get());
+
+  // Start a new timer with the delay for the next update.
+  const int delay_ms = GetNextUpdateTime(back_off);
+  update_timer_.reset(
+      timer_manager->StartTimer(delay_ms, update_task_.get(), false));
+}
+
+// Returns the delay before the next update, in milliseconds.
+//
+// According to section 5 of the SafeBrowsing protocol specification, we must
+// back off after a certain number of errors. We only change 'next_update_sec_'
+// when we receive a response from the SafeBrowsing service.
+int SafeBrowsingProtocolManager::GetNextUpdateTime(bool back_off) {
+  int delay_sec;
+  if (!back_off) {
+    // Successful response means error reset.
+    update_error_count_ = 0;
+    update_back_off_mult_ = 1;
+    delay_sec = next_update_sec_;
+  } else {
+    delay_sec =
+        GetNextBackOffTime(&update_error_count_, &update_back_off_mult_);
+  }
+  return delay_sec * 1000;  // milliseconds
+}
+
+int SafeBrowsingProtocolManager::GetNextBackOffTime(int* error_count,
+ int* multiplier) {
+ DCHECK(multiplier && error_count);
+ (*error_count)++;
+ if (*error_count > 1 && *error_count < 6) {
+ int next = static_cast<int>(*multiplier * (1 + back_off_fuzz_) * 30 * 60);
+ *multiplier *= 2;
+ if (*multiplier > kSbMaxBackOff)
+ *multiplier = kSbMaxBackOff;
+ return next;
+ }
+
+ if (*error_count >= 6)
+ return 60 * 60 * 8; // 8 hours
+
+ return 60; // 1 minute
+}
+
+// This request requires getting a list of all the chunks for each list from the
+// database asynchronously. The request will be issued when we're called back in
+// OnGetChunksComplete.
+// TODO(paulg): We should get this at start up and maintain a ChunkRange cache
+// to avoid hitting the database with each update request. On the
+// other hand, this request will only occur every ~20-30 minutes so
+// there isn't that much overhead. Measure!
+void SafeBrowsingProtocolManager::IssueUpdateRequest() {
+ // Set the type before asking for the chunks: OnGetChunksComplete() DCHECKs
+ // that an update request is in progress.
+ request_type_ = UPDATE_REQUEST;
+ sb_service_->GetAllChunks();
+}
+
+// Starts the download of the first url in |chunk_request_urls_|, if there is
+// one and nothing prevents it. The url stays in the queue until its response
+// has been handled.
+void SafeBrowsingProtocolManager::IssueChunkRequest() {
+  // We are only allowed to have one request outstanding at any time. Also,
+  // don't get the next url until the previous one has been written to disk so
+  // that we don't use too much memory.
+  const bool busy = request_.get() || chunk_request_urls_.empty() ||
+                    chunk_pending_to_write_;
+  if (busy)
+    return;
+
+  ChunkUrl next_chunk = chunk_request_urls_.front();
+  DCHECK(!next_chunk.url.empty());
+
+  // Urls without a scheme are fetched over http.
+  const bool has_scheme =
+      StartsWithASCII(next_chunk.url, "http://", false) ||
+      StartsWithASCII(next_chunk.url, "https://", false);
+  if (!has_scheme)
+    next_chunk.url = "http://" + next_chunk.url;
+
+  GURL chunk_url(next_chunk.url);
+  request_type_ = CHUNK_REQUEST;
+  request_.reset(new URLFetcher(chunk_url, URLFetcher::GET, this));
+  request_->set_load_flags(net::LOAD_DISABLE_CACHE);
+  request_->set_request_context(Profile::GetDefaultRequestContext());
+  request_->Start();
+}
+
+// Requests a new pair of MAC keys; the response is handled as a
+// GETKEY_REQUEST in HandleServiceResponse().
+void SafeBrowsingProtocolManager::IssueKeyRequest() {
+  const std::string key_spec = StringPrintf(kSbNewKeyUrl,
+                                            kSbClientName,
+                                            kSbClientMajorVersion,
+                                            kSbClientMinorVersion);
+  GURL key_url(key_spec);
+
+  request_type_ = GETKEY_REQUEST;
+  request_.reset(new URLFetcher(key_url, URLFetcher::GET, this));
+  request_->set_load_flags(net::LOAD_DISABLE_CACHE);
+  request_->set_request_context(Profile::GetDefaultRequestContext());
+  request_->Start();
+}
+
+// Called with the chunk ranges we have for every list, in response to
+// IssueUpdateRequest(). Builds the body of the update request from |lists| and
+// sends it. If |database_error| is set no request is sent and the next update
+// is scheduled instead.
+void SafeBrowsingProtocolManager::OnGetChunksComplete(
+    const std::vector<SBListChunkRanges>& lists, bool database_error) {
+  DCHECK(request_type_ == UPDATE_REQUEST);
+
+  if (database_error) {
+    ScheduleNextUpdate(false);
+    return;
+  }
+
+  const bool use_mac = !client_key_.empty();
+
+  // Format our stored chunks, remembering which of the lists we know about
+  // were present.
+  std::string body;
+  bool have_phishing = false;
+  bool have_malware = false;
+  for (size_t i = 0; i < lists.size(); ++i) {
+    const SBListChunkRanges& list = lists[i];
+    body.append(FormatList(list, use_mac));
+    if (list.name == "goog-phish-shavar")
+      have_phishing = true;
+
+    if (list.name == "goog-malware-shavar")
+      have_malware = true;
+  }
+
+  // If we have an empty database, let the server know we want data for these
+  // lists.
+  if (!have_phishing) {
+    body.append(FormatList(SBListChunkRanges("goog-phish-shavar"),
+                           use_mac));
+  }
+
+  if (!have_malware) {
+    body.append(FormatList(SBListChunkRanges("goog-malware-shavar"),
+                           use_mac));
+  }
+
+  std::string spec = StringPrintf(kSbUpdateUrl,
+                                  kSbClientName,
+                                  kSbClientMajorVersion,
+                                  kSbClientMinorVersion);
+  if (use_mac) {
+    spec.append("&wrkey=");
+    spec.append(wrapped_key_);
+  }
+
+  GURL update_url(spec);
+  request_.reset(new URLFetcher(update_url, URLFetcher::POST, this));
+  request_->set_load_flags(net::LOAD_DISABLE_CACHE);
+  request_->set_request_context(Profile::GetDefaultRequestContext());
+  request_->set_upload_data("text/plain", body);
+  request_->Start();
+}
+
+// Called once the chunk passed to the service has been written. Requests the
+// next chunk if there is one; otherwise the update is complete and the time
+// elapsed since the update response was parsed is recorded.
+void SafeBrowsingProtocolManager::OnChunkInserted() {
+  chunk_pending_to_write_ = false;
+
+  if (!chunk_request_urls_.empty()) {
+    IssueChunkRequest();
+    return;
+  }
+
+  UMA_HISTOGRAM_LONG_TIMES(L"SB.Update", Time::Now() - last_update_);
+}
+
+// static
+// Formats one line of an update request for |list|:
+//   <name>;[a:<adds>][:][s:<subs>][:][mac]\n
+// The "a:" and "s:" sections are only present when the list has add / sub
+// chunks, "mac" only when |use_mac| is set, and a ":" separates the sections
+// that are present.
+std::string SafeBrowsingProtocolManager::FormatList(
+    const SBListChunkRanges& list, bool use_mac) {
+  const bool has_adds = !list.adds.empty();
+  const bool has_subs = !list.subs.empty();
+
+  std::string line;
+  line.append(list.name);
+  line.append(";");
+
+  if (has_adds) {
+    line.append("a:" + list.adds);
+    // Something follows the adds: either the subs or the mac marker.
+    if (has_subs || use_mac)
+      line.append(":");
+  }
+
+  if (has_subs) {
+    line.append("s:" + list.subs);
+    if (use_mac)
+      line.append(":");
+  }
+
+  if (use_mac)
+    line.append("mac");
+
+  line.append("\n");
+  return line;
+}
+
+// Called when a response asks us to re-key: discards both MAC keys and
+// requests new ones.
+void SafeBrowsingProtocolManager::HandleReKey() {
+  // Drop the old key pair first so that it isn't used while the key request
+  // is in flight.
+  wrapped_key_.clear();
+  client_key_.clear();
+
+  IssueKeyRequest();
+}
+
+void SafeBrowsingProtocolManager::HandleGetHashError() {
+ int next = GetNextBackOffTime(&gethash_error_count_, &gethash_back_off_mult_);
+ next_gethash_time_ = Time::Now() + TimeDelta::FromSeconds(next);
+}
diff --git a/chrome/browser/safe_browsing/protocol_manager.h b/chrome/browser/safe_browsing/protocol_manager.h
new file mode 100644
index 0000000..e4d9fc5
--- /dev/null
+++ b/chrome/browser/safe_browsing/protocol_manager.h
@@ -0,0 +1,232 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef CHROME_BROWSER_SAFE_BROWSING_PROTOCOL_MANAGER_H__
+#define CHROME_BROWSER_SAFE_BROWSING_PROTOCOL_MANAGER_H__
+
+// A class that implements Chrome's interface with the SafeBrowsing protocol.
+// The SafeBrowsingProtocolManager handles formatting and making requests of,
+// and handling responses from, Google's SafeBrowsing servers. This class uses
+// the SafeBrowsingProtocolParser class to do the actual parsing.
+
+#include <deque>
+#include <hash_map>
+#include <string>
+#include <vector>
+
+#include "base/scoped_ptr.h"
+#include "base/time.h"
+#include "chrome/browser/url_fetcher.h"
+#include "chrome/browser/safe_browsing/chunk_range.h"
+#include "chrome/browser/safe_browsing/protocol_parser.h"
+#include "chrome/browser/safe_browsing/safe_browsing_service.h"
+#include "chrome/browser/safe_browsing/safe_browsing_util.h"
+#include "net/url_request/url_request.h"
+
+class MessageLoop;
+class Task;
+class Timer;
+
+
+class SafeBrowsingProtocolManager : public URLFetcher::Delegate {
+ // Testing friends:
+ friend class SafeBrowsingProtocolManagerTest_TestBackOffTimes_Test;
+ friend class SafeBrowsingProtocolManagerTest_TestChunkStrings_Test;
+ friend class SafeBrowsingProtocolManagerTest_TestGetHashBackOffTimes_Test;
+
+ public:
+ SafeBrowsingProtocolManager(SafeBrowsingService* sb_service,
+ MessageLoop* notify_loop,
+ const std::string& client_key,
+ const std::string& wrapped_key);
+ ~SafeBrowsingProtocolManager();
+
+ // Set up the update schedule and internal state for making periodic requests
+ // of the SafeBrowsing service.
+ void Initialize();
+
+ // URLFetcher::Delegate interface.
+ virtual void OnURLFetchComplete(const URLFetcher* source,
+ const GURL& url,
+ const URLRequestStatus& status,
+ int response_code,
+ const ResponseCookies& cookies,
+ const std::string& data);
+
+ // API used by the SafeBrowsingService for issuing queries. When the results
+ // are available, SafeBrowsingService::HandleGetHashResults is called.
+ void GetFullHash(SafeBrowsingService::SafeBrowsingCheck* check,
+ const std::vector<SBPrefix>& prefixes);
+
+ // Scheduled update callback.
+ void GetNextUpdate();
+
+ // Called by the SafeBrowsingService when our request for a list of all chunks
+ // for each list is done. If database_error is true, that means the protocol
+ // manager shouldn't fetch updates since they can't be written to disk. It
+ // should try again later to open the database.
+ void OnGetChunksComplete(const std::vector<SBListChunkRanges>& list,
+ bool database_error);
+
+ // Called after the chunks that were parsed were inserted in the database.
+ void OnChunkInserted();
+
+ // The last time we received an update.
+ Time last_update() const { return last_update_; }
+
+ private:
+ // Internal API for fetching information from the SafeBrowsing servers. The
+ // GetHash requests are higher priority since they can block user requests
+ // so are handled separately.
+ enum SafeBrowsingRequestType {
+ NO_REQUEST = 0, // No requests in progress
+ UPDATE_REQUEST, // Request for redirect URLs
+ CHUNK_REQUEST, // Request for a specific chunk
+ GETKEY_REQUEST // Update the client's MAC key
+ };
+
+ // Returns the time (in milliseconds) for the next update request. If
+ // 'back_off' is true, the error count is incremented and the appropriate
+ // backed-off time is returned (see ScheduleNextUpdate below).
+ int GetNextUpdateTime(bool back_off);
+
+ // Worker function for calculating GetHash and Update backoff times (in
+ // seconds). 'Multiplier' is doubled for each consecutive error between the
+ // 2nd and 5th, and 'error_count' is incremented with each call.
+ int GetNextBackOffTime(int* error_count, int* multiplier);
+
+ // Manage our update with the next allowable update time. If 'back_off' is
+ // true, we must decrease the frequency of requests of the SafeBrowsing
+ // service according to section 5 of the protocol specification.
+ void ScheduleNextUpdate(bool back_off);
+
+ // Send a request for a list of chunks we should download to the SafeBrowsing
+ // servers. In order to format this request, we need to send all the chunk
+ // numbers for each list that we have to the server. Getting the chunk numbers
+ // requires a database query (run on the database thread), and the request
+ // is sent upon completion of that query in OnGetChunksComplete.
+ void IssueUpdateRequest();
+
+ // Send a request for a chunk to the SafeBrowsing servers.
+ void IssueChunkRequest();
+
+ // Get a key from the SafeBrowsing servers for use with MAC. This should only
+ // be called once per client unless the server directly tells us to update.
+ void IssueKeyRequest();
+
+ // Format a string returned from the database into:
+ // "list_name;a:<add_chunk_ranges>:s:<sub_chunk_ranges>:mac\n"
+ static std::string FormatList(const SBListChunkRanges& list, bool use_mac);
+
+ // Run the protocol parser on received data and update the SafeBrowsingService
+ // with the new content. Returns 'true' on successful parse, 'false' on error.
+ bool HandleServiceResponse(const GURL& url, const char* data, int length);
+
+ // If the SafeBrowsing service wants us to re-key, we clear our key state and
+ // issue the request.
+ void HandleReKey();
+
+ // Update internal state for each GetHash response error.
+ void HandleGetHashError();
+
+ private:
+ // Main SafeBrowsing interface object.
+ SafeBrowsingService* sb_service_;
+
+ // Current active request (in case we need to cancel) for updates or chunks
+ // from the SafeBrowsing service. We can only have one of these outstanding
+ // at any given time unlike GetHash requests, which are tracked separately.
+ scoped_ptr<URLFetcher> request_;
+
+ // The kind of request that is currently in progress.
+ SafeBrowsingRequestType request_type_;
+
+ // The number of HTTP response errors, used for request backoff timing.
+ int update_error_count_;
+ int gethash_error_count_;
+
+ // Multipliers which double (max == 8) for each error after the second.
+ int update_back_off_mult_;
+ int gethash_back_off_mult_;
+
+ // Multiplier between 0 and 1 to spread clients over an interval.
+ float back_off_fuzz_;
+
+ // The list for which we are making a request.
+ std::string list_name_;
+
+ // For managing the next earliest time to query the SafeBrowsing servers for
+ // updates.
+ int next_update_sec_;
+ scoped_ptr<Task> update_task_;
+ scoped_ptr<Timer> update_timer_;
+
+ // All chunk requests that need to be made, along with their MAC.
+ std::deque<ChunkUrl> chunk_request_urls_;
+
+ // Map of GetHash requests.
+ typedef stdext::hash_map<const URLFetcher*,
+ SafeBrowsingService::SafeBrowsingCheck*> HashRequests;
+ HashRequests hash_requests_;
+
+ // The next scheduled update has special behavior for the first 2 requests.
+ enum UpdateRequestState {
+ FIRST_REQUEST = 0,
+ SECOND_REQUEST,
+ NORMAL_REQUEST
+ };
+ UpdateRequestState update_state_;
+
+ // We'll attempt to get keys once per browser session if we don't already have
+ // them. They are not essential to operation, but provide a layer of
+ // verification.
+ bool initial_request_;
+
+ // True if the service has been given an add/sub chunk but it hasn't been
+ // added to the database yet.
+ bool chunk_pending_to_write_;
+
+ // Message loop for forwarding MAC keys to the SafeBrowsingService for
+ // storage.
+ MessageLoop* notify_loop_;
+
+ // The keys used for MAC. Empty keys mean we aren't using MAC.
+ std::string client_key_;
+ std::string wrapped_key_;
+
+ // The last time we successfully received an update.
+ Time last_update_;
+
+ // While in GetHash backoff, we can't make another GetHash until this time.
+ Time next_gethash_time_;
+
+ DISALLOW_EVIL_CONSTRUCTORS(SafeBrowsingProtocolManager);
+};
+
+#endif // CHROME_BROWSER_SAFE_BROWSING_PROTOCOL_MANAGER_H__
diff --git a/chrome/browser/safe_browsing/protocol_manager_unittest.cc b/chrome/browser/safe_browsing/protocol_manager_unittest.cc
new file mode 100644
index 0000000..b2abbb8
--- /dev/null
+++ b/chrome/browser/safe_browsing/protocol_manager_unittest.cc
@@ -0,0 +1,158 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+
+#include "base/logging.h"
+#include "base/time.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "chrome/browser/safe_browsing/protocol_manager.h"
+
+
+// Ensure that we respect section 5 of the SafeBrowsing protocol specification.
+//
+// GetNextUpdateTime() returns milliseconds. The first error backs off by one
+// minute; errors two through five land in a fuzzed window that doubles each
+// time; from the sixth error on the delay is pinned at 480 minutes. A
+// successful response (back_off == false) returns to |next_update_sec_|.
+TEST(SafeBrowsingProtocolManagerTest, TestBackOffTimes) {
+  SafeBrowsingProtocolManager pm(NULL, NULL, "", "");
+  pm.next_update_sec_ = 1800;
+  // Use a test expectation rather than DCHECK: DCHECK is compiled out of
+  // release builds, which would leave the fuzz range unverified there.
+  EXPECT_TRUE(pm.back_off_fuzz_ >= 0.0 && pm.back_off_fuzz_ <= 1.0);
+
+  // No errors received so far.
+  EXPECT_EQ(pm.GetNextUpdateTime(false), 1800 * 1000);
+
+  // 1 error.
+  EXPECT_EQ(pm.GetNextUpdateTime(true), 60 * 1000);
+
+  // 2 errors.
+  int next_time = pm.GetNextUpdateTime(true) / (60 * 1000);  // Minutes
+  EXPECT_TRUE(next_time >= 30 && next_time <= 60);
+
+  // 3 errors.
+  next_time = pm.GetNextUpdateTime(true) / (60 * 1000);
+  EXPECT_TRUE(next_time >= 60 && next_time <= 120);
+
+  // 4 errors.
+  next_time = pm.GetNextUpdateTime(true) / (60 * 1000);
+  EXPECT_TRUE(next_time >= 120 && next_time <= 240);
+
+  // 5 errors.
+  next_time = pm.GetNextUpdateTime(true) / (60 * 1000);
+  EXPECT_TRUE(next_time >= 240 && next_time <= 480);
+
+  // 6 errors, reached max backoff.
+  EXPECT_EQ(pm.GetNextUpdateTime(true), 480 * 60 * 1000);
+
+  // 7 errors.
+  EXPECT_EQ(pm.GetNextUpdateTime(true), 480 * 60 * 1000);
+
+  // Received a successful response.
+  EXPECT_EQ(pm.GetNextUpdateTime(false), 1800 * 1000);
+}
+
+// Exercises FormatList() over every combination of add ranges, sub ranges and
+// the MAC flag, checking the exact request line produced for each.
+TEST(SafeBrowsingProtocolManagerTest, TestChunkStrings) {
+  SafeBrowsingProtocolManager pm(NULL, NULL, "", "");
+  SBListChunkRanges phish("goog-phish-shavar");
+
+  // Both add and sub ranges present.
+  phish.adds = "1,4,6,8-20,99";
+  phish.subs = "16,32,64-96";
+  EXPECT_EQ(pm.FormatList(phish, false),
+            "goog-phish-shavar;a:1,4,6,8-20,99:s:16,32,64-96\n");
+  EXPECT_EQ(pm.FormatList(phish, true),
+            "goog-phish-shavar;a:1,4,6,8-20,99:s:16,32,64-96:mac\n");
+
+  // Add ranges only.
+  phish.subs = "";
+  EXPECT_EQ(pm.FormatList(phish, false),
+            "goog-phish-shavar;a:1,4,6,8-20,99\n");
+  EXPECT_EQ(pm.FormatList(phish, true),
+            "goog-phish-shavar;a:1,4,6,8-20,99:mac\n");
+
+  // Sub ranges only.
+  phish.adds = "";
+  phish.subs = "16,32,64-96";
+  EXPECT_EQ(pm.FormatList(phish, false),
+            "goog-phish-shavar;s:16,32,64-96\n");
+  EXPECT_EQ(pm.FormatList(phish, true),
+            "goog-phish-shavar;s:16,32,64-96:mac\n");
+
+  // Neither kind of range: only the list name (and optional MAC marker).
+  phish.adds = "";
+  phish.subs = "";
+  EXPECT_EQ(pm.FormatList(phish, false), "goog-phish-shavar;\n");
+  EXPECT_EQ(pm.FormatList(phish, true), "goog-phish-shavar;mac\n");
+}
+
+TEST(SafeBrowsingProtocolManagerTest, TestGetHashBackOffTimes) {
+ SafeBrowsingProtocolManager pm(NULL, NULL, "", "");
+
+ // No errors or back off time yet.
+ EXPECT_EQ(pm.gethash_error_count_, 0);
+ EXPECT_TRUE(pm.next_gethash_time_.is_null());
+
+ Time now = Time::Now();
+
+ // 1 error.
+ pm.HandleGetHashError();
+ EXPECT_EQ(pm.gethash_error_count_, 1);
+ TimeDelta margin = TimeDelta::FromSeconds(5); // Fudge factor.
+ Time future = now + TimeDelta::FromMinutes(1);
+ EXPECT_TRUE(pm.next_gethash_time_ >= future - margin &&
+ pm.next_gethash_time_ <= future + margin);
+
+ // 2 errors.
+ pm.HandleGetHashError();
+ EXPECT_EQ(pm.gethash_error_count_, 2);
+ EXPECT_TRUE(pm.next_gethash_time_ >= now + TimeDelta::FromMinutes(30));
+ EXPECT_TRUE(pm.next_gethash_time_ <= now + TimeDelta::FromMinutes(60));
+
+ // 3 errors.
+ pm.HandleGetHashError();
+ EXPECT_EQ(pm.gethash_error_count_, 3);
+ EXPECT_TRUE(pm.next_gethash_time_ >= now + TimeDelta::FromMinutes(60));
+ EXPECT_TRUE(pm.next_gethash_time_ <= now + TimeDelta::FromMinutes(120));
+
+ // 4 errors.
+ pm.HandleGetHashError();
+ EXPECT_EQ(pm.gethash_error_count_, 4);
+ EXPECT_TRUE(pm.next_gethash_time_ >= now + TimeDelta::FromMinutes(120));
+ EXPECT_TRUE(pm.next_gethash_time_ <= now + TimeDelta::FromMinutes(240));
+
+ // 5 errors.
+ pm.HandleGetHashError();
+ EXPECT_EQ(pm.gethash_error_count_, 5);
+ EXPECT_TRUE(pm.next_gethash_time_ >= now + TimeDelta::FromMinutes(240));
+ EXPECT_TRUE(pm.next_gethash_time_ <= now + TimeDelta::FromMinutes(480));
+
+ // 6 errors, reached max backoff.
+ pm.HandleGetHashError();
+ EXPECT_EQ(pm.gethash_error_count_, 6);
+ EXPECT_TRUE(pm.next_gethash_time_ == now + TimeDelta::FromMinutes(480));
+
+ // 7 errors.
+ pm.HandleGetHashError();
+ EXPECT_EQ(pm.gethash_error_count_, 7);
+ EXPECT_TRUE(pm.next_gethash_time_== now + TimeDelta::FromMinutes(480));
+}
diff --git a/chrome/browser/safe_browsing/protocol_parser.cc b/chrome/browser/safe_browsing/protocol_parser.cc
new file mode 100644
index 0000000..9f729c6
--- /dev/null
+++ b/chrome/browser/safe_browsing/protocol_parser.cc
@@ -0,0 +1,496 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Parse the data returned from the SafeBrowsing v2.1 protocol response.
+
+#include <Winsock2.h> // for htonl
+
+#include "chrome/browser/safe_browsing/protocol_parser.h"
+
+#include "base/logging.h"
+#include "base/string_util.h"
+
+namespace {
+// Extracts the first '\n'-terminated line from a binary buffer.
+//
+// Scans at most |input_len| bytes of |input|. On finding a newline, copies
+// the bytes before it (newline excluded) into |line| and returns true. If no
+// newline lies within the buffer, or |input| is NULL, returns false and
+// leaves |line| untouched.
+//
+// The copy uses std::string::assign(const charT* s, size_type n), which takes
+// an explicit length and never calls strlen: |input| is not NULL terminated
+// and may legitimately contain NULL bytes that a strlen based copy would
+// truncate.
+bool GetLine(const char* input, int input_len, std::string* line) {
+  if (!input)
+    return false;
+
+  for (int i = 0; i < input_len; ++i) {
+    if (input[i] == '\n') {
+      line->assign(input, i);
+      return true;
+    }
+  }
+  return false;
+}
+}
+
+//------------------------------------------------------------------------------
+// SafeBrowsingProtocolParser implementation
+
+// The constructor body is empty; no members are initialized here.
+SafeBrowsingProtocolParser::SafeBrowsingProtocolParser() {
+}
+
+// Parses a GetHash response into |full_hashes| (which is cleared first).
+//
+// When |key| is non-empty, the first line is either the literal
+// "e:pleaserekey" -- in which case |*re_key| is set to true and the function
+// returns true with no hashes -- or the MAC of everything after that line,
+// which is verified before any further parsing. |*re_key| is not written on
+// any other path.
+//
+// The remainder is a sequence of "<list>:<add_chunk_id>:<byte_count>\n"
+// headers, each followed by |byte_count| bytes of binary SBFullHash records.
+//
+// Returns true only if the whole buffer is consumed exactly. Returns false on
+// a missing newline, a header without exactly three fields, a MAC mismatch,
+// or a |byte_count| that is negative, not a whole number of SBFullHash
+// records, or larger than the bytes actually left in the buffer.
+bool SafeBrowsingProtocolParser::ParseGetHash(
+    const char* chunk_data,
+    int chunk_len,
+    const std::string& key,
+    bool* re_key,
+    std::vector<SBFullHashResult>* full_hashes) {
+  full_hashes->clear();
+  int length = chunk_len;
+  const char* data = chunk_data;
+
+  int offset;
+  std::string line;
+  if (!key.empty()) {
+    if (!GetLine(data, length, &line))
+      return false;  // Error! Bad GetHash result.
+
+    if (line == "e:pleaserekey") {
+      *re_key = true;
+      return true;
+    }
+
+    offset = static_cast<int>(line.size()) + 1;
+    data += offset;
+    length -= offset;
+
+    if (!safe_browsing_util::VerifyMAC(key, line, data, length))
+      return false;
+  }
+
+  const int kFullHashSize = static_cast<int>(sizeof(SBFullHash));
+
+  while (length > 0) {
+    if (!GetLine(data, length, &line))
+      return false;
+
+    offset = static_cast<int>(line.size()) + 1;
+    data += offset;
+    length -= offset;
+
+    std::vector<std::string> cmd_parts;
+    SplitString(line, ':', &cmd_parts);
+    if (cmd_parts.size() != 3)
+      return false;
+
+    SBFullHashResult full_hash;
+    full_hash.list_name = cmd_parts[0];
+    full_hash.add_chunk_id = atoi(cmd_parts[1].c_str());
+    int full_hash_len = atoi(cmd_parts[2].c_str());
+
+    // |full_hash_len| comes straight off the wire. Validate it before copying
+    // so a bogus value can never make memcpy read past the end of the
+    // response buffer (a debug-only DCHECK is not enough for network data).
+    if (full_hash_len < 0 ||
+        full_hash_len > length ||
+        full_hash_len % kFullHashSize != 0) {
+      return false;
+    }
+
+    while (full_hash_len > 0) {
+      memcpy(&full_hash.hash, data, sizeof(SBFullHash));
+      full_hashes->push_back(full_hash);
+      data += kFullHashSize;
+      length -= kFullHashSize;
+      full_hash_len -= kFullHashSize;
+    }
+  }
+
+  return length == 0;
+}
+
+// Appends a GetHash request body to |request|: a text header
+// "<prefix_size>:<total_payload_bytes>\n" followed by the raw bytes of every
+// prefix in |prefixes|, in order. |request| must be non-NULL; existing
+// contents are kept and the request is appended after them.
+void SafeBrowsingProtocolParser::FormatGetHash(
+    const std::vector<SBPrefix>& prefixes, std::string* request) {
+  DCHECK(request);
+
+  // "%d" consumes an int, but sizeof and size() yield size_t. Passing size_t
+  // through varargs is only correct where the two types have the same width,
+  // so convert explicitly.
+  const int prefix_size = static_cast<int>(sizeof(SBPrefix));
+  const int payload_size =
+      static_cast<int>(sizeof(SBPrefix) * prefixes.size());
+
+  // Format the request for GetHash.
+  request->append(StringPrintf("%d:%d\n", prefix_size, payload_size));
+  for (size_t i = 0; i < prefixes.size(); ++i) {
+    request->append(reinterpret_cast<const char*>(&prefixes[i]),
+                    sizeof(SBPrefix));
+  }
+}
+
+// Parses an update response, one "<command>:<argument>" line at a time.
+//
+// Recognized commands, keyed on their first character:
+//   ad / sd  add-del / sub-del ranges for the most recent 'i' list; appended
+//            to |deletes|. Rejected if no list name has been seen yet.
+//   e        must be "pleaserekey"; sets |*re_key|.
+//   i        names the list that following ad/sd lines apply to.
+//   m        MAC of the rest of the response; verified when |key| is set.
+//   n        next earliest re-query time in seconds -> |*next_update_sec|.
+//   r        must be "pleasereset"; sets |*reset|.
+//   u        redirect URL for a chunk; appended to |chunk_urls|. May contain
+//            one extra ':' (three fields). When |key| is set the text after
+//            the last ',' is split off as the chunk's MAC.
+//
+// Returns false on the first malformed or unknown line, true once the whole
+// buffer has been consumed.
+bool SafeBrowsingProtocolParser::ParseUpdate(
+    const char* chunk_data,
+    int chunk_len,
+    const std::string& key,
+    int* next_update_sec,
+    bool* re_key,
+    bool* reset,
+    std::vector<SBChunkDelete>* deletes,
+    std::vector<ChunkUrl>* chunk_urls) {
+  DCHECK(next_update_sec);
+  DCHECK(deletes);
+  DCHECK(chunk_urls);
+
+  const char* cursor = chunk_data;
+  int bytes_left = chunk_len;
+
+  // Set by the most recent 'i' line.
+  std::string current_list;
+
+  while (bytes_left > 0) {
+    std::string line;
+    if (!GetLine(cursor, bytes_left, &line))
+      return false;  // Error: bad list format!
+
+    std::vector<std::string> fields;
+    SplitString(line, ':', &fields);
+    if (fields.empty())
+      return false;
+
+    const std::string& command = fields[0];
+    const char opcode = command[0];
+    const bool is_pair = fields.size() == 2;
+    const bool is_url_with_colon = fields.size() == 3 && opcode == 'u';
+    if (!is_pair && !is_url_with_colon)
+      return false;
+
+    // Step past this line (and its newline) before dispatching, so the 'm'
+    // command sees exactly the remainder of the response.
+    const int line_bytes = static_cast<int>(line.size()) + 1;
+    cursor += line_bytes;
+    bytes_left -= line_bytes;
+    if (bytes_left < 0)
+      return false;  // Parsing error.
+
+    const std::string& argument = fields[1];
+
+    // Differentiate on the first character of the command (which is usually
+    // only one character, with the exception of the 'ad' and 'sd' commands).
+    switch (opcode) {
+      case 'a':
+      case 's': {
+        // Only 'ad' (add-del) and 'sd' (sub-del) are valid here, and they
+        // have no context unless a list name was parsed first.
+        if (command.size() != 2 || command[1] != 'd' || current_list.empty())
+          return false;
+        SBChunkDelete chunk_delete;
+        chunk_delete.is_sub_del = (opcode == 's');
+        StringToRanges(argument, &chunk_delete.chunk_del);
+        chunk_delete.list_name = current_list;
+        deletes->push_back(chunk_delete);
+        break;
+      }
+
+      case 'e':
+        if (argument != "pleaserekey")
+          return false;
+        *re_key = true;
+        break;
+
+      case 'i':
+        // The line providing the name of the list (i.e. 'goog-phish-shavar').
+        current_list = argument;
+        break;
+
+      case 'm':
+        // Verify that the MAC of the remainder of this chunk is what we
+        // expect.
+        if (!key.empty() &&
+            !safe_browsing_util::VerifyMAC(key, argument, cursor, bytes_left))
+          return false;
+        break;
+
+      case 'n':
+        // The line providing the next earliest time (in seconds) to re-query.
+        *next_update_sec = atoi(argument.c_str());
+        break;
+
+      case 'u': {
+        // The line providing a URL redirect to a chunk. Splitting on ':' may
+        // have cut the URL in two; glue the halves back together.
+        std::string redirect_url = argument;
+        if (fields.size() == 3) {
+          redirect_url += ':';
+          redirect_url += fields[2];
+        }
+
+        ChunkUrl chunk_url;
+        if (!key.empty()) {
+          // With MAC enabled the line ends in ",<mac>".
+          const std::string::size_type comma = redirect_url.rfind(',');
+          if (comma == std::string::npos)
+            return false;
+          chunk_url.mac = redirect_url.substr(comma + 1);
+          redirect_url.erase(comma);
+        }
+        chunk_url.url = redirect_url;
+        chunk_urls->push_back(chunk_url);
+        break;
+      }
+
+      case 'r':
+        if (argument != "pleasereset")
+          return false;
+        *reset = true;
+        break;
+
+      default:
+        // A command we don't understand.
+        return false;
+    }
+  }
+
+  return true;
+}
+
+// Parses a chunk response into |chunks|.
+//
+// When |key| is non-empty the whole buffer is first verified against |mac|.
+// The buffer is then a sequence of either
+//   "e:pleaserekey\n"                         -> sets |*re_key|, or
+//   "<a|s>:<chunk_number>:<hash_len>:<chunk_len>\n" followed by |chunk_len|
+//   bytes of binary add ('a') or sub ('s') chunk data.
+// |hash_len| must be sizeof(SBPrefix) or sizeof(SBFullHash).
+//
+// Returns false on any malformed header, a |chunk_len| that is negative or
+// exceeds the bytes left in the buffer, or a payload the add/sub parser
+// rejects. Chunks appended to |chunks| before a failure are left in place.
+bool SafeBrowsingProtocolParser::ParseChunk(const char* data,
+                                            int length,
+                                            const std::string& key,
+                                            const std::string& mac,
+                                            bool* re_key,
+                                            std::deque<SBChunk>* chunks) {
+  int remaining = length;
+  const char* chunk_data = data;
+
+  if (!key.empty() &&
+      !safe_browsing_util::VerifyMAC(key, mac, data, length)) {
+    return false;
+  }
+
+  while (remaining > 0) {
+    std::string cmd_line;
+    // Scan only the bytes that are still unread. |chunk_data| has advanced
+    // past earlier chunks, so bounding the scan by the original |length|
+    // would let it run off the end of the buffer.
+    if (!GetLine(chunk_data, remaining, &cmd_line))
+      return false;  // Error: bad chunk format!
+
+    const int line_len = static_cast<int>(cmd_line.length()) + 1;
+    std::vector<std::string> cmd_parts;
+    SplitString(cmd_line, ':', &cmd_parts);
+
+    // Handle a possible re-key command.
+    if (cmd_parts.size() != 4) {
+      if (cmd_parts.size() == 2 &&
+          cmd_parts[0] == "e" &&
+          cmd_parts[1] == "pleaserekey") {
+        *re_key = true;
+        chunk_data += line_len;
+        remaining -= line_len;
+        continue;
+      }
+      return false;
+    }
+
+    // Process the chunk data.
+    const int chunk_number = atoi(cmd_parts[1].c_str());
+    const int hash_len = atoi(cmd_parts[2].c_str());
+    if (hash_len != sizeof(SBPrefix) && hash_len != sizeof(SBFullHash)) {
+      SB_DLOG(INFO) << "ParseChunk got unknown hashlen " << hash_len;
+      return false;
+    }
+
+    const int chunk_len = atoi(cmd_parts[3].c_str());
+    chunk_data += line_len;
+    remaining -= line_len;
+
+    // |chunk_len| is server supplied. Make sure the payload it describes
+    // really fits in what is left *before* handing it to the add/sub parsers,
+    // which trust the length they are given.
+    if (chunk_len < 0 || chunk_len > remaining)
+      return false;  // Parse error.
+
+    chunks->push_back(SBChunk());
+    chunks->back().chunk_number = chunk_number;
+
+    if (cmd_parts[0] == "a") {
+      if (!ParseAddChunk(chunk_data, chunk_len, hash_len,
+                         &chunks->back().hosts))
+        return false;  // Parse error.
+    } else if (cmd_parts[0] == "s") {
+      if (!ParseSubChunk(chunk_data, chunk_len, hash_len,
+                         &chunks->back().hosts))
+        return false;  // Parse error.
+    } else {
+      NOTREACHED();
+      return false;
+    }
+
+    chunk_data += chunk_len;
+    remaining -= chunk_len;
+  }
+
+  DCHECK(remaining == 0);
+
+  return true;
+}
+
+// Parses the binary payload of one add chunk into |hosts|.
+//
+// The payload is a run of records: a host key (SBPrefix), a one-byte prefix
+// count, then that many hashes of |hash_len| bytes each. Returns true only if
+// every record parses and exactly |data_len| bytes are consumed.
+bool SafeBrowsingProtocolParser::ParseAddChunk(
+    const char* data, int data_len, int hash_len,
+    std::deque<SBChunkHost>* hosts) {
+  const char* cursor = data;
+  int bytes_left = data_len;
+
+  // Smallest possible record: host key plus the count byte.
+  const int kHeaderSize = sizeof(SBPrefix) + 1;
+  const SBEntry::Type entry_type = hash_len == sizeof(SBPrefix) ?
+      SBEntry::ADD_PREFIX : SBEntry::ADD_FULL_HASH;
+
+  while (bytes_left >= kHeaderSize) {
+    SBPrefix host;
+    int prefix_count;
+    ReadHostAndPrefixCount(&cursor, &bytes_left, &host, &prefix_count);
+
+    // If a host has more than 255 prefixes, then subsequent entries are used.
+    // The database code assumes that all prefixes from the same host and
+    // chunk are in one SBEntry, so such a continuation record is merged into
+    // the entry created for the previous record.
+    const bool continues_previous =
+        !hosts->empty() &&
+        hosts->back().host == host &&
+        hosts->back().entry->HashLen() == hash_len;
+
+    SBEntry* entry = NULL;
+    int first_index = 0;
+    if (continues_previous) {
+      // Keep the SBChunkHost but swap in an enlarged SBEntry; new prefixes
+      // are written after the ones it already holds.
+      SBChunkHost& previous = hosts->back();
+      first_index = previous.entry->prefix_count();
+      entry = previous.entry->Enlarge(prefix_count);
+      previous.entry = entry;
+    } else {
+      entry = SBEntry::Create(entry_type, prefix_count);
+      SBChunkHost chunk_host;
+      chunk_host.host = host;
+      chunk_host.entry = entry;
+      hosts->push_back(chunk_host);
+    }
+
+    if (!ReadPrefixes(&cursor, &bytes_left, entry, prefix_count, first_index))
+      return false;
+  }
+
+  return bytes_left == 0;
+}
+
+// Parses the binary payload of one sub chunk into |hosts|.
+//
+// Each record is a host key (SBPrefix) and a one-byte prefix count, followed
+// either by a single add-chunk id (count == 0) or by |count| pairs that
+// ReadPrefixes() decodes. Returns true only if every record parses and
+// exactly |data_len| bytes are consumed.
+bool SafeBrowsingProtocolParser::ParseSubChunk(
+    const char* data, int data_len, int hash_len,
+    std::deque<SBChunkHost>* hosts) {
+  const char* cursor = data;
+  int bytes_left = data_len;
+
+  // Smallest possible record: host key, count byte and one add-chunk id.
+  const int kMinRecordSize = 2 * sizeof(SBPrefix) + 1;
+  const SBEntry::Type entry_type = hash_len == sizeof(SBPrefix) ?
+      SBEntry::SUB_PREFIX : SBEntry::SUB_FULL_HASH;
+
+  while (bytes_left >= kMinRecordSize) {
+    SBPrefix host;
+    int prefix_count;
+    ReadHostAndPrefixCount(&cursor, &bytes_left, &host, &prefix_count);
+
+    SBEntry* entry = SBEntry::Create(entry_type, prefix_count);
+    SBChunkHost chunk_host;
+    chunk_host.host = host;
+    chunk_host.entry = entry;
+    hosts->push_back(chunk_host);
+
+    if (prefix_count == 0) {
+      // There is only an add chunk number (no prefixes).
+      entry->set_chunk_id(ReadChunkId(&cursor, &bytes_left));
+    } else if (!ReadPrefixes(&cursor, &bytes_left, entry, prefix_count, 0)) {
+      return false;
+    }
+  }
+
+  return bytes_left == 0;
+}
+
+
+// Reads a record header from |*data|: sizeof(SBPrefix) bytes of host key into
+// |*host|, then one unsigned byte into |*count| (0..255). Advances |*data|
+// and shrinks |*remaining| by the bytes consumed. Performs no bounds check;
+// the callers in this file test |remaining| against a minimum record size
+// before calling.
+void SafeBrowsingProtocolParser::ReadHostAndPrefixCount(
+    const char** data, int* remaining, SBPrefix* host, int* count) {
+  const char* cursor = *data;
+
+  // Host key first, copied byte-wise.
+  memcpy(host, cursor, sizeof(SBPrefix));
+  cursor += sizeof(SBPrefix);
+
+  // Then the prefix count; going through unsigned char keeps values above
+  // 127 from turning negative.
+  *count = static_cast<unsigned char>(*cursor);
+  ++cursor;
+
+  *data = cursor;
+  *remaining -= static_cast<int>(sizeof(SBPrefix)) + 1;
+}
+
+// Reads one int-sized chunk id from |*data|, advances |*data| and shrinks
+// |*remaining| by sizeof(int), and returns the id byte-swapped via htonl().
+// (htonl and ntohl perform the same swap, so this converts the wire value to
+// host order.) Performs no bounds check of its own.
+int SafeBrowsingProtocolParser::ReadChunkId(
+    const char** data, int* remaining) {
+  int wire_id;
+  memcpy(&wire_id, *data, sizeof(wire_id));
+
+  *remaining -= sizeof(wire_id);
+  *data += sizeof(wire_id);
+
+  return htonl(wire_id);
+}
+
+// Reads |count| hash records from |*data| into |entry|, storing them at
+// indices |index_start| .. |index_start + count - 1|.
+//
+// For sub entries each record is an add-chunk id followed by the hash; for
+// add entries it is the hash alone. The hash is an SBPrefix when
+// entry->HashLen() == sizeof(SBPrefix) and an SBFullHash otherwise (HashLen()
+// is expected to be one of those two sizes -- the original casts made the
+// same assumption).
+//
+// Advances |*data| and shrinks |*remaining| past every record consumed.
+// Returns false, without reading the offending record, as soon as fewer bytes
+// remain than one full record needs.
+bool SafeBrowsingProtocolParser::ReadPrefixes(
+    const char** data, int* remaining, SBEntry* entry, int count,
+    int index_start) {
+  const int hash_len = entry->HashLen();
+  for (int i = 0; i < count; ++i) {
+    const bool is_sub = entry->IsSub();
+
+    // Check that the whole record is present *before* touching it, so a
+    // truncated payload can never cause a read past the end of the buffer.
+    const int record_len =
+        hash_len + (is_sub ? static_cast<int>(sizeof(int)) : 0);
+    if (*remaining < record_len)
+      return false;
+
+    if (is_sub)
+      entry->SetChunkIdAtPrefix(index_start + i, ReadChunkId(data, remaining));
+
+    // Copy with memcpy rather than dereferencing a cast pointer: the payload
+    // is packed binary data with no alignment guarantee.
+    if (hash_len == sizeof(SBPrefix)) {
+      SBPrefix prefix;
+      memcpy(&prefix, *data, sizeof(prefix));
+      entry->SetPrefixAt(index_start + i, prefix);
+    } else {
+      SBFullHash full_hash;
+      memcpy(&full_hash, *data, sizeof(full_hash));
+      entry->SetFullHashAt(index_start + i, full_hash);
+    }
+    *data += hash_len;
+    *remaining -= hash_len;
+  }
+
+  return true;
+}
+
+// Parses a key response made of "<name>:<length>:<value>\n" lines, where
+// <name> is "clientkey" or "wrappedkey" and <length> must equal the size of
+// <value>. Both output strings are cleared first. Returns true only if every
+// line is well formed and both keys end up non-empty.
+bool SafeBrowsingProtocolParser::ParseNewKey(const char* chunk_data,
+                                             int chunk_length,
+                                             std::string* client_key,
+                                             std::string* wrapped_key) {
+  DCHECK(client_key && wrapped_key);
+  client_key->clear();
+  wrapped_key->clear();
+
+  const char* cursor = chunk_data;
+  int bytes_left = chunk_length;
+
+  while (bytes_left > 0) {
+    std::string line;
+    if (!GetLine(cursor, bytes_left, &line))
+      return false;
+
+    std::vector<std::string> fields;
+    SplitString(line, ':', &fields);
+    if (fields.size() != 3)
+      return false;
+
+    const std::string& key_name = fields[0];
+    const std::string& key_value = fields[2];
+
+    // The declared length has to match the value we actually received.
+    if (key_value.size() != atoi(fields[1].c_str()))
+      return false;
+
+    std::string* destination = NULL;
+    if (key_name == "clientkey") {
+      destination = client_key;
+    } else if (key_name == "wrappedkey") {
+      destination = wrapped_key;
+    } else {
+      return false;
+    }
+    destination->assign(key_value);
+
+    // Skip the line and its terminating newline.
+    const int consumed = static_cast<int>(line.size()) + 1;
+    cursor += consumed;
+    bytes_left -= consumed;
+  }
+
+  return !client_key->empty() && !wrapped_key->empty();
+} \ No newline at end of file
diff --git a/chrome/browser/safe_browsing/protocol_parser.h b/chrome/browser/safe_browsing/protocol_parser.h
new file mode 100644
index 0000000..802b5e5
--- /dev/null
+++ b/chrome/browser/safe_browsing/protocol_parser.h
@@ -0,0 +1,153 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef CHROME_BROWSER_SAFE_BROWSING_PROTOCOL_PARSER_H__
+#define CHROME_BROWSER_SAFE_BROWSING_PROTOCOL_PARSER_H__
+
+// Parse the data returned from the chunk response.
+//
+// Based on the SafeBrowsing v2.1 protocol:
+// http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec
+//
+// Read the response from a SafeBrowsing request, and parse into useful pieces.
+// The protocol is generally line oriented, but can contain binary data in the
+// actual chunk responses. The consumer of the protocol data should instantiate
+// the parser and call the appropriate parsing function on the data.
+//
+// Examples of protocol responses:
+//
+// 1. List identification
+// i:goog-phish-shavar\n
+// <command>:<command_data>\n
+//
+// 2. Minimum time to wait (seconds) until the next download request can be made
+// n:1200\n
+// <command>:<time_in_seconds>\n
+//
+// 3. Redirect URL for retrieving a chunk
+// u:cache.googlevideo.com/safebrowsing/rd/goog-phish-shavar_a_1\n
+// <command>:<url>\n
+//
+// 4. Add and sub chunks
+// a:1:4:523\n... <-- Add chunk + binary data
+// s:13:4:17\n... <-- Sub chunk + binary data
+// <chunk_type>:<chunk_number>:<prefix_len>:<chunk_bytes>\n<binary_data>
+//
+// 5. Add-del and sub-del requests
+// ad:1-4000,5001\n <-- Add-del
+// sd:1,3,5,7,903\n <-- Sub-del
+// <command>:<chunk_range>\n
+
+
+#include <string>
+#include <vector>
+
+#include "base/basictypes.h"
+#include "chrome/browser/safe_browsing/chunk_range.h"
+#include "chrome/browser/safe_browsing/safe_browsing_util.h"
+
+
+// Parser for the responses described at the top of this file. Each Parse*
+// method takes a raw response buffer plus its length and reports success
+// through its bool return value; outputs are written through pointer
+// arguments.
+class SafeBrowsingProtocolParser {
+ public:
+ SafeBrowsingProtocolParser();
+
+ // Parse the response of an update request. Results for chunk deletions (both
+ // add-del and sub-del) are returned in 'chunk_deletes', and new chunk URLs to
+ // download are contained in 'chunk_urls'. The next time the client is allowed
+ // to request another update is returned in 'next_update_sec'. If the service
+ // wants us to retrieve new MAC keys, 're_key' will be set to true. If we are
+ // using MACs to verify responses, the 'key' must be set to the private key
+ // returned from the SafeBrowsing servers. 'reset' will be set to true if the
+ // SafeBrowsing service wants us to dump our database.
+ // Returns 'true' if it was able to decode the chunk properly, 'false' if not
+ // decoded properly and the results should be ignored.
+ bool ParseUpdate(const char* chunk_data,
+ int chunk_len,
+ const std::string& key,
+ int* next_update_sec,
+ bool* re_key,
+ bool* reset,
+ std::vector<SBChunkDelete>* chunk_deletes,
+ std::vector<ChunkUrl>* chunk_urls);
+
+ // Parse the response from a chunk URL request and returns the hosts/prefixes
+ // for adds and subs in "chunks". Returns 'true' on successful parsing,
+ // 'false' otherwise. Any result should be ignored when a parse has failed.
+ bool ParseChunk(const char* chunk_data,
+ int chunk_len,
+ const std::string& key,
+ const std::string& mac,
+ bool* re_key,
+ std::deque<SBChunk>* chunks);
+
+ // Parse the result of a GetHash request, returning the list of full hashes.
+ // If we are checking for valid MACs, the caller should populate 'key'.
+ bool ParseGetHash(const char* chunk_data,
+ int chunk_len,
+ const std::string& key,
+ bool* re_key,
+ std::vector<SBFullHashResult>* full_hashes);
+
+ // Convert a list of partial hashes into a proper GetHash request, written to
+ // 'request'.
+ void FormatGetHash(const std::vector<SBPrefix>& prefixes,
+ std::string* request);
+
+ // Parse the keys used for subsequent communications with the SafeBrowsing
+ // servers. The response consists of "<name>:<length>:<value>" lines where
+ // <name> is "clientkey" or "wrappedkey". Both keys must be present and
+ // non-empty. Returns true on successful parse, false on parse error.
+ bool ParseNewKey(const char* chunk_data,
+ int chunk_length,
+ std::string* client_key,
+ std::string* wrapped_key);
+
+ private:
+ // Parse the binary payload of a single add / sub chunk into 'hosts'.
+ // NOTE(review): presumably 'hash_len' is the per-hash size taken from the
+ // chunk header - confirm against the definitions in protocol_parser.cc.
+ bool ParseAddChunk(const char* data,
+ int data_len,
+ int hash_len,
+ std::deque<SBChunkHost>* hosts);
+ bool ParseSubChunk(const char* data,
+ int data_len,
+ int hash_len,
+ std::deque<SBChunkHost>* hosts);
+
+ // Helper functions used by ParseAddChunk and ParseSubChunk. Each one reads
+ // from '*data', advancing '*data' and decreasing '*remaining' by the number
+ // of bytes it consumed.
+ static void ReadHostAndPrefixCount(
+ const char** data, int* remaining, SBPrefix* host, int* count);
+ // Returns the chunk id read from '*data', converted with htonl().
+ static int ReadChunkId(const char** data, int* remaining);
+ // Reads 'count' hashes (each preceded by a chunk id for sub entries) into
+ // 'entry', starting at index 'index_start'. Returns false if the data runs
+ // out.
+ static bool ReadPrefixes(
+ const char** data, int* remaining, SBEntry* entry, int count,
+ int index_start);
+
+ // The name of the current list
+ std::string list_name_;
+
+ DISALLOW_EVIL_CONSTRUCTORS(SafeBrowsingProtocolParser);
+};
+
+
+#endif // CHROME_BROWSER_SAFE_BROWSING_PROTOCOL_PARSER_H__ \ No newline at end of file
diff --git a/chrome/browser/safe_browsing/protocol_parser_unittest.cc b/chrome/browser/safe_browsing/protocol_parser_unittest.cc
new file mode 100644
index 0000000..2366543
--- /dev/null
+++ b/chrome/browser/safe_browsing/protocol_parser_unittest.cc
@@ -0,0 +1,654 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Program to test the SafeBrowsing protocol parsing v2.1.
+
+#include <hash_map>
+
+#include "base/logging.h"
+#include "base/string_util.h"
+#include "base/win_util.h"
+#include "chrome/browser/safe_browsing/protocol_parser.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+
+// Test parsing one add chunk holding three host keys with 0, 3 and 2 prefixes.
+TEST(SafeBrowsingProtocolParsingTest, TestAddChunk) {
+  std::string add_chunk("a:1:4:35\naaaax1111\0032222333344447777\00288889999");
+  // The 'x' placeholder is the prefix count of the first host: make it zero.
+  add_chunk[13] = '\0';
+
+  // Run the parse.
+  SafeBrowsingProtocolParser parser;
+  bool re_key = false;
+  std::deque<SBChunk> chunks;
+  bool result = parser.ParseChunk(add_chunk.data(),
+                                  static_cast<int>(add_chunk.length()),
+                                  "", "", &re_key, &chunks);
+  EXPECT_TRUE(result);
+  EXPECT_FALSE(re_key);
+  // Fatal size checks: the containers are indexed below, so a wrong size must
+  // stop the test instead of reading out of range.
+  ASSERT_EQ(chunks.size(), 1);
+  EXPECT_EQ(chunks[0].chunk_number, 1);
+  ASSERT_EQ(chunks[0].hosts.size(), 3);
+
+  EXPECT_EQ(chunks[0].hosts[0].host, 0x61616161);
+  SBEntry* entry = chunks[0].hosts[0].entry;
+  EXPECT_EQ(entry->type(), SBEntry::ADD_PREFIX);
+  EXPECT_EQ(entry->prefix_count(), 0);
+
+  EXPECT_EQ(chunks[0].hosts[1].host, 0x31313131);
+  entry = chunks[0].hosts[1].entry;
+  EXPECT_EQ(entry->type(), SBEntry::ADD_PREFIX);
+  ASSERT_EQ(entry->prefix_count(), 3);
+  EXPECT_EQ(entry->PrefixAt(0), 0x32323232);
+  EXPECT_EQ(entry->PrefixAt(1), 0x33333333);
+  EXPECT_EQ(entry->PrefixAt(2), 0x34343434);
+
+  EXPECT_EQ(chunks[0].hosts[2].host, 0x37373737);
+  entry = chunks[0].hosts[2].entry;
+  EXPECT_EQ(entry->type(), SBEntry::ADD_PREFIX);
+  ASSERT_EQ(entry->prefix_count(), 2);
+  EXPECT_EQ(entry->PrefixAt(0), 0x38383838);
+  EXPECT_EQ(entry->PrefixAt(1), 0x39393939);
+
+  safe_browsing_util::FreeChunks(&chunks);
+}
+
+// Test parsing one add chunk with full hashes: one host key followed by two
+// 32-byte hashes.
+TEST(SafeBrowsingProtocolParsingTest, TestAddFullChunk) {
+  std::string add_chunk("a:1:32:69\naaaa");
+  add_chunk.push_back(2);  // Number of hashes that follow the host key.
+
+  SBFullHash full_hash1, full_hash2;
+  for (int i = 0; i < 32; ++i) {
+    full_hash1.full_hash[i] = i % 2 ? 1 : 2;
+    full_hash2.full_hash[i] = i % 2 ? 3 : 4;
+  }
+
+  add_chunk.append(full_hash1.full_hash, 32);
+  add_chunk.append(full_hash2.full_hash, 32);
+
+  // Run the parse.
+  SafeBrowsingProtocolParser parser;
+  bool re_key = false;
+  std::deque<SBChunk> chunks;
+  bool result = parser.ParseChunk(add_chunk.data(),
+                                  static_cast<int>(add_chunk.length()),
+                                  "", "", &re_key, &chunks);
+  EXPECT_TRUE(result);
+  EXPECT_FALSE(re_key);
+  // Fatal size checks guard the indexing below.
+  ASSERT_EQ(chunks.size(), 1);
+  EXPECT_EQ(chunks[0].chunk_number, 1);
+  ASSERT_EQ(chunks[0].hosts.size(), 1);
+
+  EXPECT_EQ(chunks[0].hosts[0].host, 0x61616161);
+  SBEntry* entry = chunks[0].hosts[0].entry;
+  EXPECT_EQ(entry->type(), SBEntry::ADD_FULL_HASH);
+  ASSERT_EQ(entry->prefix_count(), 2);
+  EXPECT_TRUE(entry->FullHashAt(0) == full_hash1);
+  EXPECT_TRUE(entry->FullHashAt(1) == full_hash2);
+
+  safe_browsing_util::FreeChunks(&chunks);
+}
+
+// Test parsing multiple add chunks. We'll use the same chunk as in
+// TestAddChunk, and add one more after it.
+TEST(SafeBrowsingProtocolParsingTest, TestAddChunks) {
+  std::string add_chunk("a:1:4:35\naaaax1111\0032222333344447777\00288889999"
+                        "a:2:4:13\n5555\002ppppgggg");
+  // The 'x' placeholder is the prefix count of the first host: make it zero.
+  add_chunk[13] = '\0';
+
+  // Run the parse.
+  SafeBrowsingProtocolParser parser;
+  bool re_key = false;
+  std::deque<SBChunk> chunks;
+  bool result = parser.ParseChunk(add_chunk.data(),
+                                  static_cast<int>(add_chunk.length()),
+                                  "", "", &re_key, &chunks);
+  EXPECT_TRUE(result);
+  EXPECT_FALSE(re_key);
+  // Fatal size checks guard the indexing below.
+  ASSERT_EQ(chunks.size(), 2);
+  EXPECT_EQ(chunks[0].chunk_number, 1);
+  ASSERT_EQ(chunks[0].hosts.size(), 3);
+
+  EXPECT_EQ(chunks[0].hosts[0].host, 0x61616161);
+  SBEntry* entry = chunks[0].hosts[0].entry;
+  EXPECT_EQ(entry->type(), SBEntry::ADD_PREFIX);
+  EXPECT_EQ(entry->prefix_count(), 0);
+
+  EXPECT_EQ(chunks[0].hosts[1].host, 0x31313131);
+  entry = chunks[0].hosts[1].entry;
+  EXPECT_EQ(entry->type(), SBEntry::ADD_PREFIX);
+  ASSERT_EQ(entry->prefix_count(), 3);
+  EXPECT_EQ(entry->PrefixAt(0), 0x32323232);
+  EXPECT_EQ(entry->PrefixAt(1), 0x33333333);
+  EXPECT_EQ(entry->PrefixAt(2), 0x34343434);
+
+  EXPECT_EQ(chunks[0].hosts[2].host, 0x37373737);
+  entry = chunks[0].hosts[2].entry;
+  EXPECT_EQ(entry->type(), SBEntry::ADD_PREFIX);
+  ASSERT_EQ(entry->prefix_count(), 2);
+  EXPECT_EQ(entry->PrefixAt(0), 0x38383838);
+  EXPECT_EQ(entry->PrefixAt(1), 0x39393939);
+
+
+  EXPECT_EQ(chunks[1].chunk_number, 2);
+  ASSERT_EQ(chunks[1].hosts.size(), 1);
+
+  EXPECT_EQ(chunks[1].hosts[0].host, 0x35353535);
+  entry = chunks[1].hosts[0].entry;
+  EXPECT_EQ(entry->type(), SBEntry::ADD_PREFIX);
+  ASSERT_EQ(entry->prefix_count(), 2);
+  EXPECT_EQ(entry->PrefixAt(0), 0x70707070);
+  EXPECT_EQ(entry->PrefixAt(1), 0x67676767);
+
+  safe_browsing_util::FreeChunks(&chunks);
+}
+
+// Test parsing one add chunk where a hostkey spans several entries: the same
+// host key appears twice (255 prefixes, then 5 more) and the test expects a
+// single host with all 260 prefixes.
+TEST(SafeBrowsingProtocolParsingTest, TestAddBigChunk) {
+  std::string add_chunk("a:1:4:1050\naaaaX");
+  // Turn the 'X' placeholder into a count byte with all bits set (255).
+  add_chunk[add_chunk.size() - 1] |= 0xFF;
+  for (int i = 0; i < 255; ++i)
+    add_chunk.append(StringPrintf("%04d", i));
+
+  add_chunk.append("aaaa");
+  add_chunk.push_back(5);
+  for (int i = 0; i < 5; ++i)
+    add_chunk.append(StringPrintf("001%d", i));
+
+  SafeBrowsingProtocolParser parser;
+  bool re_key = false;
+  std::deque<SBChunk> chunks;
+  bool result = parser.ParseChunk(add_chunk.data(),
+                                  static_cast<int>(add_chunk.length()),
+                                  "", "", &re_key, &chunks);
+  EXPECT_TRUE(result);
+  EXPECT_FALSE(re_key);
+  // Fatal size checks guard the indexing below.
+  ASSERT_EQ(chunks.size(), 1);
+  EXPECT_EQ(chunks[0].chunk_number, 1);
+
+  ASSERT_EQ(chunks[0].hosts.size(), 1);
+
+  const SBChunkHost& host = chunks[0].hosts[0];
+  EXPECT_EQ(host.host, 0x61616161);
+  EXPECT_EQ(host.entry->prefix_count(), 260);
+
+  safe_browsing_util::FreeChunks(&chunks);
+}
+
+// Test parsing one sub chunk. Every prefix is preceded by the id of the add
+// chunk it refers to; a host with no prefixes carries a single chunk id.
+TEST(SafeBrowsingProtocolParsingTest, TestSubChunk) {
+  std::string sub_chunk("s:9:4:59\naaaaxkkkk1111\003"
+                        "zzzz2222zzzz3333zzzz4444"
+                        "7777\002yyyy8888yyyy9999");
+  // The 'x' placeholder is the prefix count of the first host: make it zero.
+  sub_chunk[13] = '\0';
+
+  // Run the parse.
+  SafeBrowsingProtocolParser parser;
+  bool re_key = false;
+  std::deque<SBChunk> chunks;
+  bool result = parser.ParseChunk(sub_chunk.data(),
+                                  static_cast<int>(sub_chunk.length()),
+                                  "", "", &re_key, &chunks);
+  EXPECT_TRUE(result);
+  EXPECT_FALSE(re_key);
+  // Fatal size checks guard the indexing below.
+  ASSERT_EQ(chunks.size(), 1);
+  EXPECT_EQ(chunks[0].chunk_number, 9);
+  ASSERT_EQ(chunks[0].hosts.size(), 3);
+
+  EXPECT_EQ(chunks[0].hosts[0].host, 0x61616161);
+  SBEntry* entry = chunks[0].hosts[0].entry;
+  EXPECT_EQ(entry->type(), SBEntry::SUB_PREFIX);
+  EXPECT_EQ(entry->chunk_id(), 0x6b6b6b6b);
+  EXPECT_EQ(entry->prefix_count(), 0);
+
+  EXPECT_EQ(chunks[0].hosts[1].host, 0x31313131);
+  entry = chunks[0].hosts[1].entry;
+  EXPECT_EQ(entry->type(), SBEntry::SUB_PREFIX);
+  ASSERT_EQ(entry->prefix_count(), 3);
+  EXPECT_EQ(entry->ChunkIdAtPrefix(0), 0x7a7a7a7a);
+  EXPECT_EQ(entry->PrefixAt(0), 0x32323232);
+  EXPECT_EQ(entry->ChunkIdAtPrefix(1), 0x7a7a7a7a);
+  EXPECT_EQ(entry->PrefixAt(1), 0x33333333);
+  EXPECT_EQ(entry->ChunkIdAtPrefix(2), 0x7a7a7a7a);
+  EXPECT_EQ(entry->PrefixAt(2), 0x34343434);
+
+  EXPECT_EQ(chunks[0].hosts[2].host, 0x37373737);
+  entry = chunks[0].hosts[2].entry;
+  EXPECT_EQ(entry->type(), SBEntry::SUB_PREFIX);
+  ASSERT_EQ(entry->prefix_count(), 2);
+  EXPECT_EQ(entry->ChunkIdAtPrefix(0), 0x79797979);
+  EXPECT_EQ(entry->PrefixAt(0), 0x38383838);
+  EXPECT_EQ(entry->ChunkIdAtPrefix(1), 0x79797979);
+  EXPECT_EQ(entry->PrefixAt(1), 0x39393939);
+
+  safe_browsing_util::FreeChunks(&chunks);
+}
+
+// Test parsing one sub chunk with full hashes: each 32-byte hash is preceded
+// by a 4-byte add-chunk id.
+TEST(SafeBrowsingProtocolParsingTest, TestSubFullChunk) {
+  std::string sub_chunk("s:1:32:77\naaaa");
+  sub_chunk.push_back(2);  // Number of hashes that follow the host key.
+
+  SBFullHash full_hash1, full_hash2;
+  for (int i = 0; i < 32; ++i) {
+    full_hash1.full_hash[i] = i % 2 ? 1 : 2;
+    full_hash2.full_hash[i] = i % 2 ? 3 : 4;
+  }
+
+  sub_chunk.append("yyyy");
+  sub_chunk.append(full_hash1.full_hash, 32);
+  sub_chunk.append("zzzz");
+  sub_chunk.append(full_hash2.full_hash, 32);
+
+  // Run the parse.
+  SafeBrowsingProtocolParser parser;
+  bool re_key = false;
+  std::deque<SBChunk> chunks;
+  bool result = parser.ParseChunk(sub_chunk.data(),
+                                  static_cast<int>(sub_chunk.length()),
+                                  "", "", &re_key, &chunks);
+  EXPECT_TRUE(result);
+  EXPECT_FALSE(re_key);
+  // Fatal size checks guard the indexing below.
+  ASSERT_EQ(chunks.size(), 1);
+  EXPECT_EQ(chunks[0].chunk_number, 1);
+  ASSERT_EQ(chunks[0].hosts.size(), 1);
+
+  EXPECT_EQ(chunks[0].hosts[0].host, 0x61616161);
+  SBEntry* entry = chunks[0].hosts[0].entry;
+  EXPECT_EQ(entry->type(), SBEntry::SUB_FULL_HASH);
+  ASSERT_EQ(entry->prefix_count(), 2);
+  EXPECT_EQ(entry->ChunkIdAtPrefix(0), 0x79797979);
+  EXPECT_TRUE(entry->FullHashAt(0) == full_hash1);
+  EXPECT_EQ(entry->ChunkIdAtPrefix(1), 0x7a7a7a7a);
+  EXPECT_TRUE(entry->FullHashAt(1) == full_hash2);
+
+  safe_browsing_util::FreeChunks(&chunks);
+}
+
+// Test parsing the add-del (ad:) and sub-del (sd:) chunk ranges of a
+// SafeBrowsing update response.
+TEST(SafeBrowsingProtocolParsingTest, TestChunkDelete) {
+  std::string add_del("n:1700\ni:phishy\nad:1-7,43-597,44444,99999\n"
+                      "i:malware\nsd:21-27,42,171717\n");
+
+  SafeBrowsingProtocolParser parser;
+  int next_query_sec = 0;
+  bool re_key = false;
+  bool reset = false;
+  std::vector<SBChunkDelete> deletes;
+  std::vector<ChunkUrl> urls;
+  EXPECT_TRUE(parser.ParseUpdate(add_del.data(),
+                                 static_cast<int>(add_del.length()), "",
+                                 &next_query_sec, &re_key,
+                                 &reset, &deletes, &urls));
+
+  EXPECT_TRUE(urls.empty());
+  EXPECT_FALSE(re_key);
+  EXPECT_FALSE(reset);
+  EXPECT_EQ(next_query_sec, 1700);
+  // Fatal size checks: the vectors are indexed below, so a wrong size must
+  // stop the test instead of reading out of range.
+  ASSERT_EQ(deletes.size(), 2);
+
+  ASSERT_EQ(deletes[0].chunk_del.size(), 4);
+  EXPECT_TRUE(deletes[0].chunk_del[0] == ChunkRange(1, 7));
+  EXPECT_TRUE(deletes[0].chunk_del[1] == ChunkRange(43, 597));
+  EXPECT_TRUE(deletes[0].chunk_del[2] == ChunkRange(44444));
+  EXPECT_TRUE(deletes[0].chunk_del[3] == ChunkRange(99999));
+
+  ASSERT_EQ(deletes[1].chunk_del.size(), 3);
+  EXPECT_TRUE(deletes[1].chunk_del[0] == ChunkRange(21, 27));
+  EXPECT_TRUE(deletes[1].chunk_del[1] == ChunkRange(42));
+  EXPECT_TRUE(deletes[1].chunk_del[2] == ChunkRange(171717));
+
+  // An update response with missing list name (the ad: line comes before any
+  // i: line) must be rejected.
+
+  next_query_sec = 0;
+  deletes.clear();
+  urls.clear();
+  add_del = "n:1700\nad:1-7,43-597,44444,99999\ni:malware\nsd:4,21-27171717\n";
+  EXPECT_FALSE(parser.ParseUpdate(add_del.data(),
+                                  static_cast<int>(add_del.length()), "",
+                                  &next_query_sec, &re_key,
+                                  &reset, &deletes, &urls));
+}
+
+// Test parsing redirect URLs (u: lines) out of a SafeBrowsing update response
+// when no MAC key is in use.
+TEST(SafeBrowsingProtocolParsingTest, TestRedirects) {
+  std::string redirects("i:goog-malware-shavar\n"
+    "u:cache.googlevideo.com/safebrowsing/rd/goog-malware-shavar_s_1\n"
+    "u:cache.googlevideo.com/safebrowsing/rd/goog-malware-shavar_s_2\n"
+    "u:cache.googlevideo.com/safebrowsing/rd/goog-malware-shavar_s_3\n"
+    "u:s.ytimg.com/safebrowsing/rd/goog-phish-shavar_a_8641-8800:8641-8689,"
+    "8691-8731,8733-8786\n");
+
+  SafeBrowsingProtocolParser parser;
+  int next_query_sec = 0;
+  bool re_key = false;
+  bool reset = false;
+  std::vector<SBChunkDelete> deletes;
+  std::vector<ChunkUrl> urls;
+  EXPECT_TRUE(parser.ParseUpdate(redirects.data(),
+                                 static_cast<int>(redirects.length()), "",
+                                 &next_query_sec, &re_key,
+                                 &reset, &deletes, &urls));
+
+  EXPECT_FALSE(re_key);
+  EXPECT_FALSE(reset);
+  // Fatal size check: |urls| is indexed below.
+  ASSERT_EQ(urls.size(), 4);
+  EXPECT_EQ(urls[0].url,
+      "cache.googlevideo.com/safebrowsing/rd/goog-malware-shavar_s_1");
+  EXPECT_EQ(urls[1].url,
+      "cache.googlevideo.com/safebrowsing/rd/goog-malware-shavar_s_2");
+  EXPECT_EQ(urls[2].url,
+      "cache.googlevideo.com/safebrowsing/rd/goog-malware-shavar_s_3");
+  EXPECT_EQ(urls[3].url,
+      "s.ytimg.com/safebrowsing/rd/goog-phish-shavar_a_8641-8800:8641-8689,"
+      "8691-8731,8733-8786");
+  // No n: line in the response, so the value passed in is left untouched.
+  EXPECT_EQ(next_query_sec, 0);
+  EXPECT_TRUE(deletes.empty());
+}
+
+// Test parsing redirect URLs when a key is supplied: the text after the last
+// comma of each u: line is expected back in ChunkUrl::mac rather than as part
+// of the URL.
+TEST(SafeBrowsingProtocolParsingTest, TestRedirectsWithMac) {
+  std::string redirects("i:goog-phish-shavar\n"
+    "u:s.ytimg.com/safebrowsing/rd/goog-phish-shavar_s_6501-6505:6501-6505,"
+    "pcY6iVeT9-CBQ3fdAF0rpnKjR1Y=\n"
+    "u:s.ytimg.com/safebrowsing/rd/goog-phish-shavar_a_8001-8160:8001-8024,"
+    "8026-8045,8048-8049,8051-8134,8136-8152,8155-8160,"
+    "j6XXAEWnjYk9tVVLBSdQvIEq2Wg=\n");
+
+  SafeBrowsingProtocolParser parser;
+  int next_query_sec = 0;
+  bool re_key = false;
+  bool reset = false;
+  const std::string key("58Lqn5WIP961x3zuLGo5Uw==");
+  std::vector<SBChunkDelete> deletes;
+  std::vector<ChunkUrl> urls;
+  EXPECT_TRUE(parser.ParseUpdate(redirects.data(),
+                                 static_cast<int>(redirects.length()), key,
+                                 &next_query_sec, &re_key,
+                                 &reset, &deletes, &urls));
+
+  EXPECT_FALSE(re_key);
+  EXPECT_FALSE(reset);
+  // Fatal size check: |urls| is indexed below.
+  ASSERT_EQ(urls.size(), 2);
+  EXPECT_EQ(urls[0].url,
+      "s.ytimg.com/safebrowsing/rd/goog-phish-shavar_s_6501-6505:6501-6505");
+  EXPECT_EQ(urls[0].mac, "pcY6iVeT9-CBQ3fdAF0rpnKjR1Y=");
+  EXPECT_EQ(urls[1].url,
+      "s.ytimg.com/safebrowsing/rd/goog-phish-shavar_a_8001-8160:8001-8024,"
+      "8026-8045,8048-8049,8051-8134,8136-8152,8155-8160");
+  EXPECT_EQ(urls[1].mac, "j6XXAEWnjYk9tVVLBSdQvIEq2Wg=");
+}
+
+// An update response that carries only the next-poll delay and a list name
+// must report that delay and leave every other output empty / false.
+TEST(SafeBrowsingProtocolParsingTest, TestNextQueryTime) {
+  std::string response("n:1800\ni:goog-white-shavar\n");
+  SafeBrowsingProtocolParser parser;
+
+  int next_update_sec = 0;
+  bool wants_rekey = false;
+  bool wants_reset = false;
+  std::vector<SBChunkDelete> chunk_deletes;
+  std::vector<ChunkUrl> chunk_urls;
+  const bool parsed = parser.ParseUpdate(
+      response.data(), static_cast<int>(response.length()), "",
+      &next_update_sec, &wants_rekey, &wants_reset,
+      &chunk_deletes, &chunk_urls);
+  EXPECT_TRUE(parsed);
+
+  EXPECT_EQ(next_update_sec, 1800);
+  EXPECT_FALSE(wants_rekey);
+  EXPECT_FALSE(wants_reset);
+  EXPECT_TRUE(chunk_deletes.empty());
+  EXPECT_TRUE(chunk_urls.empty());
+}
+
+// Test parsing data from a GetHashRequest. Each response section is a
+// "<list>:<number>:<byte count>\n" header followed by that many bytes of
+// 32-byte full hashes.
+TEST(SafeBrowsingProtocolParsingTest, TestGetHash) {
+  std::string get_hash("goog-phish-shavar:19:96\n"
+                       "00112233445566778899aabbccddeeff"
+                       "00001111222233334444555566667777"
+                       "ffffeeeeddddccccbbbbaaaa99998888");
+  std::vector<SBFullHashResult> full_hashes;
+  bool re_key = false;
+  SafeBrowsingProtocolParser parser;
+  // The parse result itself must be checked, not just its outputs.
+  EXPECT_TRUE(parser.ParseGetHash(get_hash.data(),
+                                  static_cast<int>(get_hash.length()), "",
+                                  &re_key,
+                                  &full_hashes));
+
+  EXPECT_FALSE(re_key);
+  // Fatal size check: |full_hashes| is indexed below.
+  ASSERT_EQ(full_hashes.size(), 3);
+  EXPECT_EQ(memcmp(&full_hashes[0].hash,
+                   "00112233445566778899aabbccddeeff",
+                   sizeof(SBFullHash)), 0);
+  EXPECT_EQ(full_hashes[0].list_name, "goog-phish-shavar");
+  EXPECT_EQ(memcmp(&full_hashes[1].hash,
+                   "00001111222233334444555566667777",
+                   sizeof(SBFullHash)), 0);
+  EXPECT_EQ(full_hashes[1].list_name, "goog-phish-shavar");
+  EXPECT_EQ(memcmp(&full_hashes[2].hash,
+                   "ffffeeeeddddccccbbbbaaaa99998888",
+                   sizeof(SBFullHash)), 0);
+  EXPECT_EQ(full_hashes[2].list_name, "goog-phish-shavar");
+
+  // Test multiple lists in the GetHash results.
+  // NOTE(review): |full_hashes| is reused without being cleared, so the
+  // expected size of 3 relies on ParseGetHash resetting it - confirm.
+  std::string get_hash2("goog-phish-shavar:19:32\n"
+                        "00112233445566778899aabbccddeeff"
+                        "goog-malware-shavar:19:64\n"
+                        "cafebeefcafebeefdeaddeaddeaddead"
+                        "zzzzyyyyxxxxwwwwvvvvuuuuttttssss");
+  EXPECT_TRUE(parser.ParseGetHash(get_hash2.data(),
+                                  static_cast<int>(get_hash2.length()), "",
+                                  &re_key,
+                                  &full_hashes));
+
+  EXPECT_FALSE(re_key);
+  ASSERT_EQ(full_hashes.size(), 3);
+  EXPECT_EQ(memcmp(&full_hashes[0].hash,
+                   "00112233445566778899aabbccddeeff",
+                   sizeof(SBFullHash)), 0);
+  EXPECT_EQ(full_hashes[0].list_name, "goog-phish-shavar");
+  EXPECT_EQ(memcmp(&full_hashes[1].hash,
+                   "cafebeefcafebeefdeaddeaddeaddead",
+                   sizeof(SBFullHash)), 0);
+  EXPECT_EQ(full_hashes[1].list_name, "goog-malware-shavar");
+  EXPECT_EQ(memcmp(&full_hashes[2].hash,
+                   "zzzzyyyyxxxxwwwwvvvvuuuuttttssss",
+                   sizeof(SBFullHash)), 0);
+  EXPECT_EQ(full_hashes[2].list_name, "goog-malware-shavar");
+}
+
+// Test parsing a GetHash response when a key is supplied. The response is a
+// text line ("2Vtok6dAQreQb8QhYEWQWMReBcA="), then the header
+// "goog-phish-shavar:6169:32", then one 32-byte hash.
+TEST(SafeBrowsingProtocolParsingTest, TestGetHashWithMac) {
+  // TODO(paulg): Bug: http://b/1084719, skip this test on Windows 2000 until
+  // this bug is fixed.
+  if (win_util::GetWinVersion() <= win_util::WINVERSION_2000)
+    return;
+
+  const unsigned char get_hash[] = {
+    0x32, 0x56, 0x74, 0x6f, 0x6b, 0x36, 0x64, 0x41,
+    0x51, 0x72, 0x65, 0x51, 0x62, 0x38, 0x51, 0x68,
+    0x59, 0x45, 0x57, 0x51, 0x57, 0x4d, 0x52, 0x65,
+    0x42, 0x63, 0x41, 0x3d, 0x0a, 0x67, 0x6f, 0x6f,
+    0x67, 0x2d, 0x70, 0x68, 0x69, 0x73, 0x68, 0x2d,
+    0x73, 0x68, 0x61, 0x76, 0x61, 0x72, 0x3a, 0x36,
+    0x31, 0x36, 0x39, 0x3a, 0x33, 0x32, 0x0a, 0x17,
+    0x7f, 0x03, 0x42, 0x28, 0x1c, 0x31, 0xb9, 0x0b,
+    0x1c, 0x7b, 0x9d, 0xaf, 0x7b, 0x43, 0x99, 0x10,
+    0xc1, 0xab, 0xe3, 0x1b, 0x35, 0x80, 0x38, 0x96,
+    0xf9, 0x44, 0x4f, 0x28, 0xb4, 0xeb, 0x45
+  };
+
+  // The trailing 32 bytes of |get_hash|.
+  const unsigned char hash_result [] = {
+    0x17, 0x7f, 0x03, 0x42, 0x28, 0x1c, 0x31, 0xb9,
+    0x0b, 0x1c, 0x7b, 0x9d, 0xaf, 0x7b, 0x43, 0x99,
+    0x10, 0xc1, 0xab, 0xe3, 0x1b, 0x35, 0x80, 0x38,
+    0x96, 0xf9, 0x44, 0x4f, 0x28, 0xb4, 0xeb, 0x45
+  };
+
+  const std::string key = "58Lqn5WIP961x3zuLGo5Uw==";
+  std::vector<SBFullHashResult> full_hashes;
+  bool re_key = false;
+  SafeBrowsingProtocolParser parser;
+  EXPECT_TRUE(parser.ParseGetHash(reinterpret_cast<const char*>(get_hash),
+                                  static_cast<int>(sizeof(get_hash)),
+                                  key,
+                                  &re_key,
+                                  &full_hashes));
+  EXPECT_FALSE(re_key);
+  // Fatal size check: |full_hashes| is indexed below.
+  ASSERT_EQ(full_hashes.size(), 1);
+  EXPECT_EQ(memcmp(hash_result, &full_hashes[0].hash, sizeof(SBFullHash)), 0);
+}
+
+
+// A GetHash request is expected to be "<prefix size>:<total bytes>\n"
+// followed by the raw prefix bytes.
+TEST(SafeBrowsingProtocolParsingTest, TestFormatHash) {
+  SafeBrowsingProtocolParser parser;
+
+  const SBPrefix kPrefixes[] = { 0x34333231, 0x64636261, 0x73727170 };
+  std::vector<SBPrefix> prefixes(
+      kPrefixes, kPrefixes + sizeof(kPrefixes) / sizeof(kPrefixes[0]));
+
+  std::string request;
+  parser.FormatGetHash(prefixes, &request);
+  EXPECT_EQ(request, "4:12\n1234abcdpqrs");
+}
+
+// A well-formed new-key response yields both the client and the wrapped key.
+TEST(SafeBrowsingProtocolParsingTest, TestGetKey) {
+  SafeBrowsingProtocolParser parser;
+  std::string response("clientkey:10:0123456789\n"
+                       "wrappedkey:20:abcdefghijklmnopqrst\n");
+
+  std::string client_key;
+  std::string wrapped_key;
+  const bool parsed = parser.ParseNewKey(response.data(),
+                                         static_cast<int>(response.length()),
+                                         &client_key,
+                                         &wrapped_key);
+  EXPECT_TRUE(parsed);
+
+  EXPECT_EQ(client_key, "0123456789");
+  EXPECT_EQ(wrapped_key, "abcdefghijklmnopqrst");
+}
+
+// An "e:pleaserekey" line in an update response must set the re-key flag.
+TEST(SafeBrowsingProtocolParsingTest, TestReKey) {
+  SafeBrowsingProtocolParser parser;
+  std::string response("n:1800\ni:phishy\ne:pleaserekey\n");
+
+  bool wants_rekey = false;
+  bool wants_reset = false;
+  int next_update_sec = -1;
+  std::vector<SBChunkDelete> chunk_deletes;
+  std::vector<ChunkUrl> chunk_urls;
+  const bool parsed = parser.ParseUpdate(
+      response.data(), static_cast<int>(response.size()), "",
+      &next_update_sec, &wants_rekey, &wants_reset,
+      &chunk_deletes, &chunk_urls);
+  EXPECT_TRUE(parsed);
+  EXPECT_TRUE(wants_rekey);
+}
+
+// An "r:pleasereset" line in an update response must set the reset flag.
+TEST(SafeBrowsingProtocolParsingTest, TestReset) {
+  SafeBrowsingProtocolParser parser;
+  std::string response("n:1800\ni:phishy\nr:pleasereset\n");
+
+  bool wants_rekey = false;
+  bool wants_reset = false;
+  int next_update_sec = -1;
+  std::vector<SBChunkDelete> chunk_deletes;
+  std::vector<ChunkUrl> chunk_urls;
+  const bool parsed = parser.ParseUpdate(
+      response.data(), static_cast<int>(response.size()), "",
+      &next_update_sec, &wants_rekey, &wants_reset,
+      &chunk_deletes, &chunk_urls);
+  EXPECT_TRUE(parsed);
+  EXPECT_TRUE(wants_reset);
+}
+
+// Parses a complete update response with a key supplied and expects it to be
+// accepted. The response starts with an "m:" line; presumably that is the MAC
+// checked against the rest of the body with |key| (confirm in ParseUpdate), so
+// the literal below must not be altered in any way.
+TEST(SafeBrowsingProtocolParsingTest, TestVerifyUpdateMac) {
+ // TODO(paulg): Bug: http://b/1084719, skip this test on Windows 2000 until
+ // this bug is fixed.
+ if (win_util::GetWinVersion() <= win_util::WINVERSION_2000)
+ return;
+
+ SafeBrowsingProtocolParser parser;
+
+ const std::string update =
+ "m:XIU0LiQhAPJq6dynXwHbygjS5tw=\n"
+ "n:1895\n"
+ "i:goog-phish-shavar\n"
+ "u:s.ytimg.com/safebrowsing/rd/goog-phish-shavar_s_6501-6505:6501-6505,pcY6iVeT9-CBQ3fdAF0rpnKjR1Y=\n"
+ "u:s.ytimg.com/safebrowsing/rd/goog-phish-shavar_s_6506-6510:6506-6510,SDBrYC3rX3KEPe72LOypnP6QYac=\n"
+ "u:s.ytimg.com/safebrowsing/rd/goog-phish-shavar_s_6511-6520:6511-6520,9UQo-e7OkcsXT2wFWTAhOuWOsUs=\n"
+ "u:s.ytimg.com/safebrowsing/rd/goog-phish-shavar_s_6521-6560:6521-6560,qVNw6JIpR1q6PIXST7J4LJ9n3Zg=\n"
+ "u:s.ytimg.com/safebrowsing/rd/goog-phish-shavar_s_6561-6720:6561-6720,7OiJvCbiwvpzPITW-hQohY5NHuc=\n"
+ "u:s.ytimg.com/safebrowsing/rd/goog-phish-shavar_s_6721-6880:6721-6880,oBS3svhoi9deIa0sWZ_gnD0ujj8=\n"
+ "u:s.ytimg.com/safebrowsing/rd/goog-phish-shavar_s_6881-7040:6881-7040,a0r8Xit4VvH39xgyQHZTPczKBIE=\n"
+ "u:s.ytimg.com/safebrowsing/rd/goog-phish-shavar_s_7041-7200:7041-7163,q538LChutGknBw55s6kcE2wTcvU=\n"
+ "u:s.ytimg.com/safebrowsing/rd/goog-phish-shavar_a_8001-8160:8001-8024,8026-8045,8048-8049,8051-8134,8136-8152,8155-8160,j6XXAEWnjYk9tVVLBSdQvIEq2Wg=\n"
+ "u:s.ytimg.com/safebrowsing/rd/goog-phish-shavar_a_8161-8320:8161-8215,8217-8222,8224-8320,YaNfiqdQOt-uLCLWVLj46AZpAjQ=\n"
+ "u:s.ytimg.com/safebrowsing/rd/goog-phish-shavar_a_8321-8480:8321-8391,8393-8399,8402,8404-8419,8421-8425,8427,8431-8433,8435-8439,8441-8443,8445-8446,8448-8480,ALj31GQMwGiIeU3bM2ZYKITfU-U=\n"
+ "u:s.ytimg.com/safebrowsing/rd/goog-phish-shavar_a_8481-8640:8481-8500,8502-8508,8510-8511,8513-8517,8519-8525,8527-8531,8533,8536-8539,8541-8576,8578-8638,8640,TlQYRmS_kZ5PBAUIUyNQDq0Jprs=\n"
+ "u:s.ytimg.com/safebrowsing/rd/goog-phish-shavar_a_8641-8800:8641-8689,8691-8731,8733-8786,x1Qf7hdNrO8b6yym03ZzNydDS1o=\n";
+
+ bool re_key = false;
+ bool reset = false;
+ int next_update = -1;
+ std::vector<SBChunkDelete> deletes;
+ std::vector<ChunkUrl> urls;
+ const std::string key("58Lqn5WIP961x3zuLGo5Uw==");
+ EXPECT_TRUE(parser.ParseUpdate(update.data(),
+ static_cast<int>(update.size()), key,
+ &next_update, &re_key,
+ &reset, &deletes, &urls));
+ EXPECT_FALSE(re_key);
+ // Taken from the "n:1895" line of the response.
+ EXPECT_EQ(next_update, 1895);
+}
+
+// Parses a binary sub chunk with both a key and a MAC supplied and expects it
+// to be accepted. Presumably ParseChunk verifies |mac| over the chunk bytes
+// using |key| (confirm in protocol_parser.cc), so the byte array below must
+// not be altered in any way.
+TEST(SafeBrowsingProtocolParsingTest, TestVerifyChunkMac) {
+ // TODO(paulg): Bug: http://b/1084719, skip this test on Windows 2000 until
+ // this bug is fixed.
+ if (win_util::GetWinVersion() <= win_util::WINVERSION_2000)
+ return;
+
+ SafeBrowsingProtocolParser parser;
+
+ // The first 12 bytes are the ASCII header "s:2002:4:22\n"; the remaining 22
+ // bytes are the binary chunk payload.
+ const unsigned char chunk[] = {
+ 0x73, 0x3a, 0x32, 0x30, 0x30, 0x32, 0x3a, 0x34,
+ 0x3a, 0x32, 0x32, 0x0a, 0x2f, 0x4f, 0x89, 0x7a,
+ 0x01, 0x00, 0x00, 0x0a, 0x59, 0xc8, 0x71, 0xdf,
+ 0x9d, 0x29, 0x0c, 0xba, 0xd7, 0x00, 0x00, 0x00,
+ 0x0a, 0x59
+ };
+
+ bool re_key = false;
+ std::deque<SBChunk> chunks;
+ const std::string key("v_aDSz6jI92WeHCOoZ07QA==");
+ const std::string mac("W9Xp2fUcQ9V66If6Cvsrstpa4Kk=");
+
+ EXPECT_TRUE(parser.ParseChunk(reinterpret_cast<const char*>(chunk),
+ sizeof(chunk), key, mac,
+ &re_key, &chunks));
+ EXPECT_FALSE(re_key);
+
+ // Release whatever the parser put into |chunks|.
+ safe_browsing_util::FreeChunks(&chunks);
+} \ No newline at end of file
diff --git a/chrome/browser/safe_browsing/safe_browsing_blocking_page.cc b/chrome/browser/safe_browsing/safe_browsing_blocking_page.cc
new file mode 100644
index 0000000..ec6f671
--- /dev/null
+++ b/chrome/browser/safe_browsing/safe_browsing_blocking_page.cc
@@ -0,0 +1,361 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Implementation of the SafeBrowsingBlockingPage class.
+
+#include "chrome/browser/safe_browsing/safe_browsing_blocking_page.h"
+
+#include "chrome/app/locales/locale_settings.h"
+#include "chrome/browser/browser_process.h"
+#include "chrome/browser/browser_resources.h"
+#include "chrome/browser/dom_operation_notification_details.h"
+#include "chrome/browser/google_util.h"
+#include "chrome/browser/navigation_controller.h"
+#include "chrome/browser/navigation_entry.h"
+#include "chrome/browser/tab_util.h"
+#include "chrome/browser/web_contents.h"
+#include "chrome/common/jstemplate_builder.h"
+#include "chrome/common/l10n_util.h"
+#include "chrome/common/resource_bundle.h"
+#include "generated_resources.h"
+#include "net/base/escape.h"
+
+
+// For malware interstitial pages, we link the problematic URL to Google's
+// diagnostic page. The single %s is filled with the query-escaped spec of the
+// blocked URL (see Continue(), user action "4").
+// TODO(paulg): Change 'googleclient' to a proper client name before launch.
+static const char* const kSbDiagnosticUrl =
+ "http://safebrowsing.clients.google.com/safebrowsing/diagnostic?site=%s&client=googleclient";
+
+// Base URL handed to safe_browsing_util::GeneratePhishingReportUrl() when the
+// user reports a phishing classification error (Continue(), user action "3").
+static const char* const kSbReportPhishingUrl =
+ "http://www.google.com/safebrowsing/report_error/";
+
+// HTML snippet for the diagnostic-page link embedded in the malware
+// interstitial text. The %s is the localized link label. The link has no
+// target of its own; it calls sendCommand(4) instead.
+// NOTE(review): presumably sendCommand(4) arrives in Continue() as user
+// action "4" - confirm against the interstitial page script.
+static const wchar_t* const kSbDiagnosticHtml =
+ L"<a href=\"\" onClick=\"sendCommand(4); return false;\">%s</a>";
+
+// Created on the io_thread.
+//
+// Records which request is being blocked (render process/view IDs, URL and
+// SafeBrowsing verdict) and who to report the user's decision to. Nothing is
+// shown until DisplayBlockingPage() is called. |is_main_frame_| is derived
+// from |resource_type|.
+SafeBrowsingBlockingPage::SafeBrowsingBlockingPage(
+    SafeBrowsingService* sb_service,
+    SafeBrowsingService::Client* client,
+    int render_process_host_id,
+    int render_view_id,
+    const GURL& url,
+    ResourceType::Type resource_type,
+    SafeBrowsingService::UrlCheckResult result)
+    : sb_service_(sb_service),
+      client_(client),
+      // Give the pointer a defined value; it was previously left
+      // uninitialized.
+      report_loop_(NULL),
+      render_process_host_id_(render_process_host_id),
+      render_view_id_(render_view_id),
+      url_(url),
+      result_(result),
+      proceed_(false),
+      tab_(NULL),
+      controller_(NULL),
+      delete_pending_(false),
+      is_main_frame_(resource_type == ResourceType::MAIN_FRAME),
+      created_temporary_entry_(false) {
+}
+
+// Deleted on the io_thread. There is nothing to clean up explicitly here.
+SafeBrowsingBlockingPage::~SafeBrowsingBlockingPage() {}
+
+// Builds and shows the interstitial for the blocked URL.
+//
+// Steps, in order:
+// 1. Look up the tab from the stored render process/view IDs; if there is no
+// web tab, report completion via NotifyDone() and stop.
+// 2. Register for tab-closing and DOM-operation notifications and take an
+// extra reference on this object.
+// 3. Fill a dictionary with localized strings for either the malware or the
+// phishing template and expand the matching HTML resource.
+// 4. For a main-frame block with no pending navigation entry, add a temporary
+// navigation entry for the blocked URL.
+// 5. Ask the WebContents to show the interstitial with this object as
+// delegate.
+void SafeBrowsingBlockingPage::DisplayBlockingPage() {
+ TabContents* tab = tab_util::GetTabContentsByID(render_process_host_id_,
+ render_view_id_);
+ if (!tab || tab->type() != TAB_CONTENTS_WEB) {
+ // Nothing to show the page in. |proceed_| is still false at this point.
+ NotifyDone();
+ return;
+ }
+
+ tab_ = tab;
+ controller_ = tab->controller();
+
+ // Register for notifications of events from this tab.
+ NotificationService* ns = NotificationService::current();
+ DCHECK(ns);
+ ns->AddObserver(this, NOTIFY_TAB_CLOSING,
+ Source<NavigationController>(controller_));
+ ns->AddObserver(this, NOTIFY_DOM_OPERATION_RESPONSE,
+ Source<TabContents>(tab_));
+
+ // Hold an extra reference to ourself until the interstitial is gone.
+ // The matching Release() is in HandleClose().
+ AddRef();
+
+ WebContents* web_contents = tab->AsWebContents();
+
+ // Load the HTML page and create the template components.
+ DictionaryValue strings;
+ ResourceBundle& rb = ResourceBundle::GetSharedInstance();
+ std::string html;
+
+ if (result_ == SafeBrowsingService::URL_MALWARE) {
+ // Localized "diagnostic page" link, spliced into the description text.
+ std::wstring link = StringPrintf(kSbDiagnosticHtml,
+ l10n_util::GetString(IDS_SAFE_BROWSING_MALWARE_DIAGNOSTIC_PAGE).c_str());
+
+ strings.SetString(L"badURL", UTF8ToWide(url_.host()));
+ strings.SetString(L"title",
+ l10n_util::GetString(IDS_SAFE_BROWSING_MALWARE_TITLE));
+ strings.SetString(L"headLine",
+ l10n_util::GetString(IDS_SAFE_BROWSING_MALWARE_HEADLINE));
+
+ // Check to see if we're blocking the main page, or a sub-resource on the
+ // main page.
+ GURL top_url = tab_->GetURL();
+ if (top_url == url_) {
+ strings.SetString(L"description1",
+ l10n_util::GetStringF(IDS_SAFE_BROWSING_MALWARE_DESCRIPTION1,
+ UTF8ToWide(url_.host())));
+ strings.SetString(L"description2",
+ l10n_util::GetStringF(IDS_SAFE_BROWSING_MALWARE_DESCRIPTION2,
+ link,
+ UTF8ToWide(url_.host())));
+ } else {
+ // Sub-resource case: the text names both the page host and the host of
+ // the flagged resource.
+ strings.SetString(L"description1",
+ l10n_util::GetStringF(IDS_SAFE_BROWSING_MALWARE_DESCRIPTION4,
+ UTF8ToWide(top_url.host()),
+ UTF8ToWide(url_.host())));
+ strings.SetString(L"description2",
+ l10n_util::GetStringF(IDS_SAFE_BROWSING_MALWARE_DESCRIPTION5,
+ link,
+ UTF8ToWide(url_.host())));
+ }
+
+ strings.SetString(L"description3",
+ l10n_util::GetString(IDS_SAFE_BROWSING_MALWARE_DESCRIPTION3));
+ strings.SetString(L"confirm_text",
+ l10n_util::GetString(IDS_SAFE_BROWSING_MALWARE_DESCRIPTION_AGREE));
+ strings.SetString(L"continue_button",
+ l10n_util::GetString(IDS_SAFE_BROWSING_MALWARE_PROCEED_BUTTON));
+ strings.SetString(L"back_button",
+ l10n_util::GetString(IDS_SAFE_BROWSING_MALWARE_BACK_BUTTON));
+ strings.SetString(L"textdirection",
+ (l10n_util::GetTextDirection() == l10n_util::RIGHT_TO_LEFT) ?
+ L"rtl" : L"ltr");
+ html = rb.GetDataResource(IDR_SAFE_BROWSING_MALWARE_BLOCK);
+ } else {
+ // Every non-malware result is rendered with the phishing template.
+ strings.SetString(L"title",
+ l10n_util::GetString(IDS_SAFE_BROWSING_PHISHING_TITLE));
+ strings.SetString(L"headLine",
+ l10n_util::GetString(IDS_SAFE_BROWSING_PHISHING_HEADLINE));
+ strings.SetString(L"description1",
+ l10n_util::GetStringF(IDS_SAFE_BROWSING_PHISHING_DESCRIPTION1,
+ UTF8ToWide(url_.host())));
+ strings.SetString(L"description2",
+ l10n_util::GetStringF(IDS_SAFE_BROWSING_PHISHING_DESCRIPTION2,
+ UTF8ToWide(url_.host())));
+
+ strings.SetString(L"continue_button",
+ l10n_util::GetString(IDS_SAFE_BROWSING_PHISHING_PROCEED_BUTTON));
+ strings.SetString(L"back_button",
+ l10n_util::GetString(IDS_SAFE_BROWSING_PHISHING_BACK_BUTTON));
+ strings.SetString(L"report_error",
+ l10n_util::GetString(IDS_SAFE_BROWSING_PHISHING_REPORT_ERROR));
+ strings.SetString(L"textdirection",
+ (l10n_util::GetTextDirection() == l10n_util::RIGHT_TO_LEFT) ?
+ L"rtl" : L"ltr");
+ html = rb.GetDataResource(IDR_SAFE_BROWSING_PHISHING_BLOCK);
+ }
+
+ // Expand the template with the strings collected above.
+ std::string html_page(jstemplate_builder::GetTemplateHtml(html,
+ &strings,
+ "template_root"));
+
+ // If the malware is the actual main frame and we have no pending entry
+ // (typically the navigation was initiated by the page), we create a fake
+ // navigation entry (so the location bar shows the page's URL).
+ if (is_main_frame_ && tab_->controller()->GetPendingEntryIndex() == -1) {
+ // New navigation.
+ NavigationEntry* nav_entry = new NavigationEntry(TAB_CONTENTS_WEB);
+
+ // We set the page ID to max page id so to ensure the controller considers
+ // this dummy entry a new one. Because we'll remove the entry when the
+ // interstitial is going away, it will not conflict with any future
+ // navigations.
+ nav_entry->SetPageID(tab_->GetMaxPageID() + 1);
+ nav_entry->SetPageType(NavigationEntry::INTERSTITIAL_PAGE);
+ nav_entry->SetURL(url_);
+ tab_->controller()->DidNavigateToEntry(nav_entry);
+ // Remembered so Continue() can remove the entry if the user proceeds.
+ created_temporary_entry_ = true;
+ }
+
+ // Show the interstitial page.
+ web_contents->ShowInterstitialPage(html_page, this);
+}
+
+// NotificationObserver implementation. A tab-closing notification tears the
+// blocking page down; a DOM-operation response carries the user's choice as a
+// JSON string and is forwarded to Continue(). No other notification type is
+// expected.
+void SafeBrowsingBlockingPage::Observe(NotificationType type,
+                                       const NotificationSource& source,
+                                       const NotificationDetails& details) {
+  if (type == NOTIFY_TAB_CLOSING) {
+    HandleClose();
+  } else if (type == NOTIFY_DOM_OPERATION_RESPONSE) {
+    Continue(Details<DomOperationNotificationDetails>(details)->json());
+  } else {
+    NOTREACHED();
+  }
+}
+
+// InterstitialPageDelegate implementation: the interstitial has gone away, so
+// run the common teardown path.
+void SafeBrowsingBlockingPage::InterstitialClosed() {
+  HandleClose();
+}
+
+// InterstitialPageDelegate implementation: leaves the blocked page.
+//
+// Depending on the previous navigation entry this loads about:blank,
+// navigates back, or just hides the interstitial. In every case the last
+// navigation entry is then removed. Always returns true.
+bool SafeBrowsingBlockingPage::GoBack() {
+ WebContents* web_contents = tab_->AsWebContents();
+ NavigationEntry* prev_entry =
+ web_contents->controller()->GetEntryAtOffset(-1);
+
+ if (!prev_entry) {
+ // Nothing to go to, default to about:blank. Navigating will cause the
+ // interstitial to hide which will trigger "this" to be deleted.
+ tab_->controller()->LoadURL(GURL("about:blank"),
+ PageTransition::AUTO_BOOKMARK);
+ } else if (prev_entry->GetType() != TAB_CONTENTS_WEB ||
+ prev_entry->restored() ||
+ !is_main_frame_) {
+ // We do navigate back if any of these is true:
+ // - the page is not a WebContents, its TabContents might have to be
+ // recreated.
+ // - we have not yet visited that navigation entry (typically session
+ // restore), in which case the page is not already available.
+ // - the interstitial was triggered by a sub-resource. In that case we
+ // really need to navigate, just hiding the interstitial would show the
+ // page containing the bad resource, and we don't want that.
+ web_contents->controller()->GoBack();
+ } else {
+ // Otherwise, the user was viewing a page and navigated to a URL that was
+ // interrupted by an interstitial. Thus, we can just hide the interstitial
+ // and show the page the user was on before.
+ web_contents->HideInterstitialPage(false, false);
+ }
+
+ // WARNING: at this point we are now either deleted or pending deletion from
+ // the IO thread.
+ // Only the local |web_contents| pointer is used below, not any member.
+
+ // Remove the navigation entry for the malware page. Note that we always
+ // remove the entry even if we did not create it as it has been flagged as
+ // malware and we don't want the user navigating back to it.
+ web_contents->controller()->RemoveLastEntry();
+
+ return true;
+}
+
+// Handles the command string sent back from the interstitial page.
+//
+// Recognized values of |user_action|:
+// "1" - proceed to the blocked page.
+// "2" - open the "learn more" page for malware or phishing.
+// "3" - open the phishing error-report page.
+// "4" - open the SafeBrowsing diagnostic page for the blocked URL.
+// Any other value is treated as "go back".
+//
+// Actions "2", "3" and "4" only open a URL in the current tab and return.
+// The other values set |proceed_|, hide or leave the interstitial, and call
+// NotifyDone().
+void SafeBrowsingBlockingPage::Continue(const std::string& user_action) {
+ TabContents* tab = tab_util::GetTabContentsByID(render_process_host_id_,
+ render_view_id_);
+ DCHECK(tab);
+ WebContents* web = tab->AsWebContents();
+ if (user_action == "2") {
+ // User pressed "Learn more".
+ GURL url;
+ if (result_ == SafeBrowsingService::URL_MALWARE) {
+ url = GURL(l10n_util::GetString(IDS_LEARN_MORE_MALWARE_URL));
+ } else if (result_ == SafeBrowsingService::URL_PHISHING) {
+ url = GURL(l10n_util::GetString(IDS_LEARN_MORE_PHISHING_URL));
+ } else {
+ NOTREACHED();
+ }
+ web->OpenURL(url, CURRENT_TAB, PageTransition::LINK);
+ return;
+ }
+ if (user_action == "3") {
+ // User pressed "Report error" for a phishing site.
+ // Note that we cannot just put a link in the interstitial at this point.
+ // It is not OK to navigate in the context of an interstitial page.
+ DCHECK(result_ == SafeBrowsingService::URL_PHISHING);
+ GURL report_url =
+ safe_browsing_util::GeneratePhishingReportUrl(kSbReportPhishingUrl,
+ url_.spec());
+ web->OpenURL(report_url, CURRENT_TAB, PageTransition::LINK);
+ return;
+ }
+ if (user_action == "4") {
+ // We're going to take the user to Google's SafeBrowsing diagnostic page.
+ // The blocked URL is escaped before it is placed in the query string.
+ std::string diagnostic =
+ StringPrintf(kSbDiagnosticUrl,
+ EscapeQueryParamValue(url_.spec()).c_str());
+ GURL diagnostic_url(diagnostic);
+ diagnostic_url = google_util::AppendGoogleLocaleParam(diagnostic_url);
+ DCHECK(result_ == SafeBrowsingService::URL_MALWARE);
+ web->OpenURL(diagnostic_url, CURRENT_TAB, PageTransition::LINK);
+ return;
+ }
+
+ // Only "1" means proceed; everything else falls through to GoBack().
+ proceed_ = user_action == "1";
+
+ if (proceed_) {
+ // We are continuing, if we have created a temporary navigation entry,
+ // delete it as a new will be created on navigation.
+ if (created_temporary_entry_)
+ web->controller()->RemoveLastEntry();
+ if (is_main_frame_)
+ web->HideInterstitialPage(true, true);
+ else
+ web->HideInterstitialPage(false, false);
+ } else {
+ GoBack();
+ }
+
+ // Report the decision. This is a no-op if NotifyDone() already ran.
+ NotifyDone();
+}
+
+// Common teardown: stops observing the tab, reports completion, and drops the
+// extra reference taken in DisplayBlockingPage().
+void SafeBrowsingBlockingPage::HandleClose() {
+  NotificationService* service = NotificationService::current();
+  DCHECK(service);
+
+  // |controller_| and |tab_| are used here only as notification sources.
+  service->RemoveObserver(this, NOTIFY_TAB_CLOSING,
+                          Source<NavigationController>(controller_));
+  service->RemoveObserver(this, NOTIFY_DOM_OPERATION_RESPONSE,
+                          Source<TabContents>(tab_));
+
+  NotifyDone();
+
+  // Balances the AddRef() in DisplayBlockingPage().
+  Release();
+}
+
+// Tells the SafeBrowsingService, on the IO thread, that the user is done with
+// this blocking page and whether they chose to proceed. Only the first call
+// has any effect; later calls return immediately.
+void SafeBrowsingBlockingPage::NotifyDone() {
+  if (delete_pending_)
+    return;
+  delete_pending_ = true;
+
+  // Ensure the WebContents does not keep a pointer to us.
+  WebContents* web_contents = tab_ ? tab_->AsWebContents() : NULL;
+  if (web_contents)
+    web_contents->set_interstitial_delegate(NULL);
+
+  // Without an IO thread there is nobody to report to.
+  Thread* io_thread = g_browser_process->io_thread();
+  if (io_thread) {
+    io_thread->message_loop()->PostTask(FROM_HERE, NewRunnableMethod(
+        sb_service_,
+        &SafeBrowsingService::OnBlockingPageDone,
+        this, client_, proceed_));
+  }
+}
diff --git a/chrome/browser/safe_browsing/safe_browsing_blocking_page.h b/chrome/browser/safe_browsing/safe_browsing_blocking_page.h
new file mode 100644
index 0000000..90dc03a
--- /dev/null
+++ b/chrome/browser/safe_browsing/safe_browsing_blocking_page.h
@@ -0,0 +1,135 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Classes for managing the SafeBrowsing interstitial pages.
+//
+// When a user is about to visit a page the SafeBrowsing system has deemed to
+// be malicious, either as malware or a phishing page, we show an interstitial
+// page with some options (go back, continue) to give the user a chance to avoid
+// the harmful page.
+//
+// The SafeBrowsingBlockingPage is created by the SafeBrowsingService on the IO
+// thread when we've determined that a page is malicious. The operation of the
+// blocking page occurs on the UI thread, where it waits for the user to make a
+// decision about what to do: either go back or continue on.
+//
+// The blocking page forwards the result of the user's choice back to the
+// SafeBrowsingService so that we can cancel the request for the new page, or
+// allow it to continue.
+
+#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_BLOCKING_PAGE_H__
+#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_BLOCKING_PAGE_H__
+
+#include "base/logging.h"
+#include "chrome/browser/interstitial_page_delegate.h"
+#include "chrome/browser/safe_browsing/safe_browsing_service.h"
+#include "chrome/common/notification_service.h"
+#include "googleurl/src/gurl.h"
+
+class MessageLoop;
+class TabContents;
+class NavigationController;
+
+// Interstitial shown in place of a page or sub-resource that SafeBrowsing has
+// flagged. It acts as the interstitial's delegate, observes notifications
+// from the affected tab, and is reference counted.
+class SafeBrowsingBlockingPage
+ : public InterstitialPageDelegate,
+ public base::RefCountedThreadSafe<SafeBrowsingBlockingPage>,
+ public NotificationObserver {
+ public:
+ // Created and destroyed on the IO thread, operates on the UI thread.
+ // |service| and |client| receive the user's decision, the two IDs identify
+ // the tab to block, |url| is the flagged URL, |resource_type| says whether
+ // it is the main frame, and |result| is the SafeBrowsing verdict.
+ SafeBrowsingBlockingPage(SafeBrowsingService* service,
+ SafeBrowsingService::Client* client,
+ int render_process_host_id,
+ int render_view_id,
+ const GURL& url,
+ ResourceType::Type resource_type,
+ SafeBrowsingService::UrlCheckResult result);
+ ~SafeBrowsingBlockingPage();
+
+ // Display the page to the user. This method runs on the UI thread.
+ void DisplayBlockingPage();
+
+ // NotificationObserver interface, runs on the UI thread.
+ virtual void Observe(NotificationType type,
+ const NotificationSource& source,
+ const NotificationDetails& details);
+
+ // Accessors for the values passed to the constructor.
+ const GURL& url() { return url_; }
+ int render_process_host_id() { return render_process_host_id_; }
+ int render_view_id() { return render_view_id_; }
+ SafeBrowsingService::UrlCheckResult result() { return result_; }
+
+ // InterstitialPageDelegate methods:
+ virtual void InterstitialClosed();
+ virtual bool GoBack();
+
+ private:
+ // Handle user action for blocking page navigation choices. |user_action| is
+ // the command string received from the interstitial page.
+ void Continue(const std::string& user_action);
+
+ // Unregisters the notification observers, calls NotifyDone() and releases
+ // the reference taken when the page was displayed.
+ void HandleClose();
+
+ // Tell the SafeBrowsingService that the handling of the current page is done.
+ // Only the first call has any effect.
+ void NotifyDone();
+
+ private:
+ // For reporting back user actions.
+ SafeBrowsingService* sb_service_;
+ SafeBrowsingService::Client* client_;
+ // NOTE(review): never read by the implementation file - confirm whether
+ // this member is still needed.
+ MessageLoop* report_loop_;
+
+ // For determining which tab to block.
+ int render_process_host_id_;
+ int render_view_id_;
+
+ // The flagged URL and the verdict that caused it to be blocked.
+ GURL url_;
+ SafeBrowsingService::UrlCheckResult result_;
+
+ // Inform the SafeBrowsingService whether we are continuing with this page
+ // load or going back to the previous page.
+ bool proceed_;
+
+ // Stored for use with the notification service, so that we can unregister
+ // as observers after the tab has gone (is NULL).
+ // NOTE(review): the implementation also calls methods through |tab_|
+ // (e.g. in GoBack() and NotifyDone()) - confirm that is safe.
+ TabContents* tab_;
+ NavigationController* controller_;
+
+ // Set once NotifyDone() has run, so that the service is told only once.
+ bool delete_pending_;
+
+ // True if the flagged resource is the main page, false if it is a
+ // sub-resource.
+ bool is_main_frame_;
+
+ // Whether we have created a temporary navigation entry as part of showing
+ // the blocking page.
+ bool created_temporary_entry_;
+
+ DISALLOW_EVIL_CONSTRUCTORS(SafeBrowsingBlockingPage);
+};
+
+#endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_BLOCKING_PAGE_H__ \ No newline at end of file
diff --git a/chrome/browser/safe_browsing/safe_browsing_database.cc b/chrome/browser/safe_browsing/safe_browsing_database.cc
new file mode 100644
index 0000000..9e7c223
--- /dev/null
+++ b/chrome/browser/safe_browsing/safe_browsing_database.cc
@@ -0,0 +1,1283 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "chrome/browser/safe_browsing/safe_browsing_database.h"
+
+#include "base/file_util.h"
+#include "base/logging.h"
+#include "base/message_loop.h"
+#include "base/sha2.h"
+#include "base/string_util.h"
+#include "chrome/browser/safe_browsing/bloom_filter.h"
+#include "chrome/browser/safe_browsing/chunk_range.h"
+#include "chrome/common/sqlite_compiled_statement.h"
+#include "chrome/common/sqlite_utils.h"
+
+// Database version. If this is different than what's stored on disk, the
+// database is reset.
+static const int kDatabaseVersion = 2;
+
+// Filename suffix for the bloom filter.
+static const wchar_t kBloomFilterFile[] = L" Filter";
+
+// Don't want to create too small of a bloom filter initially while we're
+// downloading the data and then keep having to rebuild it.
+static const int kBloomFilterMinSize = 250000;
+
+// How many bits to use per item. See the design doc for more information.
+static const int kBloomFilterSizeRatio = 13;
+
+// The minimum number of reads/misses before we will consider rebuilding the
+// bloom filter. This is needed because we don't want a few misses after
+// starting the browser to skew the percentage.
+// TODO(jabdelmalek): report to UMA how often we rebuild.
+static const int kBloomFilterMinReadsToCheckFP = 200;
+
+// The false-positive rate of the bloom filter, in percent, above which we
+// regenerate it.
+static const double kBloomFilterMaxFPRate = 5.0;
+
+// When we awake from a low power state, we try to avoid doing expensive disk
+// operations for a few minutes to let the system page itself in and settle
+// down.
+static const int kOnResumeHoldupMs = 5 * 60 * 1000; // 5 minutes.
+
+// When doing any database operations that can take a long time, we do it in
+// small chunks up to this amount. Once this much time passes, we sleep for
+// the same amount and continue. This avoids blocking the thread so that if
+// we get a bloom filter hit, we don't block the network request.
+static const int kMaxThreadHoldupMs = 100;
+
+// How long to wait after updating the database to write the bloom filter.
+static const int kBloomFilterWriteDelayMs = (60 * 1000);
+
+// The maximum staleness for a cached entry.
+static const int kMaxStalenessMinutes = 45;
+
+// Constructs an unopened database object; Init() must be called before use.
+// Starts in asynchronous mode with the disk delay set to kMaxThreadHoldupMs.
+// Each task factory is bound to |this|, which is why warning 4355 ("this"
+// used in a member initializer list) is suppressed on those lines.
+SafeBrowsingDatabase::SafeBrowsingDatabase()
+ : db_(NULL),
+ init_(false),
+ transaction_count_(0),
+ asynchronous_(true),
+ chunk_inserted_callback_(NULL),
+#pragma warning(suppress: 4355) // can use this
+ bloom_read_factory_(this),
+#pragma warning(suppress: 4355) // can use this
+ bloom_write_factory_(this),
+#pragma warning(suppress: 4355) // can use this
+ process_factory_(this),
+#pragma warning(suppress: 4355) // can use this
+ reset_factory_(this),
+#pragma warning(suppress: 4355) // can use this
+ resume_factory_(this),
+ disk_delay_(kMaxThreadHoldupMs) {
+}
+
+// Closes the database if it is still open. The result of Close() is ignored.
+SafeBrowsingDatabase::~SafeBrowsingDatabase() {
+  Close();
+}
+
+// Opens the database stored at |filename| and prepares the bloom filter.
+//
+// If the "hosts" table is missing the schema is created. If creating it
+// fails, or the on-disk version is not kDatabaseVersion, the database is
+// reset. The existing bloom filter is loaded only when the database was
+// usable as found; otherwise a fresh, minimum-size filter is created.
+//
+// |chunk_inserted_callback| is stored for later use. Returns false if the
+// database could not be opened or reset; in that case |init_| stays false.
+bool SafeBrowsingDatabase::Init(const std::wstring& filename,
+                                Callback0::Type* chunk_inserted_callback) {
+  DCHECK(!init_ && filename_.empty());
+
+  filename_ = filename;
+
+  // ResetDatabase() deletes the file named by |bloom_filter_filename_|, so
+  // the name has to be known before any code path below can trigger a reset.
+  // Otherwise a stale filter file would be left next to the new database.
+  bloom_filter_filename_ = BloomFilterFilename(filename_);
+
+  if (!Open())
+    return false;
+
+  bool load_filter = false;
+  if (!DoesSqliteTableExist(db_, "hosts")) {
+    if (!CreateTables()) {
+      // Database could be corrupt, try starting from scratch.
+      if (!ResetDatabase())
+        return false;
+    }
+  } else if (!CheckCompatibleVersion()) {
+    if (!ResetDatabase())
+      return false;
+  } else {
+    load_filter = true;
+  }
+
+  if (load_filter) {
+    LoadBloomFilter();
+  } else {
+    bloom_filter_.reset(
+        new BloomFilter(kBloomFilterMinSize * kBloomFilterSizeRatio));
+  }
+
+  init_ = true;
+  chunk_inserted_callback_ = chunk_inserted_callback;
+  return true;
+}
+
+// Opens the SQLite file named by |filename_| and resets per-connection state:
+// a new statement cache, zeroed bloom filter counters, and revoked process
+// and bloom filter tasks. Returns false if SQLite could not open the file.
+bool SafeBrowsingDatabase::Open() {
+  const std::string utf8_filename = WideToUTF8(filename_);
+  if (sqlite3_open(utf8_filename.c_str(), &db_) != SQLITE_OK)
+    return false;
+
+  // Run the database in exclusive mode. Nobody else should be accessing the
+  // database while we're running, and this will give somewhat improved perf.
+  sqlite3_exec(db_, "PRAGMA locking_mode=EXCLUSIVE", NULL, NULL, NULL);
+
+  statement_cache_.reset(new SqliteStatementCache(db_));
+
+  // Start the false-positive accounting from scratch for this connection.
+  bloom_filter_read_count_ = 0;
+  bloom_filter_fp_count_ = 0;
+  bloom_filter_building_ = false;
+
+  // Cancel anything scheduled against a previous connection.
+  process_factory_.RevokeAll();
+  bloom_read_factory_.RevokeAll();
+  bloom_write_factory_.RevokeAll();
+
+  return true;
+}
+
+// Closes the database connection. Pending tasks are revoked and queued work
+// is discarded, ending the transaction associated with each queued item.
+// Returns true if the database was not open or SQLite closed it successfully.
+bool SafeBrowsingDatabase::Close() {
+  if (db_ == NULL)
+    return true;
+
+  process_factory_.RevokeAll();
+  bloom_read_factory_.RevokeAll();
+  bloom_write_factory_.RevokeAll();
+
+  // All queued add-del items share a single EndTransaction() call.
+  const bool had_pending_add_del = !pending_add_del_.empty();
+  while (!pending_add_del_.empty())
+    pending_add_del_.pop();
+  if (had_pending_add_del)
+    EndTransaction();
+
+  // Each queued chunk list is freed and gets its own EndTransaction() call.
+  while (!pending_chunks_.empty()) {
+    std::deque<SBChunk>* queued = pending_chunks_.front();
+    safe_browsing_util::FreeChunks(queued);
+    delete queued;
+    pending_chunks_.pop();
+    EndTransaction();
+  }
+
+  statement_cache_.reset();  // Must free statements before closing DB.
+  transaction_.reset();
+
+  const int close_result = sqlite3_close(db_);
+  db_ = NULL;
+  return close_result == SQLITE_OK;
+}
+
+// Creates the schema (hosts, chunks and list_names tables plus the chunk_id
+// index) and stamps the database with kDatabaseVersion, all inside one
+// transaction. Returns false, without committing, if a table cannot be
+// created or the version cannot be written. A failure to create the index is
+// ignored.
+bool SafeBrowsingDatabase::CreateTables() {
+  SQLTransaction transaction(db_);
+  transaction.Begin();
+
+  // We use an autoincrement integer as the primary key of hosts to allow full
+  // table scans to be quick. Otherwise if we used host, then we'd have to
+  // jump all over the table when doing a full table scan to generate the
+  // bloom filter and that's an order of magnitude slower. By marking host as
+  // unique, an index is created automatically.
+  static const char* const kCreateTableSql[] = {
+    "CREATE TABLE hosts ("
+    "id INTEGER PRIMARY KEY AUTOINCREMENT,"
+    "host INTEGER UNIQUE,"
+    "entries BLOB)",
+
+    "CREATE TABLE chunks ("
+    "list_id INTEGER,"
+    "chunk_type INTEGER,"
+    "chunk_id INTEGER,"
+    "hostkeys TEXT)",
+
+    "CREATE TABLE list_names ("
+    "id INTEGER PRIMARY KEY AUTOINCREMENT,"
+    "name TEXT)",
+  };
+  const size_t table_count =
+      sizeof(kCreateTableSql) / sizeof(kCreateTableSql[0]);
+  for (size_t i = 0; i < table_count; ++i) {
+    if (sqlite3_exec(db_, kCreateTableSql[i], NULL, NULL, NULL) != SQLITE_OK)
+      return false;
+  }
+
+  sqlite3_exec(db_, "CREATE INDEX chunks_chunk_id ON chunks(chunk_id)",
+               NULL, NULL, NULL);
+
+  // Record the schema version so CheckCompatibleVersion() can verify it.
+  std::string version("PRAGMA user_version=");
+  version.append(StringPrintf("%d", kDatabaseVersion));
+
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, version.c_str());
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return false;
+  }
+
+  if (statement->step() != SQLITE_DONE)
+    return false;
+
+  transaction.Commit();
+  return true;
+}
+
+// Throws away all stored data and starts over with an empty database: clears
+// the hash cache, deletes the database and bloom filter files, installs a
+// fresh minimum-size bloom filter, then reopens the database and recreates
+// the tables. Returns false if the database file cannot be deleted, reopened
+// or re-created.
+//
+// The SafeBrowsing service assumes this operation is synchronous.
+bool SafeBrowsingDatabase::ResetDatabase() {
+  hash_cache_.clear();
+
+  const bool closed = Close();
+  DCHECK(closed);
+
+  const bool database_deleted = file_util::Delete(filename_, false);
+  if (!database_deleted) {
+    NOTREACHED();
+    return false;
+  }
+
+  bloom_filter_.reset(
+      new BloomFilter(kBloomFilterMinSize * kBloomFilterSizeRatio));
+  file_util::Delete(bloom_filter_filename_, false);
+
+  return Open() && CreateTables();
+}
+
+// Returns true only if the user_version stored in the database equals
+// kDatabaseVersion. Returns false if the value cannot be read.
+bool SafeBrowsingDatabase::CheckCompatibleVersion() {
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+                          "PRAGMA user_version");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return false;
+  }
+
+  if (statement->step() != SQLITE_ROW)
+    return false;
+
+  const int stored_version = statement->column_int(0);
+  return stored_version == kDatabaseVersion;
+}
+
+// Checks |url| against the database.
+//
+// |matching_list| and |prefix_hits| are cleared first. On a match,
+// |matching_list| holds the name of the matching list (full-hash match)
+// and/or |prefix_hits| holds the matching prefixes; cached full hashes for
+// those prefixes are then fetched into |full_hits| via GetCachedFullHashes(),
+// and the function returns true.
+//
+// Returns false if the database is not initialized, the URL is invalid, no
+// hosts could be generated from it, or nothing matched. A miss is counted as
+// a bloom filter false positive; once enough reads have been seen and the
+// false-positive percentage exceeds kBloomFilterMaxFPRate, the filter is
+// deleted and a rebuild is posted to the current message loop.
+bool SafeBrowsingDatabase::ContainsUrl(
+    const GURL& url,
+    std::string* matching_list,
+    std::vector<SBPrefix>* prefix_hits,
+    std::vector<SBFullHashResult>* full_hits,
+    Time last_update) {
+  matching_list->clear();
+  prefix_hits->clear();
+  if (!init_) {
+    DCHECK(false);
+    return false;
+  }
+
+  if (!url.is_valid())
+    return false;
+
+  std::vector<std::string> hosts, paths;
+  safe_browsing_util::GenerateHostsToCheck(url, &hosts);
+  safe_browsing_util::GeneratePathsToCheck(url, &paths);
+  if (hosts.size() == 0)
+    return false;
+
+  // Per the spec, if there is at least 3 components, check both the most
+  // significant three components and the most significant two components.
+  // If only two components, check the most significant two components.
+  // If it's an IP address, use the entire IP address as the host.
+  SBPrefix host_key_2, host_key_3, host_key_ip;
+  if (url.HostIsIPAddress()) {
+    base::SHA256HashString(url.host() + "/", &host_key_ip, sizeof(SBPrefix));
+    CheckUrl(url.host(), host_key_ip, paths, matching_list, prefix_hits);
+  } else {
+    base::SHA256HashString(hosts[0] + "/", &host_key_2, sizeof(SBPrefix));
+    if (hosts.size() > 1)
+      base::SHA256HashString(hosts[1] + "/", &host_key_3, sizeof(SBPrefix));
+
+    // The first host uses |host_key_2|; every later host uses |host_key_3|.
+    for (size_t i = 0; i < hosts.size(); ++i) {
+      SBPrefix host_key = i == 0 ? host_key_2 : host_key_3;
+      CheckUrl(hosts[i], host_key, paths, matching_list, prefix_hits);
+    }
+  }
+
+  if (!matching_list->empty() || !prefix_hits->empty()) {
+    GetCachedFullHashes(prefix_hits, full_hits, last_update);
+    return true;
+  }
+
+  // Check if we're getting too many FPs in the bloom filter, in which case
+  // it's time to rebuild it.
+  bloom_filter_fp_count_++;
+  if (!bloom_filter_building_ &&
+      bloom_filter_read_count_ > kBloomFilterMinReadsToCheckFP) {
+    // Multiply by 100.0 so the division is done in floating point. The read
+    // count is above kBloomFilterMinReadsToCheckFP here, so the divisor is
+    // non-zero.
+    const double fp_rate =
+        bloom_filter_fp_count_ * 100.0 / bloom_filter_read_count_;
+    if (fp_rate > kBloomFilterMaxFPRate) {
+      DeleteBloomFilter();
+      MessageLoop::current()->PostTask(FROM_HERE,
+          bloom_read_factory_.NewRunnableMethod(
+              &SafeBrowsingDatabase::BuildBloomFilter));
+    }
+  }
+
+  return false;
+}
+
+// Checks one host against the stored entries for |host_key|.
+//
+// Each host + path combination is hashed to a full SHA-256 hash and looked up
+// in the host's SBHostInfo. The outputs are only touched on a match:
+//   - if a list id is reported, |matching_list| receives its name;
+//   - otherwise, if no individual prefixes are reported, |host_key| itself
+//     is appended to |prefix_hits|;
+//   - otherwise every reported prefix is appended to |prefix_hits|.
+void SafeBrowsingDatabase::CheckUrl(const std::string& host,
+                                    SBPrefix host_key,
+                                    const std::vector<std::string>& paths,
+                                    std::string* matching_list,
+                                    std::vector<SBPrefix>* prefix_hits) {
+  // First see if there are any entries in the db for this host.
+  SBHostInfo host_info;
+  if (!ReadInfo(host_key, &host_info, NULL))
+    return;  // No hostkey found. This is definitely safe.
+
+  // One full hash per path to check.
+  std::vector<SBFullHash> full_hashes(paths.size());
+  for (size_t i = 0; i < paths.size(); ++i) {
+    base::SHA256HashString(host + paths[i], &full_hashes[i],
+                           sizeof(SBFullHash));
+  }
+
+  int list_id = -1;
+  std::vector<SBPrefix> matched_prefixes;
+  if (!host_info.Contains(full_hashes, &list_id, &matched_prefixes))
+    return;
+
+  if (list_id != -1) {
+    *matching_list = GetListName(list_id);
+    return;
+  }
+
+  if (matched_prefixes.empty()) {
+    prefix_hits->push_back(host_key);
+    return;
+  }
+
+  prefix_hits->insert(prefix_hits->end(),
+                      matched_prefixes.begin(), matched_prefixes.end());
+}
+
+// Loads the stored entries for |host_key| into |info|. If |id| is non-NULL it
+// receives the row id of the host record. Returns false if no row exists, the
+// query fails, or |info| cannot be initialized from the stored blob. A
+// SQLITE_CORRUPT result additionally triggers HandleCorruptDatabase().
+bool SafeBrowsingDatabase::ReadInfo(int host_key, SBHostInfo* info, int* id) {
+  STATS_COUNTER(L"SB.HostSelect", 1);
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+                          "SELECT id, entries FROM hosts WHERE host=?");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return false;
+  }
+
+  statement->bind_int(0, host_key);
+  switch (statement->step()) {
+    case SQLITE_ROW:
+      break;  // Found the host; decode it below.
+
+    case SQLITE_CORRUPT:
+      HandleCorruptDatabase();
+      return false;
+
+    case SQLITE_DONE:
+      return false;  // No entry for this host.
+
+    default:
+      DLOG(ERROR) << "SafeBrowsingDatabase got "
+          "statement->step() != SQLITE_ROW for "
+          << host_key;
+      return false;
+  }
+
+  if (id != NULL)
+    *id = statement->column_int(0);
+
+  return info->Initialize(statement->column_blob(1),
+                          statement->column_bytes(1));
+}
+
+// Stores |info| as the entries for |host_key|. An |id| of 0 inserts the row
+// without an explicit id; any other value writes the row with that id. The
+// host is added to the bloom filter afterwards, unless the statement could
+// not be prepared. A SQLITE_CORRUPT result triggers HandleCorruptDatabase().
+void SafeBrowsingDatabase::WriteInfo(int host_key,
+                                     const SBHostInfo& info,
+                                     int id) {
+  SQLITE_UNIQUE_STATEMENT(insert_statement, *statement_cache_,
+                          "INSERT OR REPLACE INTO hosts"
+                          "(host,entries)"
+                          "VALUES (?,?)");
+
+  SQLITE_UNIQUE_STATEMENT(replace_statement, *statement_cache_,
+                          "INSERT OR REPLACE INTO hosts"
+                          "(id,host,entries)"
+                          "VALUES (?,?,?)");
+
+  const bool has_row_id = (id != 0);
+  SqliteCompiledStatement& statement =
+      has_row_id ? replace_statement : insert_statement;
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return;
+  }
+
+  // Parameters are bound in order; the id comes first when it is present.
+  int next_param = 0;
+  if (has_row_id) {
+    statement->bind_int(next_param++, id);
+    STATS_COUNTER(L"SB.HostReplace", 1);
+  } else {
+    STATS_COUNTER(L"SB.HostInsert", 1);
+  }
+  statement->bind_int(next_param++, host_key);
+  statement->bind_blob(next_param++, info.data(), info.size());
+
+  const int step_result = statement->step();
+  if (step_result == SQLITE_CORRUPT) {
+    HandleCorruptDatabase();
+  } else {
+    DCHECK(step_result == SQLITE_DONE);
+  }
+
+  AddHostToBloomFilter(host_key);
+}
+
+// Removes the rows of the hosts table whose host column equals |host_key|.
+// A corrupt database is reported through HandleCorruptDatabase().
+void SafeBrowsingDatabase::DeleteInfo(int host_key) {
+  STATS_COUNTER(L"SB.HostDelete", 1);
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+      "DELETE FROM hosts WHERE host=?");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return;
+  }
+
+  statement->bind_int(0, host_key);
+  const int step_result = statement->step();
+  if (step_result != SQLITE_CORRUPT) {
+    DCHECK(step_result == SQLITE_DONE);
+  } else {
+    HandleCorruptDatabase();
+  }
+}
+
+// Starts the throttled work loop, but only when |process_factory_| is empty
+// (presumably meaning no RunThrottledWork task posted through it is still
+// outstanding -- confirm against the factory's contract).
+void SafeBrowsingDatabase::StartThrottledWork() {
+  if (!process_factory_.empty())
+    return;
+
+  RunThrottledWork();
+}
+
+// Drives the pending chunk and add-del work.  In asynchronous mode each call
+// performs one pass and, if work remains, re-posts itself through
+// |process_factory_| after |disk_delay_|.  In synchronous mode (unit tests
+// depend on updates happening synchronously) it sleeps between passes and
+// keeps looping until everything is done.
+void SafeBrowsingDatabase::RunThrottledWork() {
+  for (;;) {
+    // Add-del work is only attempted once the chunk work reports completion.
+    if (ProcessChunks() && ProcessAddDel())
+      return;
+
+    if (!asynchronous_) {
+      Sleep(kMaxThreadHoldupMs);
+      continue;
+    }
+
+    // Production path: throttle by continuing the work from a delayed task.
+    MessageLoop::current()->PostDelayedTask(FROM_HERE,
+        process_factory_.NewRunnableMethod(
+            &SafeBrowsingDatabase::RunThrottledWork),
+        disk_delay_);
+    return;
+  }
+}
+
+// Queues |chunks| for throttled processing.  Every entry is stamped with the
+// id of |list_name|, and add entries are additionally stamped with the number
+// of the chunk they belong to.  The queued batch is deleted by ProcessChunks()
+// once it has been fully processed.
+void SafeBrowsingDatabase::InsertChunks(const std::string& list_name,
+                                        std::deque<SBChunk>* chunks) {
+  // We're going to be updating the bloom filter, so remove its on-disk copy
+  // first: if the process crashes, a new one is generated on startup instead
+  // of a stale filter being read.
+  DeleteBloomFilter();
+
+  const int list_id = GetListID(list_name);
+  for (std::deque<SBChunk>::iterator chunk_it = chunks->begin();
+       chunk_it != chunks->end(); ++chunk_it) {
+    std::deque<SBChunkHost>& hosts = chunk_it->hosts;
+    for (std::deque<SBChunkHost>::iterator host_it = hosts.begin();
+         host_it != hosts.end(); ++host_it) {
+      SBEntry* entry = host_it->entry;
+      entry->set_list_id(list_id);
+      if (entry->IsAdd())
+        entry->set_chunk_id(chunk_it->chunk_number);
+    }
+  }
+
+  pending_chunks_.push(chunks);
+
+  // The matching EndTransaction() is issued by ProcessChunks() when this batch
+  // has been consumed.
+  BeginTransaction();
+  StartThrottledWork();
+}
+
+// Works through the chunk batches queued in |pending_chunks_|.  Returns true
+// once the queue is empty, or false if ProcessAddChunks()/ProcessSubChunks()
+// stopped part-way through a batch; that batch stays at the front of the queue
+// so that a later call resumes it.  Each finished batch is deleted and
+// EndTransaction() is called for it.  When the queue drains, the bloom filter
+// is written to disk (right away in synchronous mode, from a delayed task
+// otherwise) unless a rebuild is in progress, and |chunk_inserted_callback_|
+// is run if it is set.
+bool SafeBrowsingDatabase::ProcessChunks() {
+  if (pending_chunks_.empty())
+    return true;
+
+  while (!pending_chunks_.empty()) {
+    std::deque<SBChunk>* chunks = pending_chunks_.front();
+
+    // A batch with no chunks has nothing to process.  Treat it as finished
+    // rather than calling front() on an empty deque, which is undefined.
+    bool done = true;
+    if (!chunks->empty()) {
+      // The entries in one chunk are all either adds or subs, so the first
+      // entry decides how the batch is handled.
+      // NOTE(review): this still assumes the first chunk has at least one
+      // host entry -- confirm that the parser guarantees it.
+      if (chunks->front().hosts.front().entry->IsAdd()) {
+        done = ProcessAddChunks(chunks);
+      } else {
+        done = ProcessSubChunks(chunks);
+      }
+    }
+
+    if (!done)
+      return false;
+
+    delete chunks;
+    pending_chunks_.pop();
+    EndTransaction();
+  }
+
+  if (!bloom_filter_building_) {
+    if (asynchronous_) {
+      // When we're updating, there will usually be a bunch of pending_chunks_
+      // to process, and we don't want to keep writing the bloom filter to disk
+      // 10 or 20 times unnecessarily.  So schedule the write for later, and if
+      // any new updates happen in the meantime, push that forward.
+      if (!bloom_write_factory_.empty())
+        bloom_write_factory_.RevokeAll();
+
+      MessageLoop::current()->PostDelayedTask(FROM_HERE,
+          bloom_write_factory_.NewRunnableMethod(
+              &SafeBrowsingDatabase::WriteBloomFilter),
+          kBloomFilterWriteDelayMs);
+    } else {
+      WriteBloomFilter();
+    }
+  }
+
+  if (chunk_inserted_callback_)
+    chunk_inserted_callback_->Run();
+
+  return true;
+}
+
+// Applies the add chunks in |chunks|, consuming them from the front.  Returns
+// true when the deque has been emptied.  Returns false if more than
+// kMaxThreadHoldupMs elapsed while a chunk still had hosts left; the
+// unprocessed remainder stays in |chunks| so that a later call can resume.
+// The host keys touched by the chunk currently being processed accumulate in
+// |add_chunk_modified_hosts_| (a member, so the list survives such a pause)
+// and are recorded together with the chunk id once the chunk is complete.
+bool SafeBrowsingDatabase::ProcessAddChunks(std::deque<SBChunk>* chunks) {
+  Time before = Time::Now();
+  while (!chunks->empty()) {
+    SBChunk& chunk = chunks->front();
+    int list_id = chunk.hosts.front().entry->list_id();
+    int chunk_id = chunk.chunk_number;
+
+    // The server can give us a chunk that we already have because it's part of
+    // a range.  Don't add it again.
+    if (!ChunkExists(list_id, ADD_CHUNK, chunk_id)) {
+      while (!chunk.hosts.empty()) {
+        // Merge this entry into the record for its host.
+        SBPrefix host = chunk.hosts.front().host;
+        SBEntry* entry = chunk.hosts.front().entry;
+
+        UpdateInfo(host, entry, false);
+
+        if (!add_chunk_modified_hosts_.empty())
+          add_chunk_modified_hosts_.append(",");
+
+        add_chunk_modified_hosts_.append(StringPrintf("%d", host));
+
+        entry->Destroy();
+        chunk.hosts.pop_front();
+        if (!chunk.hosts.empty() &&
+            (Time::Now() - before).InMilliseconds() > kMaxThreadHoldupMs) {
+          return false;
+        }
+      }
+
+      AddChunkInformation(list_id, ADD_CHUNK, chunk_id,
+                          add_chunk_modified_hosts_);
+      add_chunk_modified_hosts_.clear();
+    } else {
+      // Nothing is written for a chunk we already have, but its entries are
+      // still raw pointers that are released with Destroy() everywhere else
+      // in this function.  Release them here too; otherwise they are dropped
+      // without being freed when the chunk is popped.
+      while (!chunk.hosts.empty()) {
+        chunk.hosts.front().entry->Destroy();
+        chunk.hosts.pop_front();
+      }
+    }
+
+    chunks->pop_front();
+  }
+
+  return true;
+}
+
+// Applies the sub chunks in |chunks|, consuming them from the front.  Returns
+// true when the deque has been emptied.  Returns false if more than
+// kMaxThreadHoldupMs elapsed while a chunk still had hosts left; the
+// unprocessed remainder stays in |chunks| so that a later call can resume.
+bool SafeBrowsingDatabase::ProcessSubChunks(std::deque<SBChunk>* chunks) {
+  Time before = Time::Now();
+  while (!chunks->empty()) {
+    SBChunk& chunk = chunks->front();
+    int list_id = chunk.hosts.front().entry->list_id();
+    int chunk_id = chunk.chunk_number;
+
+    if (!ChunkExists(list_id, SUB_CHUNK, chunk_id)) {
+      while (!chunk.hosts.empty()) {
+        SBPrefix host = chunk.hosts.front().host;
+        SBEntry* entry = chunk.hosts.front().entry;
+        // persist == true: see UpdateInfo()/RemovePrefixes() for how a sub
+        // without a matching add is handled.
+        UpdateInfo(host, entry, true);
+
+        entry->Destroy();
+        chunk.hosts.pop_front();
+        if (!chunk.hosts.empty() &&
+            (Time::Now() - before).InMilliseconds() > kMaxThreadHoldupMs) {
+          return false;
+        }
+      }
+
+      // Sub chunks record no host keys.
+      AddChunkInformation(list_id, SUB_CHUNK, chunk_id, "");
+    } else {
+      // Nothing is written for a chunk we already have, but its entries are
+      // still raw pointers that are released with Destroy() everywhere else
+      // in this function.  Release them here too; otherwise they are dropped
+      // without being freed when the chunk is popped.
+      while (!chunk.hosts.empty()) {
+        chunk.hosts.front().entry->Destroy();
+        chunk.hosts.pop_front();
+      }
+    }
+
+    chunks->pop_front();
+  }
+
+  return true;
+}
+
+// Merges |entry| into the stored record for |host_key|: add entries have their
+// prefixes added, other entries have their prefixes removed (after clearing
+// any cached hashes for them), with |persist| forwarded to RemovePrefixes().
+// The record is only touched on disk when its size changed: it is deleted if
+// it became empty, written without an id if it grew, and written over its old
+// row id otherwise, which reduces database fragmentation.
+void SafeBrowsingDatabase::UpdateInfo(SBPrefix host_key,
+                                      SBEntry* entry,
+                                      bool persist) {
+  int row_id = 0;
+  SBHostInfo info;
+
+  // Without a bloom filter we have to assume the record may exist; with one,
+  // the database read is skipped when the filter rules the host out.
+  bool exists = false;
+  if (!bloom_filter_.get() || bloom_filter_->Exists(host_key))
+    exists = ReadInfo(host_key, &info, &row_id);
+
+  const int old_size = info.size();
+
+  if (!entry->IsAdd()) {
+    ClearCachedHashes(entry);
+    info.RemovePrefixes(entry, persist);
+  } else {
+    info.AddPrefixes(entry);
+  }
+
+  const int new_size = info.size();
+  if (new_size == old_size) {
+    // An unchanged size is taken to mean an unchanged record, so skip the
+    // write.
+    return;
+  }
+
+  if (new_size == 0) {
+    // Remove the stored record rather than writing an empty one.
+    if (exists)
+      DeleteInfo(host_key);
+    return;
+  }
+
+  // A record that grew is written without an explicit id; one that shrank
+  // keeps the id of the row it was read from.
+  WriteInfo(host_key, info, new_size > old_size ? 0 : row_id);
+}
+
+// Handles a set of chunk delete commands.  Every chunk number in every range
+// is passed to SubDel() or AddDel() depending on the command's type.  If that
+// queued add-del work where there was none before, the throttled work loop is
+// started under its own transaction reference.  |chunk_deletes| is deleted
+// before returning.
+void SafeBrowsingDatabase::DeleteChunks(
+    std::vector<SBChunkDelete>* chunk_deletes) {
+  BeginTransaction();
+  const bool add_del_queue_was_empty = pending_add_del_.empty();
+
+  for (std::vector<SBChunkDelete>::const_iterator cmd = chunk_deletes->begin();
+       cmd != chunk_deletes->end(); ++cmd) {
+    std::vector<int> chunk_numbers;
+    RangesToChunks(cmd->chunk_del, &chunk_numbers);
+    for (std::vector<int>::const_iterator number = chunk_numbers.begin();
+         number != chunk_numbers.end(); ++number) {
+      if (!cmd->is_sub_del) {
+        AddDel(cmd->list_name, *number);
+      } else {
+        SubDel(cmd->list_name, *number);
+      }
+    }
+  }
+
+  if (add_del_queue_was_empty && !pending_add_del_.empty()) {
+    // Only take a transaction reference for pending AddDel work if one wasn't
+    // already taken for earlier work; ProcessAddDel() releases it.
+    BeginTransaction();
+    StartThrottledWork();
+  }
+
+  delete chunk_deletes;
+  EndTransaction();
+}
+
+// Queues an add-del for |add_chunk_id| of |list_name|.  The comma separated
+// host keys recorded for that add chunk are read from the chunks table, split,
+// and pushed onto |pending_add_del_| for ProcessAddDel() to work through.
+// Nothing is queued if the chunk's row cannot be read.
+void SafeBrowsingDatabase::AddDel(const std::string& list_name,
+                                  int add_chunk_id) {
+  STATS_COUNTER(L"SB.ChunkSelect", 1);
+  const int list_id = GetListID(list_name);
+  // Find all the host keys that came from the given add_chunk_id.
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+      "SELECT hostkeys FROM chunks WHERE "
+      "list_id=? AND chunk_type=? AND chunk_id=?");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return;
+  }
+
+  statement->bind_int(0, list_id);
+  statement->bind_int(1, ADD_CHUNK);
+  statement->bind_int(2, add_chunk_id);
+
+  const int step_result = statement->step();
+  if (step_result == SQLITE_CORRUPT) {
+    HandleCorruptDatabase();
+    return;
+  }
+
+  std::string joined_hostkeys;
+  if (step_result != SQLITE_ROW ||
+      !statement->column_string(0, &joined_hostkeys)) {
+    NOTREACHED();
+    return;
+  }
+
+  AddDelWork pending;
+  pending.list_id = list_id;
+  pending.add_chunk_id = add_chunk_id;
+  pending_add_del_.push(pending);
+  // Split straight into the queued copy so the key list isn't copied again.
+  SplitString(joined_hostkeys, ',', &pending_add_del_.back().hostkeys);
+}
+
+// Works through the queued add-del commands.  For each one, the cached hashes
+// for the chunk are cleared, every recorded host key has the chunk's prefixes
+// removed through UpdateInfo(), and finally the chunk id is removed from the
+// chunks table.  Returns true when the queue is empty (calling
+// EndTransaction() if there was work), or false if more than
+// kMaxThreadHoldupMs elapsed; the unprocessed host keys then stay queued so
+// that a later call can resume.
+bool SafeBrowsingDatabase::ProcessAddDel() {
+  if (pending_add_del_.empty())
+    return true;
+
+  Time before = Time::Now();
+  while (!pending_add_del_.empty()) {
+    AddDelWork& add_del_work = pending_add_del_.front();
+    ClearCachedHashesForChunk(add_del_work.list_id, add_del_work.add_chunk_id);
+    std::vector<std::string>& hostkeys = add_del_work.hostkeys;
+    for (size_t i = 0; i < hostkeys.size(); ++i) {
+      SBPrefix host = atoi(hostkeys[i].c_str());
+      // Doesn't matter if we use SUB_PREFIX or SUB_FULL_HASH since if there
+      // are no prefixes it's not used.
+      SBEntry* entry = SBEntry::Create(SBEntry::SUB_PREFIX, 0);
+      entry->set_list_id(add_del_work.list_id);
+      entry->set_chunk_id(add_del_work.add_chunk_id);
+      UpdateInfo(host, entry, false);
+      entry->Destroy();
+      if ((Time::Now() - before).InMilliseconds() > kMaxThreadHoldupMs) {
+        // Host keys [0, i] have all been handled at this point, so drop them
+        // through index i inclusive.  Stopping the erase at i would leave the
+        // key that was just processed at the front, to be processed again on
+        // resume (and would record no progress at all when i == 0).
+        hostkeys.erase(hostkeys.begin(), hostkeys.begin() + i + 1);
+        return false;
+      }
+    }
+
+    RemoveChunkId(add_del_work.list_id, ADD_CHUNK, add_del_work.add_chunk_id);
+    pending_add_del_.pop();
+  }
+
+  // Releases the transaction reference taken in DeleteChunks() when the
+  // add-del work was started.
+  EndTransaction();
+
+  return true;
+}
+
+// Handles a sub-del: removes |sub_chunk_id| of |list_name| from the chunks
+// table.
+void SafeBrowsingDatabase::SubDel(const std::string& list_name,
+                                  int sub_chunk_id) {
+  const int list_id = GetListID(list_name);
+  RemoveChunkId(list_id, SUB_CHUNK, sub_chunk_id);
+}
+
+// Inserts a row into the chunks table recording that chunk |chunk_id| of kind
+// |type| has been seen for |list_id|, together with the |hostkeys| string
+// associated with it.
+void SafeBrowsingDatabase::AddChunkInformation(int list_id,
+                                               ChunkType type,
+                                               int chunk_id,
+                                               const std::string& hostkeys) {
+  STATS_COUNTER(L"SB.ChunkInsert", 1);
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+      "INSERT INTO chunks"
+      "(list_id,chunk_type,chunk_id,hostkeys)"
+      "VALUES (?,?,?,?)");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return;
+  }
+
+  statement->bind_int(0, list_id);
+  statement->bind_int(1, type);
+  statement->bind_int(2, chunk_id);
+  statement->bind_string(3, hostkeys);
+
+  const int step_result = statement->step();
+  if (step_result != SQLITE_CORRUPT) {
+    DCHECK(step_result == SQLITE_DONE);
+  } else {
+    HandleCorruptDatabase();
+  }
+}
+
+// Replaces the contents of |lists| with one element per row of the list_names
+// table, each filled in with that list's add and sub chunk ids as produced by
+// GetChunkIds().
+void SafeBrowsingDatabase::GetListsInfo(std::vector<SBListChunkRanges>* lists) {
+  lists->clear();
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+      "SELECT name,id FROM list_names");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return;
+  }
+
+  int step_result;
+  while ((step_result = statement->step()) == SQLITE_ROW) {
+    const int list_id = statement->column_int(1);
+    lists->push_back(SBListChunkRanges(statement->column_string(0)));
+
+    SBListChunkRanges& current = lists->back();
+    GetChunkIds(list_id, ADD_CHUNK, &current.adds);
+    GetChunkIds(list_id, SUB_CHUNK, &current.subs);
+  }
+
+  // Any non-row result ends the enumeration; only corruption needs handling.
+  if (step_result == SQLITE_CORRUPT)
+    HandleCorruptDatabase();
+}
+
+// Reads, in ascending order, the ids of all chunks of kind |type| stored for
+// |list_id|, converts them to ranges and stores the string form of those
+// ranges in |list| (which is cleared first).
+void SafeBrowsingDatabase::GetChunkIds(int list_id,
+                                       ChunkType type,
+                                       std::string* list) {
+  list->clear();
+  STATS_COUNTER(L"SB.ChunkSelect", 1);
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+      "SELECT chunk_id FROM chunks WHERE list_id=? AND chunk_type=? "
+      "ORDER BY chunk_id");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return;
+  }
+
+  statement->bind_int(0, list_id);
+  statement->bind_int(1, type);
+
+  std::vector<int> ids;
+  int step_result;
+  while ((step_result = statement->step()) == SQLITE_ROW)
+    ids.push_back(statement->column_int(0));
+
+  if (step_result == SQLITE_CORRUPT)
+    HandleCorruptDatabase();
+
+  // Whatever was read before the enumeration stopped is still reported.
+  std::vector<ChunkRange> id_ranges;
+  ChunksToRanges(ids, &id_ranges);
+  RangesToString(id_ranges, list);
+}
+
+// Returns true if the chunks table has a row for chunk |chunk_id| of kind
+// |type| in |list_id|.  Returns false otherwise, including when the statement
+// is invalid or the database is found to be corrupt.
+bool SafeBrowsingDatabase::ChunkExists(int list_id,
+                                       ChunkType type,
+                                       int chunk_id) {
+  STATS_COUNTER(L"SB.ChunkSelect", 1);
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+      "SELECT chunk_id FROM chunks WHERE"
+      " list_id=? AND chunk_type=? AND chunk_id=?");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return false;
+  }
+
+  statement->bind_int(0, list_id);
+  statement->bind_int(1, type);
+  statement->bind_int(2, chunk_id);
+
+  const int step_result = statement->step();
+  if (step_result == SQLITE_ROW)
+    return true;
+
+  if (step_result == SQLITE_CORRUPT)
+    HandleCorruptDatabase();
+
+  return false;
+}
+
+// Deletes from the chunks table the row(s) for chunk |chunk_id| of kind |type|
+// in |list_id|.
+void SafeBrowsingDatabase::RemoveChunkId(int list_id,
+                                         ChunkType type,
+                                         int chunk_id) {
+  STATS_COUNTER(L"SB.ChunkDelete", 1);
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+      "DELETE FROM chunks WHERE list_id=? AND chunk_type=? AND chunk_id=?");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return;
+  }
+
+  statement->bind_int(0, list_id);
+  statement->bind_int(1, type);
+  statement->bind_int(2, chunk_id);
+
+  const int step_result = statement->step();
+  if (step_result != SQLITE_CORRUPT) {
+    DCHECK(step_result == SQLITE_DONE);
+  } else {
+    HandleCorruptDatabase();
+  }
+}
+
+// Inserts |name| into the list_names table with a NULL id and returns the
+// value of sqlite3_last_insert_rowid() for the connection.  Returns 0 if the
+// statement is invalid or the insert does not complete.
+int SafeBrowsingDatabase::AddList(const std::string& name) {
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+      "INSERT INTO list_names"
+      "(id,name)"
+      "VALUES (NULL,?)");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return 0;
+  }
+
+  statement->bind_string(0, name);
+  const int step_result = statement->step();
+  if (step_result == SQLITE_DONE)
+    return static_cast<int>(sqlite3_last_insert_rowid(db_));
+
+  // The insert failed: corruption is handled, anything else is unexpected.
+  if (step_result == SQLITE_CORRUPT) {
+    HandleCorruptDatabase();
+  } else {
+    NOTREACHED();
+  }
+
+  return 0;
+}
+
+// Returns the id stored for |name| in the list_names table.  If no row is
+// read, the list is added through AddList() and that result is returned.
+// Returns 0 if the statement is invalid.
+int SafeBrowsingDatabase::GetListID(const std::string& name) {
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+      "SELECT id FROM list_names WHERE name=?");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return 0;
+  }
+
+  statement->bind_string(0, name);
+  switch (statement->step()) {
+    case SQLITE_ROW:
+      return statement->column_int(0);
+
+    case SQLITE_CORRUPT:
+      HandleCorruptDatabase();
+      break;
+
+    default:
+      break;
+  }
+
+  // No existing entry was read, so add one.
+  return AddList(name);
+}
+
+// Returns the name stored for list |id| in the list_names table, or an empty
+// string if the statement is invalid or no row is read.
+std::string SafeBrowsingDatabase::GetListName(int id) {
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+      "SELECT name FROM list_names WHERE id=?");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    // Must be an explicit empty string: "return 0" would construct the
+    // std::string from a null pointer, which is undefined behavior.
+    return std::string();
+  }
+
+  statement->bind_int(0, id);
+  int result = statement->step();
+  if (result != SQLITE_ROW) {
+    if (result == SQLITE_CORRUPT)
+      HandleCorruptDatabase();
+
+    return std::string();
+  }
+
+  return statement->column_string(0);
+}
+
+// Returns the bloom filter's file name for a database stored at
+// |db_filename|: the database file name with kBloomFilterFile appended.
+std::wstring SafeBrowsingDatabase::BloomFilterFilename(
+    const std::wstring& db_filename) {
+  std::wstring bloom_filename(db_filename);
+  bloom_filename += kBloomFilterFile;
+  return bloom_filename;
+}
+
+// Loads the serialized bloom filter from |bloom_filter_filename_|.  If the
+// file is missing, empty, has a size that does not fit in an int, or cannot be
+// read in full, a new filter is built from the database instead.
+void SafeBrowsingDatabase::LoadBloomFilter() {
+  DCHECK(!bloom_filter_filename_.empty());
+
+  int64 size_64;
+  if (!file_util::GetFileSize(bloom_filter_filename_, &size_64) ||
+      size_64 == 0) {
+    BuildBloomFilter();
+    return;
+  }
+
+  // The read API takes an int size.  A size that does not survive the
+  // narrowing cannot be a filter we wrote, so regenerate rather than
+  // allocating a buffer of the truncated size.
+  int size = static_cast<int>(size_64);
+  if (size <= 0 || static_cast<int64>(size) != size_64) {
+    BuildBloomFilter();
+    return;
+  }
+
+  char* data = new char[size];
+  CHECK(data);
+
+  Time before = Time::Now();
+  // NOTE(review): relies on file_util::ReadFile() returning the number of
+  // bytes read (negative on error) -- confirm against base/file_util.h.
+  const int bytes_read =
+      file_util::ReadFile(bloom_filter_filename_, data, size);
+  if (bytes_read != size) {
+    // A failed or short read would leave (part of) |data| uninitialized, and a
+    // filter built from it could wrongly report hosts as absent.  Fall back to
+    // rebuilding from the database.
+    delete[] data;
+    BuildBloomFilter();
+    return;
+  }
+
+  SB_DLOG(INFO) << "SafeBrowsingDatabase read bloom filter in " <<
+      (Time::Now() - before).InMilliseconds() << " ms";
+
+  // |data| is handed to the filter and is not freed here.
+  bloom_filter_.reset(new BloomFilter(data, size));
+}
+
+// Removes the on-disk copy of the bloom filter.  The result of the delete is
+// not checked.
+void SafeBrowsingDatabase::DeleteBloomFilter() {
+  file_util::Delete(bloom_filter_filename_, false);
+}
+
+// Writes the in-memory bloom filter to |bloom_filter_filename_| and logs how
+// long that took.  Does nothing when there is no filter.
+void SafeBrowsingDatabase::WriteBloomFilter() {
+  BloomFilter* filter = bloom_filter_.get();
+  if (filter == NULL)
+    return;
+
+  Time before = Time::Now();
+  file_util::WriteFile(bloom_filter_filename_, filter->data(), filter->size());
+  SB_DLOG(INFO) << "SafeBrowsingDatabase wrote bloom filter in " <<
+      (Time::Now() - before).InMilliseconds() << " ms";
+}
+
+// Records |host_key| in the bloom filter.  While a rebuild is in progress the
+// key is also remembered in |bloom_filter_temp_hostkeys_| so that it ends up
+// in the rebuilt filter.
+void SafeBrowsingDatabase::AddHostToBloomFilter(int host_key) {
+  if (bloom_filter_building_)
+    bloom_filter_temp_hostkeys_.push_back(host_key);
+
+  // The current filter is updated even during a rebuild, because it is also
+  // consulted to decide whether certain database operations are needed while
+  // an update is being applied.
+  BloomFilter* current_filter = bloom_filter_.get();
+  if (current_filter != NULL)
+    current_filter->Insert(host_key);
+}
+
+// Starts regenerating the bloom filter from the hosts table.
+//
+// The filter has to be sized when it is created, but a "select count(*)" is
+// too slow because sqlite would have to enumerate every entry.  So the host
+// keys are first collected in memory, and the filter is only created once all
+// of them have been read and the total is known.
+void SafeBrowsingDatabase::BuildBloomFilter() {
+  bloom_filter_building_ = true;
+  bloom_filter_rebuild_time_ = Time::Now();
+  bloom_filter_temp_hostkeys_.reserve(kBloomFilterMinSize);
+
+  // Released by OnDoneReadingHostKeys().
+  BeginTransaction();
+
+  OnReadHostKeys(0);
+}
+
+// Reads a batch of host keys for the bloom filter rebuild: rows of the hosts
+// table whose id is greater than |start_id|, in ascending id order, appended
+// to |bloom_filter_temp_hostkeys_|.  In asynchronous mode a batch ends once
+// kMaxThreadHoldupMs has elapsed and the next batch is posted after
+// |disk_delay_|; in synchronous mode the thread sleeps instead and keeps
+// reading.  OnDoneReadingHostKeys() is called when a batch reads no rows, or
+// after the single pass in synchronous mode.
+void SafeBrowsingDatabase::OnReadHostKeys(int start_id) {
+  // Since reading all the keys in one go could take > 20 seconds, instead we
+  // read them in small chunks.
+  STATS_COUNTER(L"SB.HostSelectForBloomFilter", 1);
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+      "SELECT host,id FROM hosts WHERE id > ? ORDER BY id");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return;
+  }
+
+  statement->bind_int(0, start_id);
+  Time before = Time::Now();
+  int count = 0;
+
+  // Id of the last row consumed so far.  The query uses a strict "id > ?"
+  // comparison, so the next batch must resume from this id itself.  Resuming
+  // from last id + 1 would skip the row whose id is exactly last id + 1, and
+  // its host would be missing from the rebuilt filter.
+  int last_id = start_id;
+  while (true) {
+    int rv = statement->step();
+    if (rv != SQLITE_ROW) {
+      if (rv == SQLITE_CORRUPT)
+        HandleCorruptDatabase();
+
+      break;
+    }
+
+    count++;
+    bloom_filter_temp_hostkeys_.push_back(statement->column_int(0));
+    last_id = statement->column_int(1);
+    if ((Time::Now() - before).InMilliseconds() > kMaxThreadHoldupMs) {
+      if (asynchronous_) {
+        break;
+      } else {
+        Sleep(kMaxThreadHoldupMs);
+      }
+    }
+  }
+
+  TimeDelta chunk_time = Time::Now() - before;
+  int time_ms = static_cast<int>(chunk_time.InMilliseconds());
+  SB_DLOG(INFO) << "SafeBrowsingDatabase read " << count << " hostkeys in " <<
+      time_ms << " ms";
+
+  if (!count || !asynchronous_) {
+    OnDoneReadingHostKeys();
+    return;
+  }
+
+  // To avoid hammering the disk and disrupting other parts of Chrome that use
+  // the disk, we throttle the rebuilding.
+  MessageLoop::current()->PostDelayedTask(FROM_HERE,
+      bloom_read_factory_.NewRunnableMethod(
+          &SafeBrowsingDatabase::OnReadHostKeys, last_id),
+      disk_delay_);
+}
+
+// Finishes a bloom filter rebuild: creates a filter sized from the number of
+// collected host keys (never fewer than kBloomFilterMinSize), inserts every
+// collected key, installs it as the current filter, writes it to disk and
+// resets the rebuild state and counters.
+void SafeBrowsingDatabase::OnDoneReadingHostKeys() {
+  EndTransaction();
+  Time before = Time::Now();
+
+  int number_of_keys = static_cast<int>(bloom_filter_temp_hostkeys_.size());
+  if (number_of_keys < kBloomFilterMinSize)
+    number_of_keys = kBloomFilterMinSize;
+
+  // The new filter is fully populated before it replaces the current one.
+  BloomFilter* new_filter =
+      new BloomFilter(number_of_keys * kBloomFilterSizeRatio);
+  for (size_t k = 0; k < bloom_filter_temp_hostkeys_.size(); ++k)
+    new_filter->Insert(bloom_filter_temp_hostkeys_[k]);
+
+  bloom_filter_.reset(new_filter);
+
+  TimeDelta bloom_gen = Time::Now() - before;
+  TimeDelta delta = Time::Now() - bloom_filter_rebuild_time_;
+  SB_DLOG(INFO) << "SafeBrowsingDatabase built bloom filter in " <<
+      delta.InMilliseconds() << " ms total (" << bloom_gen.InMilliseconds()
+      << " ms to generate bloom filter).  hostkey count: " <<
+      bloom_filter_temp_hostkeys_.size();
+
+  WriteBloomFilter();
+
+  bloom_filter_building_ = false;
+  bloom_filter_temp_hostkeys_.clear();
+  bloom_filter_read_count_ = 0;
+  bloom_filter_fp_count_ = 0;
+}
+
+// Returns true if |url| has to be checked against the database: either there
+// is no bloom filter to consult, or the filter contains the host key of one of
+// the URL's candidate hosts.  For an IP address host only the URL's host is
+// tried; otherwise the first two hosts produced by GenerateHostsToCheck() are.
+bool SafeBrowsingDatabase::NeedToCheckUrl(const GURL& url) {
+  if (!bloom_filter_.get())
+    return true;
+
+  bloom_filter_read_count_++;
+
+  std::vector<std::string> hosts;
+  safe_browsing_util::GenerateHostsToCheck(url, &hosts);
+  if (hosts.empty())
+    return false;  // Could be about:blank.
+
+  // Collect the hosts whose keys are looked up, in lookup order.
+  std::vector<std::string> candidates;
+  if (url.HostIsIPAddress()) {
+    candidates.push_back(url.host());
+  } else {
+    candidates.push_back(hosts[0]);
+    if (hosts.size() > 1)
+      candidates.push_back(hosts[1]);
+  }
+
+  for (size_t n = 0; n < candidates.size(); ++n) {
+    // The host key is the leading sizeof(SBPrefix) bytes of SHA-256(host + "/").
+    SBPrefix host_key;
+    base::SHA256HashString(candidates[n] + "/", &host_key, sizeof(SBPrefix));
+    if (bloom_filter_->Exists(host_key))
+      return true;
+  }
+
+  return false;
+}
+
+// Takes a reference on the shared transaction, beginning a new SQLTransaction
+// if none is currently held.  The reference is counted even if beginning the
+// transaction fails.
+void SafeBrowsingDatabase::BeginTransaction() {
+  ++transaction_count_;
+  if (transaction_.get() != NULL)
+    return;
+
+  transaction_.reset(new SQLTransaction(db_));
+  if (transaction_->Begin() == SQLITE_OK)
+    return;
+
+  DCHECK(false) << "Safe browsing database couldn't start transaction";
+  transaction_.reset();
+}
+
+// Drops one reference on the shared transaction.  When the count reaches zero
+// and a transaction is held, it is committed and released.
+void SafeBrowsingDatabase::EndTransaction() {
+  --transaction_count_;
+  if (transaction_count_ != 0)
+    return;
+
+  if (transaction_.get() == NULL)
+    return;
+
+  STATS_COUNTER(L"SB.TransactionCommit", 1);
+  transaction_->Commit();
+  transaction_.reset();
+}
+
+// Appends to |full_hits| the cached full hash results for each prefix in
+// |prefix_hits|.  Cached entries that are no longer valid are evicted along
+// the way, and a prefix is dropped from the cache once its last entry is gone.
+void SafeBrowsingDatabase::GetCachedFullHashes(
+    const std::vector<SBPrefix>* prefix_hits,
+    std::vector<SBFullHashResult>* full_hits,
+    Time last_update) {
+  DCHECK(prefix_hits && full_hits);
+
+  Time max_age = Time::Now() - TimeDelta::FromMinutes(kMaxStalenessMinutes);
+
+  for (std::vector<SBPrefix>::const_iterator prefix_it = prefix_hits->begin();
+       prefix_it != prefix_hits->end(); ++prefix_it) {
+    HashCache::iterator cached = hash_cache_.find(*prefix_it);
+    if (cached == hash_cache_.end())
+      continue;
+
+    HashList& entries = cached->second;
+    for (HashList::iterator entry_it = entries.begin();
+         entry_it != entries.end();) {
+      // An entry is valid if an update was received within the last
+      // kMaxStalenessMinutes, or if this particular GetHash result was.
+      const bool still_valid =
+          max_age < last_update || entry_it->received > max_age;
+      if (!still_valid) {
+        // Evict the expired entry.
+        entry_it = entries.erase(entry_it);
+        continue;
+      }
+
+      SBFullHashResult result;
+      memcpy(&result.hash.full_hash,
+             &entry_it->full_hash.full_hash,
+             sizeof(SBFullHash));
+      result.list_name = GetListName(entry_it->list_id);
+      result.add_chunk_id = entry_it->add_chunk_id;
+      full_hits->push_back(result);
+      ++entry_it;
+    }
+
+    if (entries.empty())
+      hash_cache_.erase(cached);
+  }
+}
+
+// Adds each result in |full_hits| to the hash cache, keyed by the leading
+// sizeof(SBPrefix) bytes of its full hash and stamped with the current time.
+void SafeBrowsingDatabase::CacheHashResults(
+    const std::vector<SBFullHashResult>& full_hits) {
+  const Time now = Time::Now();
+  for (size_t n = 0; n < full_hits.size(); ++n) {
+    const SBFullHashResult& hit = full_hits[n];
+
+    SBPrefix prefix;
+    memcpy(&prefix, &hit.hash.full_hash, sizeof(prefix));
+
+    HashCacheEntry cache_entry;
+    cache_entry.received = now;
+    cache_entry.list_id = GetListID(hit.list_name);
+    cache_entry.add_chunk_id = hit.add_chunk_id;
+    memcpy(&cache_entry.full_hash, &hit.hash.full_hash, sizeof(SBFullHash));
+
+    // operator[] creates the list for a prefix that isn't cached yet.
+    hash_cache_[prefix].push_back(cache_entry);
+  }
+}
+
+// Drops from the hash cache everything stored under the prefixes of |entry|.
+// For a SUB_FULL_HASH entry the prefix is taken from the leading
+// sizeof(SBPrefix) bytes of each full hash.
+void SafeBrowsingDatabase::ClearCachedHashes(const SBEntry* entry) {
+  const int prefix_total = entry->prefix_count();
+  for (int index = 0; index < prefix_total; ++index) {
+    SBPrefix prefix;
+    if (entry->type() != SBEntry::SUB_FULL_HASH) {
+      prefix = entry->PrefixAt(index);
+    } else {
+      memcpy(&prefix, &entry->FullHashAt(index), sizeof(SBPrefix));
+    }
+
+    HashCache::iterator cached = hash_cache_.find(prefix);
+    if (cached == hash_cache_.end())
+      continue;
+
+    hash_cache_.erase(cached);
+  }
+}
+
+// Removes every cached GetHash entry that belongs to add chunk |add_chunk_id|
+// of list |list_id|, dropping prefixes whose entry list becomes empty.
+//
+// Walking the whole cache is a little inefficient, but we don't expect there
+// to be too many entries for this to matter.  Also, this runs as a background
+// task during an update, so no user action is blocking on it.
+void SafeBrowsingDatabase::ClearCachedHashesForChunk(int list_id,
+                                                     int add_chunk_id) {
+  for (HashCache::iterator cache_it = hash_cache_.begin();
+       cache_it != hash_cache_.end();) {
+    HashList& entries = cache_it->second;
+    for (HashList::iterator entry_it = entries.begin();
+         entry_it != entries.end();) {
+      const bool from_chunk = entry_it->list_id == list_id &&
+                              entry_it->add_chunk_id == add_chunk_id;
+      if (from_chunk)
+        entry_it = entries.erase(entry_it);
+      else
+        ++entry_it;
+    }
+
+    if (entries.empty())
+      cache_it = hash_cache_.erase(cache_it);
+    else
+      ++cache_it;
+  }
+}
+
+// Called when sqlite reports SQLITE_CORRUPT.  The handling itself is deferred:
+// a task created through |reset_factory_| is posted to the current message
+// loop to run OnHandleCorruptDatabase().
+void SafeBrowsingDatabase::HandleCorruptDatabase() {
+  MessageLoop::current()->PostTask(
+      FROM_HERE,
+      reset_factory_.NewRunnableMethod(
+          &SafeBrowsingDatabase::OnHandleCorruptDatabase));
+}
+
+// Task posted by HandleCorruptDatabase(): resets the database, then asserts in
+// debug builds so that the corruption doesn't go unnoticed.
+void SafeBrowsingDatabase::OnHandleCorruptDatabase() {
+  ResetDatabase();
+  DCHECK(false) << "SafeBrowsing database was corrupt and reset";
+}
+
+// Switches the delay used between throttled disk operations to
+// kOnResumeHoldupMs, and schedules OnResumeDone() to run after that same
+// interval to restore the normal delay.
+void SafeBrowsingDatabase::HandleResume() {
+  disk_delay_ = kOnResumeHoldupMs;
+  MessageLoop::current()->PostDelayedTask(FROM_HERE,
+      resume_factory_.NewRunnableMethod(&SafeBrowsingDatabase::OnResumeDone),
+      kOnResumeHoldupMs);
+}
+
+// Task posted by HandleResume(): puts the delay between throttled disk
+// operations back to kMaxThreadHoldupMs.
+void SafeBrowsingDatabase::OnResumeDone() {
+  disk_delay_ = kMaxThreadHoldupMs;
+} \ No newline at end of file
diff --git a/chrome/browser/safe_browsing/safe_browsing_database.h b/chrome/browser/safe_browsing/safe_browsing_database.h
new file mode 100644
index 0000000..0dbe5a9
--- /dev/null
+++ b/chrome/browser/safe_browsing/safe_browsing_database.h
@@ -0,0 +1,322 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H__
+#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H__
+
+#include <hash_map>
+#include <list>
+#include <queue>
+#include <vector>
+
+#include "base/scoped_ptr.h"
+#include "base/task.h"
+#include "base/time.h"
+#include "chrome/browser/safe_browsing/safe_browsing_util.h"
+#include "chrome/common/sqlite_compiled_statement.h"
+#include "chrome/common/sqlite_utils.h"
+#include "googleurl/src/gurl.h"
+
+class BloomFilter;
+
+// Encapsulates the database that stores information about phishing and malware
+// sites. There is one on-disk database for all profiles, as it doesn't
+// contain user-specific data. This object is not thread-safe, i.e. all its
+// methods should be used on the same thread that it was created on, with the
+// exception of NeedToCheckUrl.
+class SafeBrowsingDatabase {
+ public:
+ SafeBrowsingDatabase();
+ ~SafeBrowsingDatabase();
+
+ // Initializes the database with the given filename. The callback is
+ // executed after finishing a chunk.
+ bool Init(const std::wstring& filename,
+ Callback0::Type* chunk_inserted_callback);
+
+ // Deletes the current database and creates a new one.
+ bool ResetDatabase();
+
+ // This function can be called on any thread to check if the given url may be
+ // in the database. If this function returns false, it is definitely not in
+ // the database and ContainsUrl doesn't need to be called. If it returns
+ // true, then the url might be in the database and ContainsUrl needs to be
+ // called. This function can only be called after Init succeeded.
+ bool NeedToCheckUrl(const GURL& url);
+
+ // Returns false if the given url is not in the database. If it returns
+ // true, then either matching_list is set to the name of the matching list,
+ // or prefix_hits contains the matching hash prefixes.
+ bool ContainsUrl(const GURL& url,
+ std::string* matching_list,
+ std::vector<SBPrefix>* prefix_hits,
+ std::vector<SBFullHashResult>* full_hits,
+ Time last_update);
+
+ // Processes add/sub commands. Database will free the chunks when it's done.
+ void InsertChunks(const std::string& list_name, std::deque<SBChunk>* chunks);
+
+ // Processes adddel/subdel commands. Database will free chunk_deletes when
+ // it's done.
+ void DeleteChunks(std::vector<SBChunkDelete>* chunk_deletes);
+
+ // Returns the lists and their add/sub chunks.
+ void GetListsInfo(std::vector<SBListChunkRanges>* lists);
+
+ // Call this to make all database operations synchronous. While useful for
+ // testing, this should never be called in chrome.exe because it can lead
+ // to blocking user requests.
+ void set_synchronous() { asynchronous_ = false; }
+
+ // Store the results of a GetHash response.
+ void CacheHashResults(const std::vector<SBFullHashResult>& full_hits);
+
+ // Called when the user's machine has resumed from a lower power state.
+ void HandleResume();
+
+ private:
+ friend class SafeBrowsing_HashCaching_Test;
+
+ // Opens the database.
+ bool Open();
+
+ // Closes the database.
+ bool Close();
+
+ // Creates the SQL tables.
+ bool CreateTables();
+
+ // Checks the database version and if it's incompatible with the current one,
+ // resets the database.
+ bool CheckCompatibleVersion();
+
+ // Updates, or adds if new, a hostkey's record with the given add/sub entry.
+ // If this is a sub, removes the given prefixes, or all if prefixes is empty,
+ // from host_key's record. If persist is true, then if the add_chunk_id isn't
+ // found the entry will store this sub information for future reference.
+ // Otherwise the entry will not be modified if there are no matches.
+ void UpdateInfo(SBPrefix host, SBEntry* entry, bool persist);
+
+ // Checks whether any of the given paths match the record stored for the
+ // given host. On a match, sets matching_list or adds to prefix_hits.
+ void CheckUrl(const std::string& host,
+ SBPrefix host_key,
+ const std::vector<std::string>& paths,
+ std::string* matching_list,
+ std::vector<SBPrefix>* prefix_hits);
+
+ enum ChunkType {
+ ADD_CHUNK = 0,
+ SUB_CHUNK = 1,
+ };
+
+ // Adds information about the given chunk to the chunks table.
+ void AddChunkInformation(int list_id,
+ ChunkType type,
+ int chunk_id,
+ const std::string& hostkeys); // only used for add
+
+ // Return a comma separated list of chunk ids that are in the database for
+ // the given list and chunk type.
+ void GetChunkIds(int list_id, ChunkType type, std::string* list);
+
+ // Checks if a chunk is in the database.
+ bool ChunkExists(int list_id, ChunkType type, int chunk_id);
+
+ // Removes the given id from our list of chunk ids.
+ void RemoveChunkId(int list_id, ChunkType type, int chunk_id);
+
+ // Reads the host's information from the database. Returns true if it was
+ // found, or false otherwise.
+ bool ReadInfo(int host_key, SBHostInfo* info, int* id);
+
+ // Writes the host's information to the database, overwriting any existing
+ // information for that host_key if it existed.
+ void WriteInfo(int host_key, const SBHostInfo& info, int id);
+
+ // Deletes existing information for the given hostkey.
+ void DeleteInfo(int host_key);
+
+ // Adds the given list to the database. Returns its row id.
+ int AddList(const std::string& name);
+
+ // Given a list name, returns its internal id. If we haven't seen it before,
+ // an id is created and stored in the database. On error, returns 0.
+ int GetListID(const std::string& name);
+
+ // Given a list id, returns its name.
+ std::string GetListName(int id);
+
+ static std::wstring BloomFilterFilename(const std::wstring& db_filename);
+
+ // Load the bloom filter off disk. Generates one if it can't find it.
+ void LoadBloomFilter();
+
+ // Deletes the on-disk bloom filter, i.e. because it's stale.
+ void DeleteBloomFilter();
+
+ // Writes the current bloom filter to disk.
+ void WriteBloomFilter();
+
+ // Adds the host to the bloom filter.
+ void AddHostToBloomFilter(int host_key);
+
+ // Generate a bloom filter.
+ void BuildBloomFilter();
+
+ // Used when generating the bloom filter. Reads a small number of hostkeys
+ // from the rows whose id is greater than start_id.
+ void OnReadHostKeys(int start_id);
+
+ // Called when we finished reading all the hostkeys from the database during
+ // bloom filter generation.
+ void OnDoneReadingHostKeys();
+
+ void StartThrottledWork();
+ void RunThrottledWork();
+
+ // Used when processing add-del, add chunk and sub chunk commands in small
+ // batches so that the db thread is never blocked. They return true if
+ // complete, or false if there's still more work to do.
+ bool ProcessChunks();
+ bool ProcessAddDel();
+
+ bool ProcessAddChunks(std::deque<SBChunk>* chunks);
+ bool ProcessSubChunks(std::deque<SBChunk>* chunks);
+
+ void BeginTransaction();
+ void EndTransaction();
+
+ // Processes an add-del command, which deletes all the prefixes that came
+ // from that add chunk id.
+ void AddDel(const std::string& list_name, int add_chunk_id);
+
+ // Processes a sub-del command, which just removes the sub chunk id from
+ // our list.
+ void SubDel(const std::string& list_name, int sub_chunk_id);
+
+ // Looks up any cached full hashes we may have.
+ void GetCachedFullHashes(const std::vector<SBPrefix>* prefix_hits,
+ std::vector<SBFullHashResult>* full_hits,
+ Time last_update);
+
+ // Remove cached entries that have prefixes contained in the entry.
+ void ClearCachedHashes(const SBEntry* entry);
+
+ // Remove all GetHash entries that match the list and chunk id from an AddDel.
+ void ClearCachedHashesForChunk(int list_id, int add_chunk_id);
+
+ void HandleCorruptDatabase();
+ void OnHandleCorruptDatabase();
+
+ // Runs a small amount of time after the machine has resumed operation from
+ // a low power state.
+ void OnResumeDone();
+
+ // The database connection.
+ sqlite3* db_;
+
+ // Cache of compiled statements for our database.
+ scoped_ptr<SqliteStatementCache> statement_cache_;
+
+ int transaction_count_;
+ scoped_ptr<SQLTransaction> transaction_;
+
+ // True iff the database has been opened successfully.
+ bool init_;
+
+ std::wstring filename_;
+
+ // Used by the bloom filter.
+ std::wstring bloom_filter_filename_;
+ scoped_ptr<BloomFilter> bloom_filter_;
+ int bloom_filter_read_count_;
+ int bloom_filter_fp_count_;
+
+ // These are temp variables used when rebuilding the bloom filter.
+ bool bloom_filter_building_;
+ std::vector<int> bloom_filter_temp_hostkeys_;
+ int bloom_filter_last_hostkey_;
+ Time bloom_filter_rebuild_time_;
+
+ // Used to store throttled work for commands that write to the database.
+ std::queue<std::deque<SBChunk>*> pending_chunks_;
+
+ // Used during processing of an add chunk.
+ std::string add_chunk_modified_hosts_;
+
+ // One queued add-del request; instances are held in pending_add_del_.
+ struct AddDelWork {
+ // Internal id of the list the add chunk belongs to.
+ // NOTE(review): presumably the value returned by GetListID() -- confirm.
+ int list_id;
+ // Id of the add chunk this add-del refers to.
+ int add_chunk_id;
+ // NOTE(review): presumably the hostkeys recorded for that add chunk that
+ // still have to be processed -- confirm against ProcessAddDel().
+ std::vector<std::string> hostkeys;
+ };
+
+ std::queue<AddDelWork> pending_add_del_;
+
+ // Controls whether database writes are done synchronously in one go or
+ // asynchronously in small chunks.
+ bool asynchronous_;
+
+ // Called after an add/sub chunk is processed.
+ Callback0::Type* chunk_inserted_callback_;
+
+ // Used to schedule small bits of work when writing to the database.
+ ScopedRunnableMethodFactory<SafeBrowsingDatabase> process_factory_;
+
+ // Used to schedule reading the database to rebuild the bloom filter.
+ ScopedRunnableMethodFactory<SafeBrowsingDatabase> bloom_read_factory_;
+
+ // Used to schedule writing the bloom filter after an update.
+ ScopedRunnableMethodFactory<SafeBrowsingDatabase> bloom_write_factory_;
+
+ // Used to schedule resetting the database because of corruption.
+ ScopedRunnableMethodFactory<SafeBrowsingDatabase> reset_factory_;
+
+ // Used to schedule resuming from a low power state.
+ ScopedRunnableMethodFactory<SafeBrowsingDatabase> resume_factory_;
+
+ // Types used for caching GetHash results. A HashCacheEntry is one cached
+ // full hash together with the list and add chunk it was reported for and
+ // the time at which it was stored in the cache.
+ struct HashCacheEntry {
+   SBFullHash full_hash;
+   int list_id;
+   int add_chunk_id;
+   Time received;
+ };
+
+ typedef std::list<HashCacheEntry> HashList;
+ typedef stdext::hash_map<SBPrefix, HashList> HashCache;
+ HashCache hash_cache_;
+
+ // The amount of time, in milliseconds, to wait before the next disk write.
+ int disk_delay_;
+
+ DISALLOW_EVIL_CONSTRUCTORS(SafeBrowsingDatabase);
+};
+
+#endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H__ \ No newline at end of file
diff --git a/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc b/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc
new file mode 100644
index 0000000..bab0225
--- /dev/null
+++ b/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc
@@ -0,0 +1,652 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Unit tests for the SafeBrowsing storage system (SafeBrowsingDatabase).
+
+#include "base/file_util.h"
+#include "base/logging.h"
+#include "base/path_service.h"
+#include "base/process_util.h"
+#include "base/sha2.h"
+#include "base/stats_counters.h"
+#include "base/string_util.h"
+#include "base/time.h"
+#include "chrome/browser/safe_browsing/protocol_parser.h"
+#include "chrome/browser/safe_browsing/safe_browsing_database.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace {
+
+// Hashes |str| with base::SHA256HashString(), asking for only
+// sizeof(SBPrefix) bytes of output, and returns those bytes as an SBPrefix.
+SBPrefix Sha256Prefix(const std::string& str) {
+  SBPrefix prefix;
+  base::SHA256HashString(str, &prefix, sizeof(SBPrefix));
+  return prefix;
+}
+
+}  // namespace
+
+// Sends a delete command for the single chunk |chunk_id| of |list| to |db|:
+// a SubDel when |is_sub_del| is true, an AddDel otherwise.
+void DelChunk(SafeBrowsingDatabase* db,
+              const std::string& list,
+              int chunk_id,
+              bool is_sub_del) {
+  SBChunkDelete request;
+  request.list_name = list;
+  request.is_sub_del = is_sub_del;
+  request.chunk_del.push_back(ChunkRange(chunk_id));
+
+  // The vector is heap-allocated and passed as a raw pointer.
+  // NOTE(review): DeleteChunks() presumably takes ownership -- confirm.
+  std::vector<SBChunkDelete>* pending =
+      new std::vector<SBChunkDelete>(1, request);
+  db->DeleteChunks(pending);
+}
+
+// Convenience wrapper around DelChunk(): issues an AddDel for |chunk_id|.
+void AddDelChunk(SafeBrowsingDatabase* db,
+                 const std::string& list,
+                 int chunk_id) {
+  const bool is_sub_del = false;
+  DelChunk(db, list, chunk_id, is_sub_del);
+}
+
+// Convenience wrapper around DelChunk(): issues a SubDel for |chunk_id|.
+void SubDelChunk(SafeBrowsingDatabase* db,
+                 const std::string& list,
+                 int chunk_id) {
+  const bool is_sub_del = true;
+  DelChunk(db, list, chunk_id, is_sub_del);
+}
+
+// Checks database reading/writing.
+//
+// Exercises, in order: inserting add chunks, URL lookups, removing a single
+// prefix with a sub chunk, the AddDel and SubDel commands, and a sub chunk
+// that arrives before the add chunk it refers to.
+//
+// Container sizes are verified with ASSERT_* before any element is indexed so
+// that a failing lookup aborts the test instead of reading out of bounds.
+TEST(SafeBrowsing, Database) {
+  std::wstring filename;
+  PathService::Get(base::DIR_TEMP, &filename);
+  filename.push_back(file_util::kPathSeparator);
+  filename.append(L"SafeBrowsingTestDatabase");
+  DeleteFile(filename.c_str());  // In case it existed from a previous run.
+
+  SafeBrowsingDatabase database;
+  database.set_synchronous();
+  EXPECT_TRUE(database.Init(filename, NULL));
+
+  // Add a simple chunk with one hostkey.
+  SBChunkHost host;
+  host.host = Sha256Prefix("www.evil.com/");
+  host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 2);
+  host.entry->set_chunk_id(1);
+  host.entry->SetPrefixAt(0, Sha256Prefix("www.evil.com/phishing.html"));
+  host.entry->SetPrefixAt(1, Sha256Prefix("www.evil.com/malware.html"));
+
+  SBChunk chunk;
+  chunk.chunk_number = 1;
+  chunk.hosts.push_back(host);
+
+  std::deque<SBChunk>* chunks = new std::deque<SBChunk>;
+  chunks->push_back(chunk);
+  database.InsertChunks("goog-malware", chunks);
+
+  // Add another chunk with two different hostkeys.
+  host.host = Sha256Prefix("www.evil.com/");
+  host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 2);
+  host.entry->set_chunk_id(1);
+  host.entry->SetPrefixAt(0, Sha256Prefix("www.evil.com/notevil1.html"));
+  host.entry->SetPrefixAt(1, Sha256Prefix("www.evil.com/notevil2.html"));
+
+  chunk.chunk_number = 2;
+  chunk.hosts.clear();
+  chunk.hosts.push_back(host);
+
+  host.host = Sha256Prefix("www.good.com/");
+  host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 2);
+  host.entry->SetPrefixAt(0, Sha256Prefix("www.good.com/good1.html"));
+  host.entry->SetPrefixAt(1, Sha256Prefix("www.good.com/good2.html"));
+
+  chunk.hosts.push_back(host);
+
+  chunks = new std::deque<SBChunk>;
+  chunks->push_back(chunk);
+
+  database.InsertChunks("goog-malware", chunks);
+
+  // and a chunk with an IP-based host
+  host.host = Sha256Prefix("192.168.0.1/");
+  host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 1);
+  host.entry->SetPrefixAt(0, Sha256Prefix("192.168.0.1/malware.html"));
+
+  chunk.chunk_number = 3;
+  chunk.hosts.clear();
+  chunk.hosts.push_back(host);
+
+  chunks = new std::deque<SBChunk>;
+  chunks->push_back(chunk);
+  database.InsertChunks("goog-malware", chunks);
+
+
+  // Make sure they were added correctly.
+  std::vector<SBListChunkRanges> lists;
+  database.GetListsInfo(&lists);
+  ASSERT_EQ(lists.size(), 1);  // Fatal: lists[0] is read below.
+  EXPECT_EQ(lists[0].name, "goog-malware");
+  EXPECT_EQ(lists[0].adds, "1-3");
+  EXPECT_TRUE(lists[0].subs.empty());
+
+  const Time now = Time::Now();
+  std::vector<SBFullHashResult> full_hashes;
+  std::vector<SBPrefix> prefix_hits;
+  std::string matching_list;
+  EXPECT_TRUE(database.ContainsUrl(GURL("http://www.evil.com/phishing.html"),
+                                   &matching_list, &prefix_hits,
+                                   &full_hashes, now));
+  ASSERT_EQ(prefix_hits.size(), 1);  // Fatal: prefix_hits[0] is read below.
+  EXPECT_EQ(prefix_hits[0], Sha256Prefix("www.evil.com/phishing.html"));
+
+  EXPECT_TRUE(database.ContainsUrl(GURL("http://www.evil.com/malware.html"),
+                                   &matching_list, &prefix_hits,
+                                   &full_hashes, now));
+
+  EXPECT_TRUE(database.ContainsUrl(GURL("http://www.evil.com/notevil1.html"),
+                                   &matching_list, &prefix_hits,
+                                   &full_hashes, now));
+
+  EXPECT_TRUE(database.ContainsUrl(GURL("http://www.evil.com/notevil2.html"),
+                                   &matching_list, &prefix_hits,
+                                   &full_hashes, now));
+
+  EXPECT_TRUE(database.ContainsUrl(GURL("http://www.good.com/good1.html"),
+                                   &matching_list, &prefix_hits,
+                                   &full_hashes, now));
+
+  EXPECT_TRUE(database.ContainsUrl(GURL("http://www.good.com/good2.html"),
+                                   &matching_list, &prefix_hits,
+                                   &full_hashes, now));
+
+  EXPECT_TRUE(database.ContainsUrl(GURL("http://192.168.0.1/malware.html"),
+                                   &matching_list, &prefix_hits,
+                                   &full_hashes, now));
+
+  EXPECT_FALSE(database.ContainsUrl(GURL("http://www.evil.com/"),
+                                    &matching_list, &prefix_hits,
+                                    &full_hashes, now));
+  EXPECT_EQ(prefix_hits.size(), 0);
+
+  EXPECT_FALSE(database.ContainsUrl(GURL("http://www.evil.com/robots.txt"),
+                                    &matching_list, &prefix_hits,
+                                    &full_hashes, now));
+
+  // Test removing a single prefix from the add chunk. Exactly one prefix is
+  // subtracted, so the entry is sized for one: a second slot that is never
+  // filled in would be handed to the database as a sub of whatever it holds.
+  host.host = Sha256Prefix("www.evil.com/");
+  host.entry = SBEntry::Create(SBEntry::SUB_PREFIX, 1);
+  host.entry->set_chunk_id(2);
+  host.entry->SetChunkIdAtPrefix(0, 2);
+  host.entry->SetPrefixAt(0, Sha256Prefix("www.evil.com/notevil1.html"));
+
+  chunk.chunk_number = 4;
+  chunk.hosts.clear();
+  chunk.hosts.push_back(host);
+
+  chunks = new std::deque<SBChunk>;
+  chunks->push_back(chunk);
+
+  database.InsertChunks("goog-malware", chunks);
+
+  EXPECT_TRUE(database.ContainsUrl(GURL("http://www.evil.com/phishing.html"),
+                                   &matching_list, &prefix_hits,
+                                   &full_hashes, now));
+  ASSERT_EQ(prefix_hits.size(), 1);  // Fatal: prefix_hits[0] is read below.
+  EXPECT_EQ(prefix_hits[0], Sha256Prefix("www.evil.com/phishing.html"));
+
+  EXPECT_FALSE(database.ContainsUrl(GURL("http://www.evil.com/notevil1.html"),
+                                    &matching_list, &prefix_hits,
+                                    &full_hashes, now));
+  EXPECT_EQ(prefix_hits.size(), 0);
+
+  EXPECT_TRUE(database.ContainsUrl(GURL("http://www.evil.com/notevil2.html"),
+                                   &matching_list, &prefix_hits,
+                                   &full_hashes, now));
+
+  EXPECT_TRUE(database.ContainsUrl(GURL("http://www.good.com/good1.html"),
+                                   &matching_list, &prefix_hits,
+                                   &full_hashes, now));
+
+  EXPECT_TRUE(database.ContainsUrl(GURL("http://www.good.com/good2.html"),
+                                   &matching_list, &prefix_hits,
+                                   &full_hashes, now));
+
+  database.GetListsInfo(&lists);
+  ASSERT_EQ(lists.size(), 1);
+  EXPECT_EQ(lists[0].name, "goog-malware");
+  EXPECT_EQ(lists[0].subs, "4");
+
+  // Test removing all the prefixes from an add chunk.
+  AddDelChunk(&database, "goog-malware", 2);
+  EXPECT_FALSE(database.ContainsUrl(GURL("http://www.evil.com/notevil2.html"),
+                                    &matching_list, &prefix_hits,
+                                    &full_hashes, now));
+
+  EXPECT_FALSE(database.ContainsUrl(GURL("http://www.good.com/good1.html"),
+                                    &matching_list, &prefix_hits,
+                                    &full_hashes, now));
+
+  EXPECT_FALSE(database.ContainsUrl(GURL("http://www.good.com/good2.html"),
+                                    &matching_list, &prefix_hits,
+                                    &full_hashes, now));
+
+  database.GetListsInfo(&lists);
+  ASSERT_EQ(lists.size(), 1);
+  EXPECT_EQ(lists[0].name, "goog-malware");
+  EXPECT_EQ(lists[0].subs, "4");
+
+  // The adddel command exposed a bug in the transaction code where any
+  // transaction after it would fail. Add a dummy entry and remove it to
+  // make sure the transactions work fine.
+  host.host = Sha256Prefix("www.redherring.com/");
+  host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 1);
+  host.entry->set_chunk_id(1);
+  host.entry->SetPrefixAt(0, Sha256Prefix("www.redherring.com/index.html"));
+
+  chunk.chunk_number = 44;
+  chunk.hosts.clear();
+  chunk.hosts.push_back(host);
+
+  chunks = new std::deque<SBChunk>;
+  chunks->push_back(chunk);
+  database.InsertChunks("goog-malware", chunks);
+
+  // Now remove the dummy entry. If there are any problems with the
+  // transactions, asserts will fire.
+  AddDelChunk(&database, "goog-malware", 44);
+
+  // Test the subdel command.
+  SubDelChunk(&database, "goog-malware", 4);
+  database.GetListsInfo(&lists);
+  ASSERT_EQ(lists.size(), 1);
+  EXPECT_EQ(lists[0].name, "goog-malware");
+  EXPECT_EQ(lists[0].subs, "");
+
+  // Test a sub command coming in before the add.
+  host.host = Sha256Prefix("www.notevilanymore.com/");
+  host.entry = SBEntry::Create(SBEntry::SUB_PREFIX, 0);
+  host.entry->set_chunk_id(10);
+
+  chunk.chunk_number = 5;
+  chunk.hosts.clear();
+  chunk.hosts.push_back(host);
+
+  chunks = new std::deque<SBChunk>;
+  chunks->push_back(chunk);
+  database.InsertChunks("goog-malware", chunks);
+
+  EXPECT_FALSE(database.ContainsUrl(GURL("http://www.notevilanymore.com/index.html"),
+                                    &matching_list, &prefix_hits,
+                                    &full_hashes, now));
+
+  // Now insert the tardy add chunk.
+  host.host = Sha256Prefix("www.notevilanymore.com/");
+  host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 2);
+  host.entry->SetPrefixAt(0, Sha256Prefix("www.notevilanymore.com/index.html"));
+  host.entry->SetPrefixAt(1, Sha256Prefix("www.notevilanymore.com/good.html"));
+
+  chunk.chunk_number = 10;
+  chunk.hosts.clear();
+  chunk.hosts.push_back(host);
+
+  chunks = new std::deque<SBChunk>;
+  chunks->push_back(chunk);
+  database.InsertChunks("goog-malware", chunks);
+
+  EXPECT_FALSE(database.ContainsUrl(GURL("http://www.notevilanymore.com/index.html"),
+                                    &matching_list, &prefix_hits,
+                                    &full_hashes, now));
+
+  EXPECT_FALSE(database.ContainsUrl(GURL("http://www.notevilanymore.com/good.html"),
+                                    &matching_list, &prefix_hits,
+                                    &full_hashes, now));
+
+  DeleteFile(filename.c_str());  // Clean up.
+}
+
+// Fixture helper for the caching test: inserts add chunk 1 of
+// "goog-malware-shavar" (two prefixes under www.evil.com/) into |database|
+// and then caches a full-hash GetHash result for each of the two URLs.
+void PopulateDatabaseForCacheTest(SafeBrowsingDatabase* database) {
+  // One hostkey carrying the two URL prefixes.
+  SBChunkHost evil_host;
+  evil_host.host = Sha256Prefix("www.evil.com/");
+  evil_host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 2);
+  evil_host.entry->set_chunk_id(1);
+  evil_host.entry->SetPrefixAt(0, Sha256Prefix("www.evil.com/phishing.html"));
+  evil_host.entry->SetPrefixAt(1, Sha256Prefix("www.evil.com/malware.html"));
+
+  SBChunk add_chunk;
+  add_chunk.chunk_number = 1;
+  add_chunk.hosts.push_back(evil_host);
+
+  std::deque<SBChunk>* pending = new std::deque<SBChunk>(1, add_chunk);
+  database->InsertChunks("goog-malware-shavar", pending);
+
+  // Build the two GetHash results. The second one is a copy of the first
+  // with only the hash replaced, so both share list name and add chunk id.
+  SBFullHashResult phishing_hit;
+  base::SHA256HashString("www.evil.com/phishing.html",
+                         &phishing_hit.hash, sizeof(SBFullHash));
+  phishing_hit.list_name = "goog-malware-shavar";
+  phishing_hit.add_chunk_id = 1;
+
+  SBFullHashResult malware_hit = phishing_hit;
+  base::SHA256HashString("www.evil.com/malware.html",
+                         &malware_hit.hash, sizeof(SBFullHash));
+
+  std::vector<SBFullHashResult> cached;
+  cached.push_back(phishing_hit);
+  cached.push_back(malware_hit);
+
+  database->CacheHashResults(cached);
+}
+
+// Checks the GetHash result cache: lookups hit the cache, a sub chunk and an
+// AddDel evict the matching entries, and expired entries are not returned.
+//
+// Sizes are verified with ASSERT_* before any element is read so that a
+// failing lookup aborts the test instead of indexing an empty container.
+TEST(SafeBrowsing, HashCaching) {
+  std::wstring filename;
+  PathService::Get(base::DIR_TEMP, &filename);
+  filename.push_back(file_util::kPathSeparator);
+  filename.append(L"SafeBrowsingTestDatabase");
+  DeleteFile(filename.c_str());  // In case it existed from a previous run.
+
+  SafeBrowsingDatabase database;
+  database.set_synchronous();
+  EXPECT_TRUE(database.Init(filename, NULL));
+
+  PopulateDatabaseForCacheTest(&database);
+
+  // We should have both full hashes in the cache.
+  EXPECT_EQ(database.hash_cache_.size(), 2);
+
+  // Test the cache lookup for the first prefix.
+  std::string list;
+  std::vector<SBPrefix> prefixes;
+  std::vector<SBFullHashResult> full_hashes;
+  database.ContainsUrl(GURL("http://www.evil.com/phishing.html"),
+                       &list, &prefixes, &full_hashes, Time::Now());
+  ASSERT_EQ(full_hashes.size(), 1);  // Fatal: full_hashes[0] is read below.
+
+  SBFullHashResult full_hash;
+  base::SHA256HashString("www.evil.com/phishing.html",
+                         &full_hash.hash, sizeof(SBFullHash));
+  EXPECT_EQ(memcmp(&full_hashes[0].hash,
+                   &full_hash.hash, sizeof(SBFullHash)), 0);
+
+  prefixes.clear();
+  full_hashes.clear();
+
+  // Test the cache lookup for the second prefix.
+  database.ContainsUrl(GURL("http://www.evil.com/malware.html"),
+                       &list, &prefixes, &full_hashes, Time::Now());
+  ASSERT_EQ(full_hashes.size(), 1);
+  base::SHA256HashString("www.evil.com/malware.html",
+                         &full_hash.hash, sizeof(SBFullHash));
+  EXPECT_EQ(memcmp(&full_hashes[0].hash,
+                   &full_hash.hash, sizeof(SBFullHash)), 0);
+
+  prefixes.clear();
+  full_hashes.clear();
+
+  // Test removing a prefix via a sub chunk. Only one prefix is subtracted,
+  // so the entry is sized for one rather than leaving a second slot unset.
+  SBChunkHost host;
+  host.host = Sha256Prefix("www.evil.com/");
+  host.entry = SBEntry::Create(SBEntry::SUB_PREFIX, 1);
+  host.entry->set_chunk_id(1);
+  host.entry->SetChunkIdAtPrefix(0, 1);
+  host.entry->SetPrefixAt(0, Sha256Prefix("www.evil.com/phishing.html"));
+
+  SBChunk chunk;
+  chunk.chunk_number = 2;
+  chunk.hosts.clear();
+  chunk.hosts.push_back(host);
+  std::deque<SBChunk>* chunks = new std::deque<SBChunk>;
+  chunks->push_back(chunk);
+  database.InsertChunks("goog-malware-shavar", chunks);
+
+  // This prefix should still be there.
+  database.ContainsUrl(GURL("http://www.evil.com/malware.html"),
+                       &list, &prefixes, &full_hashes, Time::Now());
+  ASSERT_EQ(full_hashes.size(), 1);
+  base::SHA256HashString("www.evil.com/malware.html",
+                         &full_hash.hash, sizeof(SBFullHash));
+  EXPECT_EQ(memcmp(&full_hashes[0].hash,
+                   &full_hash.hash, sizeof(SBFullHash)), 0);
+
+  prefixes.clear();
+  full_hashes.clear();
+
+  // This prefix should be gone.
+  database.ContainsUrl(GURL("http://www.evil.com/phishing.html"),
+                       &list, &prefixes, &full_hashes, Time::Now());
+  EXPECT_EQ(full_hashes.size(), 0);
+
+  prefixes.clear();
+  full_hashes.clear();
+
+  // Test that an AddDel for the original chunk removes the last cached entry.
+  AddDelChunk(&database, "goog-malware-shavar", 1);
+  database.ContainsUrl(GURL("http://www.evil.com/malware.html"),
+                       &list, &prefixes, &full_hashes, Time::Now());
+  EXPECT_EQ(full_hashes.size(), 0);
+  EXPECT_EQ(database.hash_cache_.size(), 0);
+
+  prefixes.clear();
+  full_hashes.clear();
+
+  // Test that the cache won't return expired values. First we have to adjust
+  // the cached entries' received time to make them older, since the database
+  // cache insert uses Time::Now(). First, store some entries.
+  PopulateDatabaseForCacheTest(&database);
+  EXPECT_EQ(database.hash_cache_.size(), 2);
+
+  // Now adjust one of the entries times to be in the past. |full_hash| still
+  // holds the malware.html hash computed above; its leading bytes are the
+  // cache key.
+  Time expired = Time::Now() - TimeDelta::FromMinutes(60);
+  SBPrefix key;
+  memcpy(&key, &full_hash.hash, sizeof(SBPrefix));
+  SafeBrowsingDatabase::HashList& entries = database.hash_cache_[key];
+  ASSERT_FALSE(entries.empty());  // Fatal: front()/pop_front() follow.
+  SafeBrowsingDatabase::HashCacheEntry entry = entries.front();
+  entries.pop_front();
+  entry.received = expired;
+  entries.push_back(entry);
+
+  database.ContainsUrl(GURL("http://www.evil.com/malware.html"),
+                       &list, &prefixes, &full_hashes, expired);
+  EXPECT_EQ(full_hashes.size(), 0);
+
+  // Expired entry was dumped.
+  EXPECT_EQ(database.hash_cache_.size(), 1);
+
+  // This entry should still exist.
+  database.ContainsUrl(GURL("http://www.evil.com/phishing.html"),
+                       &list, &prefixes, &full_hashes, expired);
+  EXPECT_EQ(full_hashes.size(), 1);
+}
+
+// Looks up the stats counter called |name| in the current StatsTable and
+// writes "<name> <value>" to the debugger output.
+void PrintStat(const wchar_t* name) {
+  const int counter = StatsTable::current()->GetCounterValue(name);
+  const std::wstring line = StringPrintf(L"%s %d\r\n", name, counter);
+  OutputDebugStringW(line.c_str());
+}
+
+// Resolves |path| relative to chrome\test\data\safe_browsing under the source
+// root and returns the result. CHECK-fails if the source root cannot be
+// obtained or if the resolved path does not exist.
+std::wstring GetFullSBDataPath(const std::wstring& path) {
+  std::wstring resolved;
+  CHECK(PathService::Get(base::DIR_SOURCE_ROOT, &resolved));
+
+  // Append the test data directory first, then the caller's relative path.
+  file_util::AppendToPath(&resolved, L"chrome\\test\\data\\safe_browsing");
+  file_util::AppendToPath(&resolved, path);
+
+  CHECK(file_util::PathExists(resolved));
+  return resolved;
+}
+
+// Pairs one parsed batch of chunks with the list name that is passed to
+// SafeBrowsingDatabase::InsertChunks() together with it (see PeformUpdate()).
+struct ChunksInfo {
+ // Heap-allocated by UpdateDatabase() and handed to InsertChunks() as a raw
+ // pointer. NOTE(review): InsertChunks() presumably takes ownership -- confirm.
+ std::deque<SBChunk>* chunks;
+ // Name of the list the chunks belong to; UpdateDatabase() derives it from
+ // the chunk file's name.
+ std::string listname;
+};
+
+// Applies |deletes| and then every entry of |chunks| to a fresh test database
+// and writes the I/O counter deltas, the elapsed time and a set of SB.* stats
+// counters to the debugger output.
+//
+// |initial_db|: path (relative to the safe_browsing test data directory) of a
+//     database file to start from, or empty to start with no database file.
+// |chunks|: batches passed one by one to InsertChunks().
+// |deletes|: passed to DeleteChunks() before any chunk is inserted.
+//
+// The (misspelled) name is kept because callers refer to it.
+void PeformUpdate(const std::wstring& initial_db,
+                  const std::vector<ChunksInfo>& chunks,
+                  std::vector<SBChunkDelete>* deletes) {
+  IoCounters before, after;
+
+  std::wstring filename;
+  PathService::Get(base::DIR_TEMP, &filename);
+  filename.push_back(file_util::kPathSeparator);
+  filename.append(L"SafeBrowsingTestDatabase");
+  DeleteFile(filename.c_str());  // In case it existed from a previous run.
+
+  if (!initial_db.empty()) {
+    std::wstring full_initial_db = GetFullSBDataPath(initial_db);
+    ASSERT_TRUE(file_util::CopyFile(full_initial_db, filename));
+  }
+
+  SafeBrowsingDatabase database;
+  database.set_synchronous();
+  EXPECT_TRUE(database.Init(filename, NULL));
+
+  Time before_time = Time::Now();
+  ProcessHandle handle = Process::Current().handle();
+  scoped_ptr<process_util::ProcessMetrics> metric(
+      process_util::ProcessMetrics::CreateProcessMetrics(handle));
+  CHECK(metric->GetIOCounters(&before));
+
+  database.DeleteChunks(deletes);
+
+  for (size_t i = 0; i < chunks.size(); ++i)
+    database.InsertChunks(chunks[i].listname, chunks[i].chunks);
+
+  CHECK(metric->GetIOCounters(&after));
+
+  // The counter deltas and the elapsed time are wider than int, so they are
+  // converted explicitly and printed with a matching %lld; passing them to
+  // %d through varargs is a format/argument mismatch.
+  OutputDebugStringA(StringPrintf("I/O Read Bytes: %lld\r\n",
+      static_cast<long long>(
+          after.ReadTransferCount - before.ReadTransferCount)).c_str());
+  OutputDebugStringA(StringPrintf("I/O Write Bytes: %lld\r\n",
+      static_cast<long long>(
+          after.WriteTransferCount - before.WriteTransferCount)).c_str());
+  OutputDebugStringA(StringPrintf("I/O Reads: %lld\r\n",
+      static_cast<long long>(
+          after.ReadOperationCount - before.ReadOperationCount)).c_str());
+  OutputDebugStringA(StringPrintf("I/O Writes: %lld\r\n",
+      static_cast<long long>(
+          after.WriteOperationCount - before.WriteOperationCount)).c_str());
+  OutputDebugStringA(StringPrintf("Finished in %lld ms\r\n",
+      static_cast<long long>(
+          (Time::Now() - before_time).InMilliseconds())).c_str());
+
+  PrintStat(L"c:SB.HostSelect");
+  PrintStat(L"c:SB.HostSelectForBloomFilter");
+  PrintStat(L"c:SB.HostReplace");
+  PrintStat(L"c:SB.HostInsert");
+  PrintStat(L"c:SB.HostDelete");
+  PrintStat(L"c:SB.ChunkSelect");
+  PrintStat(L"c:SB.ChunkInsert");
+  PrintStat(L"c:SB.ChunkDelete");
+  PrintStat(L"c:SB.TransactionCommit");
+}
+
+// Loads pre-recorded update data from the safe_browsing test data directory
+// and replays it through PeformUpdate().
+//
+// |initial_db|: forwarded unchanged to PeformUpdate().
+// |response_path|: file holding an update response; its chunk deletes are
+//     parsed and forwarded. Ignored when empty.
+// |updates_path|: directory of chunk files, each parsed into one ChunksInfo.
+//     Ignored when empty.
+void UpdateDatabase(const std::wstring& initial_db,
+                    const std::wstring& response_path,
+                    const std::wstring& updates_path) {
+
+  // First we read the chunks from disk, so that this isn't counted in IO bytes.
+  std::vector<ChunksInfo> chunks;
+
+  SafeBrowsingProtocolParser parser;
+  if (!updates_path.empty()) {
+    std::wstring data_dir = GetFullSBDataPath(updates_path);
+    file_util::FileEnumerator file_enum(
+        data_dir, false, file_util::FileEnumerator::FILES);
+    while (true) {
+      std::wstring file = file_enum.Next();
+      if (file.empty())
+        break;
+
+      int64 size64;
+      bool result = file_util::GetFileSize(file, &size64);
+      CHECK(result);
+
+      int size = static_cast<int>(size64);
+      scoped_array<char> data(new char[size]);
+      file_util::ReadFile(file, data.get(), size);
+
+      ChunksInfo info;
+      info.chunks = new std::deque<SBChunk>;
+
+      bool re_key;
+      result = parser.ParseChunk(data.get(), size, "", "", &re_key, info.chunks);
+      CHECK(result);
+
+      // The list name is embedded in the file name: strip the trailing
+      // "_s"/"_a" marker and the three-character numeric prefix.
+      info.listname = WideToASCII(file_util::GetFilenameFromPath(file));
+      size_t index = info.listname.find('_');  // Get rid of the _s or _a.
+      // A file name without '_' would make find() return npos, which must
+      // not be passed on to resize().
+      CHECK(index != std::string::npos);
+      info.listname.resize(index);
+      info.listname.erase(0, 3);  // Get rid of the 000 etc.
+
+      chunks.push_back(info);
+    }
+  }
+
+  std::vector<SBChunkDelete>* deletes = new std::vector<SBChunkDelete>;
+  if (!response_path.empty()) {
+    std::string update;
+    std::wstring full_response_path = GetFullSBDataPath(response_path);
+    if (file_util::ReadFileToString(full_response_path, &update)) {
+      int next_update;
+      bool result, rekey, reset;
+      std::vector<ChunkUrl> urls;
+      result = parser.ParseUpdate(update.c_str(),
+                                  static_cast<int>(update.length()),
+                                  "",
+                                  &next_update,
+                                  &rekey,
+                                  &reset,
+                                  deletes,
+                                  &urls);
+      DCHECK(result);
+      if (!updates_path.empty())
+        DCHECK(urls.size() == chunks.size());
+    }
+  }
+
+  PeformUpdate(initial_db, chunks, deletes);
+}
+
+// Measures the IO of a first-ever update: no initial database, no update
+// response, only the chunk files in the "initial" directory.
+// test\data\safe_browsing\download_update.py was used to fetch those add/sub
+// chunks, so that runs are repeatable.
+TEST(SafeBrowsing, DISABLED_DatabaseInitialIO) {
+  const std::wstring no_initial_db;
+  const std::wstring no_response;
+  UpdateDatabase(no_initial_db, no_response, L"initial");
+}
+
+// Measures the IO of bringing a month old database up to date.
+// The data files were produced by running "..\download_update.py postdata"
+// in the "safe_browsing\old" directory.
+TEST(SafeBrowsing, DISABLED_DatabaseOldIO) {
+  const std::wstring initial_db(L"old\\SafeBrowsing");
+  const std::wstring response(L"old\\response");
+  const std::wstring updates(L"old\\updates");
+  UpdateDatabase(initial_db, response, updates);
+}
+
+// Same starting database and response as DatabaseOldIO, but no chunk files
+// are loaded, so only the deletes are applied.
+TEST(SafeBrowsing, DISABLED_DatabaseOldDeletesIO) {
+  const std::wstring no_updates;
+  UpdateDatabase(L"old\\SafeBrowsing", L"old\\response", no_updates);
+}
+
+// Same starting database and chunk files as DatabaseOldIO, but no response
+// is parsed, so only the chunk updates are applied.
+TEST(SafeBrowsing, DISABLED_DatabaseOldUpdatesIO) {
+  const std::wstring no_response;
+  UpdateDatabase(L"old\\SafeBrowsing", no_response, L"old\\updates");
+}
+
+// Does a lot of add-dels on very large chunks: a single AddDel command
+// covering add chunks 3539-3579 of goog-malware-shavar, with no chunk
+// inserts.
+TEST(SafeBrowsing, DISABLED_DatabaseOldLotsofDeletesIO) {
+  SBChunkDelete add_del;
+  add_del.list_name = "goog-malware-shavar";
+  add_del.is_sub_del = false;
+  add_del.chunk_del.push_back(ChunkRange(3539, 3579));
+
+  std::vector<SBChunkDelete>* deletes =
+      new std::vector<SBChunkDelete>(1, add_del);
+
+  const std::vector<ChunksInfo> no_chunks;
+  PeformUpdate(L"old\\SafeBrowsing", no_chunks, deletes);
+} \ No newline at end of file
diff --git a/chrome/browser/safe_browsing/safe_browsing_service.cc b/chrome/browser/safe_browsing/safe_browsing_service.cc
new file mode 100644
index 0000000..f7e47f7
--- /dev/null
+++ b/chrome/browser/safe_browsing/safe_browsing_service.cc
@@ -0,0 +1,567 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+
+#include "chrome/browser/safe_browsing/safe_browsing_service.h"
+
+#include "base/histogram.h"
+#include "base/logging.h"
+#include "base/message_loop.h"
+#include "base/path_service.h"
+#include "base/string_util.h"
+#include "chrome/browser/browser_process.h"
+#include "chrome/browser/profile_manager.h"
+#include "chrome/browser/safe_browsing/protocol_manager.h"
+#include "chrome/browser/safe_browsing/safe_browsing_blocking_page.h"
+#include "chrome/browser/safe_browsing/safe_browsing_database.h"
+#include "chrome/common/chrome_constants.h"
+#include "chrome/common/chrome_paths.h"
+#include "chrome/common/pref_names.h"
+#include "chrome/common/pref_service.h"
+#include "net/base/registry_controlled_domain.h"
+
+ // Builds an idle service: no IO loop, no database, no protocol manager, and
+ // both the enabled and resetting flags cleared. Initialize() must be called
+ // before the service does anything useful.
+ SafeBrowsingService::SafeBrowsingService()
+     : io_loop_(NULL),
+       database_(NULL),
+       protocol_manager_(NULL),
+       enabled_(false),
+       resetting_(false) {}
+
+ // The destructor itself performs no explicit cleanup.
+ SafeBrowsingService::~SafeBrowsingService() {}
+
+ // Only called on the UI thread. Remembers the IO message loop and starts the
+ // service when the default profile has SafeBrowsing enabled.
+ void SafeBrowsingService::Initialize(MessageLoop* io_loop) {
+   io_loop_ = io_loop;
+
+   // The default profile is looked up through the user data directory.
+   std::wstring data_dir;
+   PathService::Get(chrome::DIR_USER_DATA, &data_dir);
+   Profile* default_profile =
+       g_browser_process->profile_manager()->GetDefaultProfile(data_dir);
+
+   // Read the profile's SafeBrowsing preference; bail out when it is off.
+   PrefService* profile_prefs = default_profile->GetPrefs();
+   if (!profile_prefs->GetBoolean(prefs::kSafeBrowsingEnabled))
+     return;
+
+   Start();
+ }
+
+ // Brings up the SafeBrowsing objects. Invoked at browser start, or when the
+ // user checks the "Enable SafeBrowsing" option in the Advanced options UI.
+ // Creates the database thread, asks it to open the database, and asks the IO
+ // thread to create the protocol manager.
+ void SafeBrowsingService::Start() {
+   DCHECK(!db_thread_.get());
+   db_thread_.reset(new Thread("Chrome_SafeBrowsingThread"));
+   const bool thread_started = db_thread_->Start();
+   if (!thread_started)
+     return;
+
+   db_thread_->message_loop()->PostTask(
+       FROM_HERE,
+       NewRunnableMethod(this, &SafeBrowsingService::OnDBInitialize));
+
+   // Fetch the client MAC keys from local state; both stay empty when local
+   // state is unavailable.
+   std::string mac_client_key;
+   std::string mac_wrapped_key;
+   if (PrefService* local_state = g_browser_process->local_state()) {
+     mac_client_key =
+         WideToASCII(local_state->GetString(prefs::kSafeBrowsingClientKey));
+     mac_wrapped_key =
+         WideToASCII(local_state->GetString(prefs::kSafeBrowsingWrappedKey));
+   }
+
+   // The current (calling) loop is handed over as the notify loop.
+   io_loop_->PostTask(
+       FROM_HERE,
+       NewRunnableMethod(this, &SafeBrowsingService::OnIOInitialize,
+                         MessageLoop::current(), mac_client_key,
+                         mac_wrapped_key));
+ }
+
+ // Requests shutdown by scheduling OnIOShutdown() on the IO loop.
+ void SafeBrowsingService::ShutDown() {
+   io_loop_->PostTask(
+       FROM_HERE,
+       NewRunnableMethod(this, &SafeBrowsingService::OnIOShutdown));
+ }
+
+ // Runs on the IO thread. Marks the service as enabled, then creates and
+ // initializes the protocol manager with the given notify loop and MAC keys.
+ void SafeBrowsingService::OnIOInitialize(MessageLoop* notify_loop,
+                                          const std::string& client_key,
+                                          const std::string& wrapped_key) {
+   DCHECK(MessageLoop::current() == io_loop_);
+   enabled_ = true;
+
+   protocol_manager_ = new SafeBrowsingProtocolManager(
+       this, notify_loop, client_key, wrapped_key);
+   protocol_manager_->Initialize();
+ }
+
+ // Runs on the database thread. Calling GetDatabase() opens the database if
+ // it has not been opened yet; the returned pointer is not needed here.
+ void SafeBrowsingService::OnDBInitialize() {
+   DCHECK(MessageLoop::current() == db_thread_->message_loop());
+   GetDatabase();
+ }
+
+ // Runs on the IO thread. Tears the service down: deletes the protocol
+ // manager, schedules deletion of the database on the database thread, drops
+ // the database thread, and completes every outstanding check as URL_SAFE.
+ // Does nothing when the service is not enabled.
+ void SafeBrowsingService::OnIOShutdown() {
+   DCHECK(MessageLoop::current() == io_loop_);
+   if (!enabled_)
+     return;
+
+   enabled_ = false;
+
+   // This cancels all in-flight GetHash requests.
+   delete protocol_manager_;
+   // Do not leave a dangling pointer behind: put the member back into its
+   // constructed (NULL) state until OnIOInitialize() creates a new manager.
+   protocol_manager_ = NULL;
+
+   if (db_thread_.get())
+     db_thread_->message_loop()->DeleteSoon(FROM_HERE, database_);
+
+   // Flush the database thread. Any in-progress database check results will be
+   // ignored and cleaned up below.
+   db_thread_.reset(NULL);
+
+   database_ = NULL;
+
+   // Delete checks once the database thread is done, calling back any clients
+   // with 'URL_SAFE'. Checks whose client was cancelled (NULL) are only freed.
+   for (CurrentChecks::iterator it = checks_.begin();
+        it != checks_.end(); ++it) {
+     if ((*it)->client)
+       (*it)->client->OnUrlCheckResult((*it)->url, URL_SAFE);
+     delete *it;
+   }
+   checks_.clear();
+
+   // The entries only referenced the checks deleted above.
+   gethash_requests_.clear();
+ }
+
+ // Runs on the UI thread. Starts the service when |enabled| is true and shuts
+ // it down otherwise.
+ void SafeBrowsingService::OnEnable(bool enabled) {
+   if (!enabled) {
+     ShutDown();
+     return;
+   }
+   Start();
+ }
+
+ // Returns true only for urls whose scheme is "http" or "https".
+ bool SafeBrowsingService::CanCheckUrl(const GURL& url) const {
+   if (url.SchemeIs("http"))
+     return true;
+   return url.SchemeIs("https");
+ }
+
+ // Runs on the IO thread. Returns true when the url can be declared safe
+ // right away. Otherwise a SafeBrowsingCheck is queued for the database thread
+ // and false is returned; |client| is called back once the result is known.
+ bool SafeBrowsingService::CheckUrl(const GURL& url, Client* client) {
+   DCHECK(MessageLoop::current() == io_loop_);
+
+   // Nothing to check against when the service is off or has no database.
+   if (!enabled_)
+     return true;
+   if (!database_)
+     return true;
+
+   // The quick pre-check is skipped while a database reset is in progress.
+   if (!resetting_) {
+     const Time filter_start = Time::Now();
+     const bool lookup_needed = database_->NeedToCheckUrl(url);
+     UMA_HISTOGRAM_TIMES(L"SB.BloomFilter", Time::Now() - filter_start);
+     if (!lookup_needed)
+       return true;  // The url is definitely safe.
+   }
+
+   // The url may or may not be safe; only the database can tell for sure.
+   SafeBrowsingCheck* pending = new SafeBrowsingCheck();
+   pending->url = url;
+   pending->client = client;
+   pending->result = URL_SAFE;
+   pending->need_get_hash = false;
+   pending->start = Time::Now();
+   checks_.insert(pending);
+
+   db_thread_->message_loop()->PostTask(
+       FROM_HERE,
+       NewRunnableMethod(this, &SafeBrowsingService::CheckDatabase, pending,
+                         protocol_manager_->last_update()));
+   return false;
+ }
+
+ // Shows the interstitial for |url| unless the user already chose to ignore
+ // the same kind of warning for this render view and domain. In that case the
+ // client is simply told (asynchronously, on the current loop) to proceed.
+ void SafeBrowsingService::DisplayBlockingPage(const GURL& url,
+                                               ResourceType::Type resource_type,
+                                               UrlCheckResult result,
+                                               Client* client,
+                                               MessageLoop* ui_loop,
+                                               int render_process_host_id,
+                                               int render_view_id) {
+   // Look for a matching whitelist entry. The checks run cheapest first; the
+   // domain is only computed for entries that match everything else.
+   typedef std::vector<WhiteListedEntry>::const_iterator EntryIterator;
+   for (EntryIterator entry = white_listed_entries_.begin();
+        entry != white_listed_entries_.end(); ++entry) {
+     if (entry->render_process_host_id != render_process_host_id)
+       continue;
+     if (entry->render_view_id != render_view_id)
+       continue;
+     if (!(entry->result == result))
+       continue;
+     if (!(entry->domain ==
+           RegistryControlledDomainService::GetDomainAndRegistry(url)))
+       continue;
+
+     MessageLoop::current()->PostTask(
+         FROM_HERE,
+         NewRunnableMethod(this,
+                           &SafeBrowsingService::NotifyClientBlockingComplete,
+                           client, true));
+     return;
+   }
+
+   // Not whitelisted: build the page and have the UI loop display it. The
+   // reference taken here is released in OnBlockingPageDone().
+   SafeBrowsingBlockingPage* interstitial = new SafeBrowsingBlockingPage(
+       this, client, render_process_host_id, render_view_id, url, resource_type,
+       result);
+   interstitial->AddRef();
+   ui_loop->PostTask(
+       FROM_HERE,
+       NewRunnableMethod(interstitial,
+                         &SafeBrowsingBlockingPage::DisplayBlockingPage));
+ }
+
+ // Runs on the IO thread. Detaches |client| from every outstanding check so it
+ // is never called back. The checks themselves stay queued and are cleaned up
+ // when they complete.
+ void SafeBrowsingService::CancelCheck(Client* client) {
+   DCHECK(MessageLoop::current() == io_loop_);
+
+   CurrentChecks::iterator it = checks_.begin();
+   while (it != checks_.end()) {
+     SafeBrowsingCheck* pending = *it;
+     if (pending->client == client)
+       pending->client = NULL;
+     ++it;
+   }
+ }
+
+ // Runs on the database thread. Looks |info->url| up in the database, records
+ // the outcome in |info|, and hands |info| back to the IO thread through
+ // OnCheckDone().
+ void SafeBrowsingService::CheckDatabase(SafeBrowsingCheck* info,
+                                         Time last_update) {
+   DCHECK(MessageLoop::current() == db_thread_->message_loop());
+
+   // A NULL client means the check was cancelled, so the lookup is skipped.
+   SafeBrowsingDatabase* db = info->client ? GetDatabase() : NULL;
+   if (db) {
+     const Time lookup_start = Time::Now();
+     std::string matched_list;
+     const bool found = db->ContainsUrl(info->url,
+                                        &matched_list,
+                                        &info->prefix_hits,
+                                        &info->full_hits,
+                                        last_update);
+     if (found) {
+       if (info->prefix_hits.empty()) {
+         // No prefix hits: the verdict comes straight from the list name.
+         info->result = GetResultFromListname(matched_list);
+       } else if (info->full_hits.empty()) {
+         // Prefix hits without any full hash: a GetHash request is needed.
+         info->need_get_hash = true;
+       }
+     }
+     info->db_time = Time::Now() - lookup_start;
+   }
+
+   if (!io_loop_)
+     return;
+   io_loop_->PostTask(
+       FROM_HERE,
+       NewRunnableMethod(this, &SafeBrowsingService::OnCheckDone, info));
+ }
+
+ // Runs on the IO thread once the database lookup for |info| has finished.
+ // Either completes the check immediately or requests the full hashes from the
+ // protocol manager.
+ void SafeBrowsingService::OnCheckDone(SafeBrowsingCheck* info) {
+   DCHECK(MessageLoop::current() == io_loop_);
+
+   // If we were shut down during the lookup, OnIOShutdown() has already
+   // deleted this check; |info| must not be dereferenced in that case.
+   if (!enabled_ || checks_.find(info) == checks_.end())
+     return;
+
+   UMA_HISTOGRAM_TIMES(L"SB.Database", Time::Now() - info->start);
+
+   const bool must_fetch_full_hash = info->client && info->need_get_hash;
+   if (!must_fetch_full_hash) {
+     // We may have cached results for previous GetHash queries.
+     HandleOneCheck(info, info->full_hits);
+     return;
+   }
+
+   // Partial match: Google has to be queried for the full hash. Clean up
+   // happens in HandleGetHashResults.
+   //
+   // Checks involving exactly one prefix (the common case) are coalesced: when
+   // a GetHash request for that prefix is already in flight, this check just
+   // joins the list of interested parties. Checks with several prefixes always
+   // issue their own request.
+   if (info->prefix_hits.size() == 1) {
+     const SBPrefix only_prefix = info->prefix_hits[0];
+     GetHashRequests::iterator in_flight = gethash_requests_.find(only_prefix);
+     if (in_flight != gethash_requests_.end()) {
+       // There's already a request in progress.
+       in_flight->second.push_back(info);
+       return;
+     }
+
+     // First requestor for this prefix: start a new list holding just us.
+     gethash_requests_[only_prefix].push_back(info);
+   }
+
+   // Restart the clock so the network time is measured without the database
+   // time.
+   info->start = Time::Now();
+   protocol_manager_->GetFullHash(info, info->prefix_hits);
+ }
+
+ // Runs on the database thread. Returns the database, opening it on first use
+ // from the SafeBrowsing file inside the user data directory. Returns NULL if
+ // the database could not be initialized.
+ SafeBrowsingDatabase* SafeBrowsingService::GetDatabase() {
+   DCHECK(MessageLoop::current() == db_thread_->message_loop());
+   if (database_)
+     return database_;
+
+   std::wstring path;
+   bool result = PathService::Get(chrome::DIR_USER_DATA, &path);
+   DCHECK(result);
+
+   path.append(L"\\");
+   path.append(chrome::kSafeBrowsingFilename);
+
+   Time before = Time::Now();
+   SafeBrowsingDatabase* database = new SafeBrowsingDatabase();
+   // The database reports completed chunk insertions through this callback.
+   Callback0::Type* callback =
+       NewCallback(this, &SafeBrowsingService::ChunkInserted);
+   result = database->Init(path, callback);
+   if (!result) {
+     NOTREACHED();
+     // Nothing else refers to the half-initialized object, so free it here
+     // instead of leaking it on every failed attempt.
+     delete database;
+     return NULL;
+   }
+
+   // Publish the database only after it initialized successfully.
+   database_ = database;
+
+   TimeDelta open_time = Time::Now() - before;
+   SB_DLOG(INFO) << "SafeBrowsing database open took " <<
+       open_time.InMilliseconds() << " ms.";
+
+   return database_;
+ }
+
+ // Public API called only on the IO thread.
+ // The SafeBrowsingProtocolManager delivers the full hash results for prefix
+ // hits that were detected in the database. The results complete the waiting
+ // check(s) and are then sent to the database thread to be cached.
+ void SafeBrowsingService::HandleGetHashResults(
+     SafeBrowsingCheck* check,
+     const std::vector<SBFullHashResult>& full_hashes) {
+   // Ignore results for a check that is no longer tracked.
+   if (checks_.count(check) == 0)
+     return;
+
+   DCHECK(enabled_);
+
+   UMA_HISTOGRAM_LONG_TIMES(L"SB.Network", Time::Now() - check->start);
+
+   // 'check' is deleted by this call and must not be used afterwards.
+   OnHandleGetHashResults(check, full_hashes);
+
+   db_thread_->message_loop()->PostTask(
+       FROM_HERE,
+       NewRunnableMethod(this, &SafeBrowsingService::CacheHashResults,
+                         full_hashes));
+ }
+
+ // Completes |check| with |full_hashes|. When the check was the owner of a
+ // coalesced single-prefix GetHash request, every check waiting on that prefix
+ // is completed as well and the request entry is removed.
+ void SafeBrowsingService::OnHandleGetHashResults(
+     SafeBrowsingCheck* check,
+     const std::vector<SBFullHashResult>& full_hashes) {
+   const SBPrefix first_prefix = check->prefix_hits[0];
+   GetHashRequests::iterator request = gethash_requests_.find(first_prefix);
+
+   // Multi-prefix checks are never coalesced, and a missing entry means there
+   // is nobody else to notify: handle just this check.
+   const bool coalesced = !(check->prefix_hits.size() > 1) &&
+                          request != gethash_requests_.end();
+   if (!coalesced) {
+     HandleOneCheck(check, full_hashes);
+     return;
+   }
+
+   // Call back all interested parties.
+   GetHashRequestors& waiting = request->second;
+   for (size_t i = 0; i < waiting.size(); ++i)
+     HandleOneCheck(waiting[i], full_hashes);
+
+   gethash_requests_.erase(request);
+ }
+
+ // Finishes a single check: reports the verdict to the client (unless the
+ // check was cancelled), then unregisters and deletes the check.
+ void SafeBrowsingService::HandleOneCheck(
+     SafeBrowsingCheck* check,
+     const std::vector<SBFullHashResult>& full_hashes) {
+   if (Client* requester = check->client) {
+     // An index of -1 means no full hash matched the url, i.e. it is safe.
+     const int match =
+         safe_browsing_util::CompareFullHashes(check->url, full_hashes);
+     UrlCheckResult verdict = URL_SAFE;
+     if (match != -1)
+       verdict = GetResultFromListname(full_hashes[match].list_name);
+
+     // Let the client continue handling the original request.
+     requester->OnUrlCheckResult(check->url, verdict);
+   }
+
+   checks_.erase(check);
+   delete check;
+ }
+
+ // Runs on the IO thread. Asks the database thread to collect the chunk
+ // information; the answer comes back through OnGetAllChunksFromDatabase().
+ void SafeBrowsingService::GetAllChunks() {
+   DCHECK(MessageLoop::current() == io_loop_);
+   DCHECK(enabled_);
+   db_thread_->message_loop()->PostTask(
+       FROM_HERE,
+       NewRunnableMethod(this, &SafeBrowsingService::GetAllChunksFromDatabase));
+ }
+
+ // Called once the user has dealt with the interstitial. Forwards the decision
+ // to the client, remembers a "proceed" choice so the same warning is not
+ // shown again for that tab and domain, and drops the reference on |page|
+ // that DisplayBlockingPage() took.
+ void SafeBrowsingService::OnBlockingPageDone(SafeBrowsingBlockingPage* page,
+                                              Client* client,
+                                              bool proceed) {
+   NotifyClientBlockingComplete(client, proceed);
+
+   if (proceed) {
+     // Whitelist this domain and warning type for the given tab.
+     WhiteListedEntry ignored_warning;
+     ignored_warning.render_process_host_id = page->render_process_host_id();
+     ignored_warning.render_view_id = page->render_view_id();
+     ignored_warning.domain =
+         RegistryControlledDomainService::GetDomainAndRegistry(page->url());
+     ignored_warning.result = page->result();
+     white_listed_entries_.push_back(ignored_warning);
+   }
+
+   page->Release();
+ }
+
+ // Passes the user's interstitial decision on to |client|.
+ void SafeBrowsingService::NotifyClientBlockingComplete(Client* client,
+                                                        bool proceed) {
+   client->OnBlockingPageComplete(proceed);
+ }
+
+ // This method runs on the UI loop to access the prefs. It stores the new MAC
+ // keys in local state; nothing is stored when local state is unavailable.
+ void SafeBrowsingService::OnNewMacKeys(const std::string& client_key,
+                                        const std::string& wrapped_key) {
+   PrefService* local_state = g_browser_process->local_state();
+   if (!local_state)
+     return;
+
+   local_state->SetString(prefs::kSafeBrowsingClientKey,
+                          ASCIIToWide(client_key));
+   local_state->SetString(prefs::kSafeBrowsingWrappedKey,
+                          ASCIIToWide(wrapped_key));
+ }
+
+ // Registered with the database as its chunk-insertion callback (see
+ // GetDatabase()). Relays the notification to the IO loop.
+ void SafeBrowsingService::ChunkInserted() {
+   io_loop_->PostTask(
+       FROM_HERE,
+       NewRunnableMethod(this, &SafeBrowsingService::OnChunkInserted));
+ }
+
+ // Runs on the IO thread. Tells the protocol manager that the database has
+ // finished inserting a chunk.
+ void SafeBrowsingService::OnChunkInserted() {
+   DCHECK(MessageLoop::current() == io_loop_);
+
+   // This task is posted by ChunkInserted() and may still be queued when
+   // OnIOShutdown() deletes the protocol manager. Once the service is disabled
+   // the notification is dropped rather than sent to a deleted object.
+   if (!enabled_)
+     return;
+
+   protocol_manager_->OnChunkInserted();
+ }
+
+ // static
+ // Registers the two MAC key string preferences, each defaulting to the empty
+ // string.
+ void SafeBrowsingService::RegisterUserPrefs(PrefService* prefs) {
+   PrefService* const registry = prefs;
+   registry->RegisterStringPref(prefs::kSafeBrowsingClientKey, L"");
+   registry->RegisterStringPref(prefs::kSafeBrowsingWrappedKey, L"");
+ }
+
+ // Runs on the IO thread. Flags the reset as in progress and asks the database
+ // thread to perform it; OnResetComplete() clears the flag afterwards.
+ void SafeBrowsingService::ResetDatabase() {
+   DCHECK(MessageLoop::current() == io_loop_);
+   resetting_ = true;
+   db_thread_->message_loop()->PostTask(
+       FROM_HERE,
+       NewRunnableMethod(this, &SafeBrowsingService::OnResetDatabase));
+ }
+
+ // Runs on the database thread. Resets the database and then notifies the IO
+ // thread through OnResetComplete().
+ void SafeBrowsingService::OnResetDatabase() {
+   DCHECK(MessageLoop::current() == db_thread_->message_loop());
+
+   // GetDatabase() returns NULL when the database could not be initialized;
+   // there is nothing to reset in that case.
+   SafeBrowsingDatabase* db = GetDatabase();
+   if (db)
+     db->ResetDatabase();
+
+   // Always report completion so that the IO thread clears |resetting_|.
+   io_loop_->PostTask(FROM_HERE, NewRunnableMethod(
+       this, &SafeBrowsingService::OnResetComplete));
+ }
+
+ // Runs on the IO thread once the database thread has finished the reset.
+ void SafeBrowsingService::OnResetComplete() {
+   DCHECK(MessageLoop::current() == io_loop_);
+   // CheckUrl() may use the quick pre-check again from now on.
+   resetting_ = false;
+ }
+
+ // Runs on the IO thread. Forwards the chunks received for |list| to the
+ // database thread, which inserts them in HandleChunkForDatabase().
+ void SafeBrowsingService::HandleChunk(const std::string& list,
+                                       std::deque<SBChunk>* chunks) {
+   DCHECK(MessageLoop::current() == io_loop_);
+   DCHECK(enabled_);
+   db_thread_->message_loop()->PostTask(
+       FROM_HERE,
+       NewRunnableMethod(this, &SafeBrowsingService::HandleChunkForDatabase,
+                         list, chunks));
+ }
+
+ // Runs on the database thread. Hands |chunks| for |list_name| to the
+ // database for insertion.
+ void SafeBrowsingService::HandleChunkForDatabase(
+     const std::string& list_name,
+     std::deque<SBChunk>* chunks) {
+   DCHECK(MessageLoop::current() == db_thread_->message_loop());
+
+   SafeBrowsingDatabase* db = GetDatabase();
+   db->InsertChunks(list_name, chunks);
+ }
+
+ // Runs on the IO thread. Forwards the chunk deletions to the database thread,
+ // which applies them in DeleteChunks().
+ void SafeBrowsingService::HandleChunkDelete(
+     std::vector<SBChunkDelete>* chunk_deletes) {
+   DCHECK(MessageLoop::current() == io_loop_);
+   DCHECK(enabled_);
+   db_thread_->message_loop()->PostTask(
+       FROM_HERE,
+       NewRunnableMethod(this, &SafeBrowsingService::DeleteChunks,
+                         chunk_deletes));
+ }
+
+ // Runs on the database thread. Hands |chunk_deletes| to the database.
+ void SafeBrowsingService::DeleteChunks(
+     std::vector<SBChunkDelete>* chunk_deletes) {
+   DCHECK(MessageLoop::current() == db_thread_->message_loop());
+
+   SafeBrowsingDatabase* db = GetDatabase();
+   db->DeleteChunks(chunk_deletes);
+ }
+
+ // Database worker function. Collects the list information from the database
+ // and posts it, together with an error flag, back to the IO thread.
+ void SafeBrowsingService::GetAllChunksFromDatabase() {
+   DCHECK(MessageLoop::current() == db_thread_->message_loop());
+
+   std::vector<SBListChunkRanges> lists;
+   SafeBrowsingDatabase* db = GetDatabase();
+
+   // An unavailable database is reported as an error with an empty list.
+   bool database_error = (db == NULL);
+   if (!database_error)
+     db->GetListsInfo(&lists);
+
+   io_loop_->PostTask(
+       FROM_HERE,
+       NewRunnableMethod(this,
+                         &SafeBrowsingService::OnGetAllChunksFromDatabase,
+                         lists, database_error));
+ }
+
+ // Called on the IO thread with the results of all chunks. The results are
+ // passed to the protocol manager, or dropped when the service has been
+ // disabled in the meantime.
+ void SafeBrowsingService::OnGetAllChunksFromDatabase(
+     const std::vector<SBListChunkRanges>& lists, bool database_error) {
+   DCHECK(MessageLoop::current() == io_loop_);
+   if (enabled_)
+     protocol_manager_->OnGetChunksComplete(lists, database_error);
+ }
+
+ // Maps a list name to the kind of threat it represents. Names that are
+ // neither a phishing nor a malware list are logged and treated as URL_SAFE.
+ SafeBrowsingService::UrlCheckResult SafeBrowsingService::GetResultFromListname(
+     const std::string& list_name) {
+   const bool is_phishing = safe_browsing_util::IsPhishingList(list_name);
+   if (is_phishing)
+     return URL_PHISHING;
+
+   const bool is_malware = safe_browsing_util::IsMalwareList(list_name);
+   if (is_malware)
+     return URL_MALWARE;
+
+   SB_DLOG(INFO) << "Unknown safe browsing list " << list_name;
+   return URL_SAFE;
+ }
+
+ // static
+ // Records |time| in the "SB.Delay" histogram.
+ void SafeBrowsingService::LogPauseDelay(TimeDelta time) {
+   UMA_HISTOGRAM_LONG_TIMES(L"SB.Delay", time);
+ }
+
+ // Runs on the database thread. Stores the results of a GetHash request in
+ // the database.
+ void SafeBrowsingService::CacheHashResults(
+     const std::vector<SBFullHashResult>& full_hashes) {
+   DCHECK(MessageLoop::current() == db_thread_->message_loop());
+
+   // GetDatabase() returns NULL when the database could not be initialized;
+   // without a database there is nowhere to cache the results.
+   SafeBrowsingDatabase* db = GetDatabase();
+   if (!db)
+     return;
+
+   db->CacheHashResults(full_hashes);
+ }
+
+ // Suspend notification. Intentionally a no-op; see OnResume().
+ void SafeBrowsingService::OnSuspend() {}
+
+ // Runs on the IO thread when the machine wakes up. Tells the SafeBrowsing
+ // database not to do expensive disk operations for a few minutes: resuming
+ // from a low power state is likely to involve much disk activity already,
+ // which we don't want to exacerbate. Ignored while the service is disabled.
+ void SafeBrowsingService::OnResume() {
+   DCHECK(MessageLoop::current() == io_loop_);
+   if (!enabled_)
+     return;
+
+   db_thread_->message_loop()->PostTask(
+       FROM_HERE,
+       NewRunnableMethod(this, &SafeBrowsingService::HandleResume));
+ }
+
+ // Runs on the database thread. Informs the database that the machine has
+ // resumed from a low power state.
+ void SafeBrowsingService::HandleResume() {
+   DCHECK(MessageLoop::current() == db_thread_->message_loop());
+
+   // GetDatabase() returns NULL when the database could not be initialized;
+   // there is nothing to notify in that case.
+   SafeBrowsingDatabase* db = GetDatabase();
+   if (!db)
+     return;
+
+   db->HandleResume();
+ } \ No newline at end of file
diff --git a/chrome/browser/safe_browsing/safe_browsing_service.h b/chrome/browser/safe_browsing/safe_browsing_service.h
new file mode 100644
index 0000000..77e87ec
--- /dev/null
+++ b/chrome/browser/safe_browsing/safe_browsing_service.h
@@ -0,0 +1,279 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// The Safe Browsing service is responsible for downloading anti-phishing and
+// anti-malware tables and checking urls against them.
+
+#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_SERVICE_H__
+#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_SERVICE_H__
+
+#include <deque>
+#include <hash_map>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "base/ref_counted.h"
+#include "base/scoped_ptr.h"
+#include "base/thread.h"
+#include "base/time.h"
+#include "chrome/browser/safe_browsing/safe_browsing_util.h"
+#include "googleurl/src/gurl.h"
+#include "webkit/glue/resource_type.h"
+
+class MessageLoop;
+class PrefService;
+class SafeBrowsingBlockingPage;
+class SafeBrowsingDatabase;
+class SafeBrowsingProtocolManager;
+
+ // Construction needs to happen on the main thread.
+ class SafeBrowsingService
+ : public base::RefCountedThreadSafe<SafeBrowsingService> {
+ public:
+ // Possible outcomes of checking a url against the SafeBrowsing lists.
+ enum UrlCheckResult {
+ URL_SAFE,
+ URL_PHISHING,
+ URL_MALWARE,
+ };
+
+ // Users of this service implement this interface to be notified
+ // asynchronously of the result.
+ class Client {
+ public:
+ virtual ~Client() {}
+
+ // Called when the result of checking a URL is known.
+ virtual void OnUrlCheckResult(const GURL& url, UrlCheckResult result) = 0;
+
+ // Called when the user has made a decision about how to handle the
+ // SafeBrowsing interstitial page.
+ virtual void OnBlockingPageComplete(bool proceed) = 0;
+ };
+
+ // Creates the safe browsing service. Need to initialize before using.
+ SafeBrowsingService();
+ ~SafeBrowsingService();
+
+ // Initializes the service. io_loop is the message loop that the
+ // caller of this service (ResourceDispatcherHost) wants to be notified on
+ // for check results. The thread that does the database work is not passed
+ // in; the service creates it itself in Start().
+ void Initialize(MessageLoop* io_loop);
+
+ // Called to initialize objects that are used on the io_thread.
+ void OnIOInitialize(MessageLoop* notify_loop,
+ const std::string& client_key,
+ const std::string& wrapped_key);
+
+ // Called to initialize objects that are used on the db_thread.
+ void OnDBInitialize();
+
+ // Called to shutdown operations on the io_thread.
+ void OnIOShutdown();
+
+ // Called on the main thread to let us know that the io_thread is going away.
+ void ShutDown();
+
+ // Called on the IO thread.
+
+ // Returns true if the url's scheme can be checked.
+ bool CanCheckUrl(const GURL& url) const;
+
+ // Checks if the given url is safe or not. If we can synchronously determine
+ // that the url is safe, CheckUrl returns true. Otherwise it returns false,
+ // and "client" is called asynchronously with the result when it is ready.
+ bool CheckUrl(const GURL& url, Client* client);
+
+ // Cancels a pending check if the result is no longer needed.
+ void CancelCheck(Client* client);
+
+ // Displays an interstitial page.
+ void DisplayBlockingPage(const GURL& url,
+ ResourceType::Type resource_type,
+ UrlCheckResult result,
+ Client* client,
+ MessageLoop* ui_loop,
+ int render_process_host_id,
+ int render_view_id);
+
+ // Bundle of SafeBrowsing state for one URL check.
+ // TODO(paulg): Make this struct private to SafeBrowsingService and maintain
+ // request mappings using CancelableRequests instead (which can
+ // store this state for us).
+ struct SafeBrowsingCheck {
+ GURL url;
+ // Receiver of the result. Set to NULL by CancelCheck() when the result is
+ // no longer wanted.
+ Client* client;
+ // True when the database lookup produced prefix hits but no full hashes.
+ bool need_get_hash;
+ Time start; // Time that check was sent to SB service.
+ TimeDelta db_time; // How long DB look-up took.
+ UrlCheckResult result;
+ // Filled in by the database lookup.
+ std::vector<SBPrefix> prefix_hits;
+ std::vector<SBFullHashResult> full_hits;
+ };
+
+ // API used by the SafeBrowsingProtocolManager to interface with the
+ // SafeBrowsing storage system.
+ void HandleGetHashResults(
+ SafeBrowsingCheck* check,
+ const std::vector<SBFullHashResult>& full_hashes);
+ void HandleChunk(const std::string& list, std::deque<SBChunk>* chunks);
+ void HandleChunkDelete(std::vector<SBChunkDelete>* chunk_deletes);
+ void GetAllChunks();
+
+ // The blocking page on the UI thread has completed.
+ void OnBlockingPageDone(SafeBrowsingBlockingPage* page,
+ Client* client,
+ bool proceed);
+
+ // Called when the SafeBrowsingProtocolManager has received updated MAC keys.
+ void OnNewMacKeys(const std::string& client_key,
+ const std::string& wrapped_key);
+
+ // Notification from the advanced options UI.
+ void OnEnable(bool enabled);
+ bool enabled() const { return enabled_; }
+
+ // Called by the database (on the db thread) when a chunk insertion is
+ // complete.
+ void ChunkInserted();
+
+ // Preference handling.
+ static void RegisterUserPrefs(PrefService* prefs);
+
+ // The SafeBrowsing system has instructed us to reset our database.
+ void ResetDatabase();
+
+ // Log the user perceived delay caused by SafeBrowsing. This delay is the time
+ // delta starting from when we would have started reading data from the
+ // network, and ending when the SafeBrowsing check completes indicating that
+ // the current page is 'safe'.
+ static void LogPauseDelay(TimeDelta time);
+
+ // We defer SafeBrowsing work for a short duration when the computer comes
+ // out of a suspend state to avoid thrashing the disk.
+ void OnSuspend();
+ void OnResume();
+
+ private:
+ // Should only be called on db thread as SafeBrowsingDatabase is not
+ // threadsafe. Opens the database on first use and returns NULL if it could
+ // not be initialized.
+ SafeBrowsingDatabase* GetDatabase();
+
+ // Called on the database thread to check a url.
+ void CheckDatabase(SafeBrowsingCheck* info, Time last_update);
+
+ // Called on the IO thread with the check result.
+ void OnCheckDone(SafeBrowsingCheck* info);
+
+ // Called on the database thread to retrieve chunks.
+ void GetAllChunksFromDatabase();
+
+ // Called on the IO thread with the results of all chunks.
+ void OnGetAllChunksFromDatabase(const std::vector<SBListChunkRanges>& lists,
+ bool database_error);
+
+ // Called on the IO thread after the database reports that it added a chunk.
+ void OnChunkInserted();
+
+ // Called on the database thread to add/remove chunks and host keys.
+ // Callee will free the data when it's done.
+ void HandleChunkForDatabase(const std::string& list,
+ std::deque<SBChunk>* chunks);
+ void DeleteChunks(std::vector<SBChunkDelete>* chunk_deletes);
+
+ // Maps a list name to the corresponding check result.
+ static UrlCheckResult GetResultFromListname(const std::string& list_name);
+
+ // Tells the client what the user decided on the interstitial page.
+ void NotifyClientBlockingComplete(Client* client, bool proceed);
+
+ // Starts up the SafeBrowsing objects (database thread, protocol manager).
+ void Start();
+ // NOTE(review): Stop() has no definition in safe_browsing_service.cc;
+ // shutdown goes through ShutDown()/OnIOShutdown(). Confirm it is unused.
+ void Stop();
+
+ // Runs on the db thread to reset the database. We assume that resetting the
+ // database is a synchronous operation.
+ void OnResetDatabase();
+
+ // Runs on the io thread when the reset is complete.
+ void OnResetComplete();
+
+ // Store the results of a GetHash request. Runs on the database thread.
+ void CacheHashResults(const std::vector<SBFullHashResult>& full_hashes);
+
+ // Internal worker function for processing full hashes.
+ void OnHandleGetHashResults(SafeBrowsingCheck* check,
+ const std::vector<SBFullHashResult>& full_hashes);
+
+ // Reports the result of one check to its client (if any) and deletes the
+ // check.
+ void HandleOneCheck(SafeBrowsingCheck* check,
+ const std::vector<SBFullHashResult>& full_hashes);
+
+ // Runs on the database thread to inform the database we've resumed from a low
+ // power state.
+ void HandleResume();
+
+ // Message loop of the IO thread, as passed to Initialize().
+ MessageLoop* io_loop_;
+
+ // Checks that have been started and not yet completed. The set owns the
+ // SafeBrowsingCheck objects.
+ typedef std::set<SafeBrowsingCheck*> CurrentChecks;
+ CurrentChecks checks_;
+
+ // Used for issuing only one GetHash request for a given prefix.
+ typedef std::vector<SafeBrowsingCheck*> GetHashRequestors;
+ typedef stdext::hash_map<SBPrefix, GetHashRequestors> GetHashRequests;
+ GetHashRequests gethash_requests_;
+
+ // The sqlite database. We don't use a scoped_ptr because it needs to be
+ // destructed on a different thread than this object.
+ SafeBrowsingDatabase* database_;
+
+ // Handles interaction with SafeBrowsing servers.
+ SafeBrowsingProtocolManager* protocol_manager_;
+
+ // Used for whitelisting a render view when the user ignores our warning.
+ struct WhiteListedEntry {
+ int render_process_host_id;
+ int render_view_id;
+ std::string domain;
+ UrlCheckResult result;
+ };
+
+ std::vector<WhiteListedEntry> white_listed_entries_;
+
+ // Whether the service is running. 'enabled_' is used by SafeBrowsingService
+ // on the IO thread during normal operations.
+ bool enabled_;
+
+ // The SafeBrowsing thread that runs database operations.
+ scoped_ptr<Thread> db_thread_;
+
+ // Indicates if we are in the process of resetting the database.
+ bool resetting_;
+
+ DISALLOW_EVIL_CONSTRUCTORS(SafeBrowsingService);
+ };
+
+#endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_SERVICE_H__
diff --git a/chrome/browser/safe_browsing/safe_browsing_util.cc b/chrome/browser/safe_browsing/safe_browsing_util.cc
new file mode 100644
index 0000000..53339a1e
--- /dev/null
+++ b/chrome/browser/safe_browsing/safe_browsing_util.cc
@@ -0,0 +1,630 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "chrome/browser/safe_browsing/safe_browsing_util.h"
+
+#include "base/hmac.h"
+#include "base/logging.h"
+#include "base/sha2.h"
+#include "base/string_util.h"
+#include "chrome/browser/google_util.h"
+#include "net/base/base64.h"
+#include "net/base/escape.h"
+#include "unicode/locid.h"
+
+static const int kSafeBrowsingMacDigestSize = 20;  // Bytes in the HMAC-SHA1 digest that VerifyMAC() compares.
+
+// Continue to this URL after submitting the phishing report form.
+// TODO(paulg): Change to a Chrome specific URL.
+static const char kContinueUrlFormat[] =  // The single %s receives the language code.
+ "http://www.google.com/tools/firefox/toolbar/FT2/intl/%s/submit_success.html";
+
+static const char kReportParams[] = "?tpl=generic&continue=%s&url=%s";  // %s order: escaped continue URL, then escaped reported URL.
+
+namespace safe_browsing_util {
+
+void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts) {
+  // Safe Browsing Protocol 2: besides the exact host we check up to 4 host
+  // suffixes, built from the last 5 components by dropping the leading
+  // component one at a time.  The TLD on its own is never checked.
+  hosts->clear();
+
+  const std::string host = url.host();
+  if (host.empty())
+    return;
+
+  const char* const host_chars = host.c_str();
+  int suffixes_added = 0;
+  bool seen_tld_dot = false;
+
+  // Walk from the last character towards the front.  Position 0 is never
+  // examined, so a leading dot does not produce a suffix.
+  for (size_t pos = host.size() - 1;
+       pos != 0 && suffixes_added < 4;
+       --pos) {
+    if (host_chars[pos] != '.')
+      continue;
+
+    if (!seen_tld_dot) {
+      // The right-most dot only splits off the TLD; skip it.
+      seen_tld_dot = true;
+      continue;
+    }
+
+    hosts->push_back(host_chars + pos + 1);
+    ++suffixes_added;
+  }
+
+  // The complete host is always appended last.
+  hosts->push_back(host_chars);
+}
+
+// Builds the path candidates required by the Safe Browsing 2 spec: the exact
+// path with its query (when there is one), the exact path alone, and then up
+// to 4 directory prefixes grown from the root one component at a time.
+void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths) {
+  paths->clear();
+  const std::string path = url.path();
+  if (path.empty())
+    return;
+
+  if (url.has_query())
+    paths->push_back(path + "?" + url.query());
+
+  paths->push_back(path);
+  if (path == "/")
+    return;
+
+  const char* const chars = path.c_str();
+  const size_t last = path.size() - 1;
+  int prefixes_added = 0;
+
+  // Every '/' except one in the final position closes a prefix.  The final
+  // character is excluded so the full path is not emitted a second time.
+  for (size_t pos = 0;
+       chars[pos] && pos != last && prefixes_added < 4;
+       ++pos) {
+    if (chars[pos] == '/') {
+      paths->push_back(std::string(chars, pos + 1));
+      ++prefixes_added;
+    }
+  }
+}
+
+// Hashes every host + path combination generated for |url| with SHA-256 and
+// returns the index of the first entry of |full_hashes| equal to one of
+// them, or -1 when nothing matches (including when |full_hashes| is empty).
+int CompareFullHashes(const GURL& url,
+                      const std::vector<SBFullHashResult>& full_hashes) {
+  if (full_hashes.empty())
+    return -1;
+
+  std::vector<std::string> hosts;
+  std::vector<std::string> paths;
+  GenerateHostsToCheck(url, &hosts);
+  GeneratePathsToCheck(url, &paths);
+
+  const size_t result_count = full_hashes.size();
+  for (size_t host_idx = 0; host_idx < hosts.size(); ++host_idx) {
+    const std::string& host = hosts[host_idx];
+    for (size_t path_idx = 0; path_idx < paths.size(); ++path_idx) {
+      SBFullHash candidate;
+      base::SHA256HashString(host + paths[path_idx],
+                             candidate.full_hash,
+                             sizeof(SBFullHash));
+
+      // Find the first server result carrying this exact hash.
+      size_t hit = 0;
+      while (hit < result_count && !(candidate == full_hashes[hit].hash))
+        ++hit;
+      if (hit < result_count)
+        return static_cast<int>(hit);
+    }
+  }
+
+  return -1;
+}
+
+bool IsPhishingList(const std::string& list_name) {
+  // A list counts as a phishing list when its name contains "-phish-".
+  const std::string::size_type marker_pos = list_name.find("-phish-");
+  return marker_pos != std::string::npos;
+}
+
+bool IsMalwareList(const std::string& list_name) {
+  // A list counts as a malware list when its name contains "-malware-".
+  const std::string::size_type marker_pos = list_name.find("-malware-");
+  return marker_pos != std::string::npos;
+}
+
+// Rewrites |decoded| in place, turning every '_' into '/' and every '-' into
+// '+'.  All other characters are left untouched.
+static void DecodeWebSafe(std::string* decoded) {
+  DCHECK(decoded);
+  std::string& text = *decoded;
+  for (std::string::size_type pos = 0; pos < text.size(); ++pos) {
+    const char c = text[pos];
+    if (c == '_')
+      text[pos] = '/';
+    else if (c == '-')
+      text[pos] = '+';
+  }
+}
+
+// Returns true when |mac| equals the HMAC-SHA1 of the |data_length| bytes at
+// |data|, keyed with |key|.  Both |key| and |mac| are run through
+// DecodeWebSafe() and then base64-decoded before use.  Returns false when
+// the decoded MAC is too short to be a digest, when signing fails, or when
+// the digests differ.
+bool VerifyMAC(const std::string& key, const std::string& mac,
+               const char* data, int data_length) {
+  std::string key_copy = key;
+  DecodeWebSafe(&key_copy);
+  std::string decoded_key;
+  Base64Decode(key_copy, &decoded_key);
+
+  std::string mac_copy = mac;
+  DecodeWebSafe(&mac_copy);
+  std::string decoded_mac;
+  Base64Decode(mac_copy, &decoded_mac);
+
+  // The memcmp() below reads kSafeBrowsingMacDigestSize bytes out of
+  // |decoded_mac|.  A shorter (or undecodable, hence empty) MAC can never
+  // match, and comparing it anyway would read past the end of the string.
+  if (decoded_mac.size() < static_cast<size_t>(kSafeBrowsingMacDigestSize))
+    return false;
+
+  HMAC hmac(HMAC::SHA1,
+            reinterpret_cast<const unsigned char*>(decoded_key.data()),
+            static_cast<int>(decoded_key.length()));
+  const std::string data_str(data, data_length);
+  unsigned char digest[kSafeBrowsingMacDigestSize];
+  if (!hmac.Sign(data_str, digest, kSafeBrowsingMacDigestSize))
+    return false;
+
+  return memcmp(digest, decoded_mac.data(), kSafeBrowsingMacDigestSize) == 0;
+}
+
+// Empties |chunks|, calling Destroy() on the SBEntry of every host first.
+void FreeChunks(std::deque<SBChunk>* chunks) {
+  for (; !chunks->empty(); chunks->pop_front()) {
+    std::deque<SBChunkHost>& hosts = chunks->front().hosts;
+    for (; !hosts.empty(); hosts.pop_front())
+      hosts.front().entry->Destroy();
+  }
+}
+
+// Builds the URL of the phishing report form: |report_page| followed by the
+// kReportParams query, filled in with the escaped "continue" URL (localized
+// with the default locale's language) and the escaped |url_to_report|.  The
+// result is passed through google_util::AppendGoogleLocaleParam().
+GURL GeneratePhishingReportUrl(const std::string& report_page,
+                               const std::string& url_to_report) {
+  Locale locale = Locale::getDefault();
+  const char* lang = locale.getLanguage();
+  // An empty language code would leave a hole in the continue URL, so it is
+  // treated the same as a missing one.
+  if (!lang || !*lang)
+    lang = "en";  // fallback
+  const std::string continue_esc =
+      EscapeQueryParamValue(StringPrintf(kContinueUrlFormat, lang));
+  const std::string current_esc = EscapeQueryParamValue(url_to_report);
+
+  // Only the fixed kReportParams template is used as a format string.
+  // |report_page| is appended verbatim so that a '%' inside it can never be
+  // interpreted as a conversion specifier.
+  const std::string params = StringPrintf(kReportParams,
+                                          continue_esc.c_str(),
+                                          current_esc.c_str());
+  GURL report_url(report_page + params);
+  return google_util::AppendGoogleLocaleParam(report_url);
+}
+
+} // namespace safe_browsing_util
+
+const int SBEntry::kMinSize = sizeof(SBEntry::Data);  // Fixed header only, i.e. the size of an entry with zero prefixes.
+
+// Allocates a zero-filled entry big enough for |prefix_count| records of the
+// given |type| and stamps the type and count into its header.  The caller
+// releases it with Destroy().
+SBEntry* SBEntry::Create(Type type, int prefix_count) {
+  const int bytes = Size(type, prefix_count);
+  // calloc() returns the storage already zeroed.
+  SBEntry* entry = static_cast<SBEntry*>(calloc(1, bytes));
+  entry->set_type(type);
+  entry->set_prefix_count(prefix_count);
+  return entry;
+}
+
+void SBEntry::Destroy() {
+ free(this);  // Create() takes the storage from the C heap, so it is released with free(), not delete.
+}
+
+// An entry is valid exactly when its type is one of the four known kinds.
+bool SBEntry::IsValid() const {
+  const Type entry_type = type();
+  return entry_type == ADD_PREFIX ||
+         entry_type == ADD_FULL_HASH ||
+         entry_type == SUB_PREFIX ||
+         entry_type == SUB_FULL_HASH;
+}
+
+// Byte size of this entry, computed from its own type and prefix count.
+int SBEntry::Size() const {
+  const Type entry_type = type();
+  const int count = prefix_count();
+  return Size(entry_type, count);
+}
+
+// Byte size of an entry: the fixed header followed by |prefix_count| records
+// whose individual size depends on |type|.
+int SBEntry::Size(Type type, int prefix_count) {
+  const int payload_bytes = prefix_count * PrefixSize(type);
+  return sizeof(Data) + payload_bytes;
+}
+
+// Returns a freshly created entry with room for |extra_prefixes| more
+// records, holding a copy of this entry's bytes.  This entry is destroyed.
+SBEntry* SBEntry::Enlarge(int extra_prefixes) {
+  const int grown_count = prefix_count() + extra_prefixes;
+  SBEntry* grown = SBEntry::Create(type(), grown_count);
+  // The raw copy also overwrites the new header with the old prefix count,
+  // so the count has to be set again afterwards.
+  memcpy(grown, this, Size());
+  grown->set_prefix_count(grown_count);
+  Destroy();
+  return grown;
+}
+
+// Removes the record at |index| by sliding every later record down one slot
+// and decrementing the prefix count.
+void SBEntry::RemovePrefix(int index) {
+  DCHECK(index >= 0 && index < prefix_count());
+
+  // Locate the record to drop in whichever union member this type uses.
+  void* to = NULL;
+  switch (type()) {
+    case ADD_PREFIX:
+      to = &add_prefixes_[index];
+      break;
+    case ADD_FULL_HASH:
+      to = &add_full_hashes_[index];
+      break;
+    case SUB_PREFIX:
+      to = &sub_prefixes_[index];
+      break;
+    case SUB_FULL_HASH:
+      to = &sub_full_hashes_[index];
+      break;
+    default:
+      // Unknown type: |to| stays NULL so nothing is moved below.
+      NOTREACHED();
+      break;
+  }
+
+  if (to) {
+    const int record_size = PrefixSize(type());
+    const int bytes_to_copy = record_size * (prefix_count() - index - 1);
+    char* from = reinterpret_cast<char*>(to) + record_size;
+    memmove(to, from, bytes_to_copy);
+  }
+
+  // The count is decremented even for an unknown type, as before, so loops
+  // of the form RemovePrefix(i--) still terminate.
+  set_prefix_count(prefix_count() - 1);
+}
+
+// Returns true when the record at |index| of this entry and the record at
+// |that_index| of |that| refer to the same hash of the same add chunk.
+bool SBEntry::PrefixesMatch(
+    int index, const SBEntry* that, int that_index) const {
+  // A match needs one add and one sub, both using the same hash width.
+  const bool same_hash_len = HashLen() == that->HashLen();
+  const bool same_direction = IsAdd() == that->IsAdd();
+  if (!same_hash_len || same_direction)
+    return false;
+
+  if (ChunkIdAtPrefix(index) != that->ChunkIdAtPrefix(that_index))
+    return false;
+
+  return HashLen() == sizeof(SBPrefix)
+      ? PrefixAt(index) == that->PrefixAt(that_index)
+      : FullHashAt(index) == that->FullHashAt(that_index);
+}
+
+// Compares the add record at |index| with |full_hash|.  Full-hash entries
+// are compared whole; prefix entries are compared against the leading
+// sizeof(SBPrefix) bytes of |full_hash|.
+bool SBEntry::AddPrefixMatches(int index, const SBFullHash& full_hash) const {
+  DCHECK(IsAdd());
+
+  if (HashLen() != sizeof(SBFullHash)) {
+    SBPrefix leading_bytes;
+    memcpy(&leading_bytes, &full_hash, sizeof(SBPrefix));
+    return add_prefixes_[index] == leading_bytes;
+  }
+
+  return add_full_hashes_[index] == full_hash;
+}
+
+// True for both add kinds (prefix and full hash).
+bool SBEntry::IsAdd() const {
+  const Type entry_type = type();
+  return entry_type == ADD_PREFIX || entry_type == ADD_FULL_HASH;
+}
+
+// True for both sub kinds (prefix and full hash).
+bool SBEntry::IsSub() const {
+  const Type entry_type = type();
+  return entry_type == SUB_PREFIX || entry_type == SUB_FULL_HASH;
+}
+
+// Width in bytes of the hashes this entry stores: sizeof(SBPrefix) for the
+// two prefix kinds, sizeof(SBFullHash) for everything else.
+int SBEntry::HashLen() const {
+  const Type entry_type = type();
+  const bool stores_prefixes =
+      entry_type == ADD_PREFIX || entry_type == SUB_PREFIX;
+  return stores_prefixes ? sizeof(SBPrefix) : sizeof(SBFullHash);
+}
+
+// Size in bytes of one stored record for |type|.  Sub records use the
+// SBSubPrefix / SBSubFullHash structs, which also carry an add chunk id.
+// Returns 0 for an unknown type.
+int SBEntry::PrefixSize(Type type) {
+  if (type == ADD_PREFIX)
+    return sizeof(SBPrefix);
+  if (type == ADD_FULL_HASH)
+    return sizeof(SBFullHash);
+  if (type == SUB_PREFIX)
+    return sizeof(SBSubPrefix);
+  if (type == SUB_FULL_HASH)
+    return sizeof(SBSubFullHash);
+
+  NOTREACHED();
+  return 0;
+}
+
+// Sub entries keep an add chunk id per record; every other type uses the
+// entry-wide chunk id.
+int SBEntry::ChunkIdAtPrefix(int index) const {
+  switch (type()) {
+    case SUB_PREFIX:
+      return sub_prefixes_[index].add_chunk;
+    case SUB_FULL_HASH:
+      return sub_full_hashes_[index].add_chunk;
+    default:
+      return chunk_id();
+  }
+}
+
+// Stores |chunk_id| as the add chunk id of the sub record at |index|.
+void SBEntry::SetChunkIdAtPrefix(int index, int chunk_id) {
+  DCHECK(IsSub());
+
+  if (type() != SUB_PREFIX) {
+    sub_full_hashes_[index].add_chunk = chunk_id;
+    return;
+  }
+
+  sub_prefixes_[index].add_chunk = chunk_id;
+}
+
+// Returns the prefix stored at |index|.  Only meaningful for prefix-width
+// entries.
+const SBPrefix& SBEntry::PrefixAt(int index) const {
+  DCHECK(HashLen() == sizeof(SBPrefix));
+
+  return IsAdd() ? add_prefixes_[index] : sub_prefixes_[index].prefix;
+}
+
+// Returns the full hash stored at |index|.  Only meaningful for full-hash
+// entries.
+const SBFullHash& SBEntry::FullHashAt(int index) const {
+  DCHECK(HashLen() == sizeof(SBFullHash));
+
+  return IsAdd() ? add_full_hashes_[index] : sub_full_hashes_[index].prefix;
+}
+
+// Stores |prefix| in the record at |index| of a prefix-width entry.
+void SBEntry::SetPrefixAt(int index, const SBPrefix& prefix) {
+  DCHECK(HashLen() == sizeof(SBPrefix));
+
+  if (!IsAdd()) {
+    sub_prefixes_[index].prefix = prefix;
+    return;
+  }
+
+  add_prefixes_[index] = prefix;
+}
+
+// Stores |full_hash| in the record at |index| of a full-hash entry.
+void SBEntry::SetFullHashAt(int index, const SBFullHash& full_hash) {
+  DCHECK(HashLen() == sizeof(SBFullHash));
+
+  if (!IsAdd()) {
+    sub_full_hashes_[index].prefix = full_hash;
+    return;
+  }
+
+  add_full_hashes_[index] = full_hash;
+}
+
+
+
+SBHostInfo::SBHostInfo() : size_(0) {  // Starts out holding zero bytes of entry data.
+}
+
+// Loads a private copy of the |size| serialized bytes at |data|.  Returns
+// true for an empty buffer or when every entry found by IsValid() is valid.
+// Returns false, leaving the object empty, when the arguments cannot
+// describe a buffer or the contents fail validation.
+bool SBHostInfo::Initialize(const void* data, int size) {
+  // A negative size, or bytes promised without a pointer to read them from,
+  // must not reach new[] / memcpy() below.
+  if (size < 0 || (size > 0 && !data)) {
+    size_ = 0;
+    data_.reset();
+    return false;
+  }
+
+  size_ = size;
+  if (!size_)
+    return true;
+
+  data_.reset(new char[size_]);
+  memcpy(data_.get(), data, size_);
+  if (!IsValid()) {
+    size_ = 0;
+    data_.reset();
+    return false;
+  }
+
+  return true;
+}
+
+// True when every entry that GetNextEntry() yields passes SBEntry::IsValid().
+bool SBHostInfo::IsValid() {
+  for (const SBEntry* entry = NULL; GetNextEntry(&entry); ) {
+    if (!entry->IsValid())
+      return false;
+  }
+  return true;
+}
+
+// Appends a byte copy of |entry| to the end of the stored data, reallocating
+// the buffer to the exact new size.
+void SBHostInfo::Add(const SBEntry* entry) {
+  const int entry_size = entry->Size();
+  const int new_size = size_ + entry_size;
+  char* new_data = new char[new_size];
+  // While nothing is stored yet |data_| may still be null, and memcpy() from
+  // a null pointer is undefined even for zero bytes, so skip the copy.
+  if (size_)
+    memcpy(new_data, data_.get(), size_);
+  memcpy(new_data + size_, entry, entry_size);
+  data_.reset(new_data);
+  size_ = new_size;
+  DCHECK(IsValid());
+}
+
+// Stores the add |entry|, first applying a sub for the same list that
+// arrived earlier.  |entry| is modified when prefixes are stripped from it.
+void SBHostInfo::AddPrefixes(SBEntry* entry) {
+  DCHECK(entry->IsAdd());
+
+  for (const SBEntry* stored = NULL; GetNextEntry(&stored); ) {
+    // Only subs recorded for the same list are of interest.
+    const bool is_sub_for_list =
+        !stored->IsAdd() && entry->list_id() == stored->list_id();
+    if (!is_sub_for_list)
+      continue;
+
+    if (stored->prefix_count() == 0) {
+      if (entry->chunk_id() == stored->chunk_id()) {
+        // The whole chunk was subbed already: none of these prefixes is
+        // added, and the sub data for this chunk_id is no longer needed.
+        RemoveSubEntry(entry->list_id(), entry->chunk_id());
+        return;
+      }
+      continue;
+    }
+
+    // Strip every prefix of |entry| that this sub cancels.
+    for (int sub_idx = 0; sub_idx < stored->prefix_count(); ++sub_idx) {
+      int add_idx = 0;
+      while (add_idx < entry->prefix_count()) {
+        if (entry->PrefixesMatch(add_idx, stored, sub_idx))
+          entry->RemovePrefix(add_idx);  // Next prefix slides into add_idx.
+        else
+          ++add_idx;
+      }
+    }
+
+    // RemoveSubEntry() replaces data_, which invalidates |stored|, so the
+    // scan ends here.
+    // NOTE(review): only the first sub entry with prefixes for this list is
+    // applied - confirm that later ones can be ignored.
+    RemoveSubEntry(entry->list_id(), entry->chunk_id());
+    break;
+  }
+
+  Add(entry);
+  DCHECK(IsValid());
+}
+
+// Rebuilds the stored data without the sub information that refers to add
+// chunk |chunk_id| of list |list_id|: prefix-less subs with that chunk id
+// are dropped, and other subs of the list lose the records whose add chunk
+// is |chunk_id| (the entry is dropped when no record is left).
+void SBHostInfo::RemoveSubEntry(int list_id, int chunk_id) {
+  // The result never grows, so a buffer of the current size is enough.
+  scoped_array<char> rebuilt(new char[size_]);
+  char* out = rebuilt.get();
+  int rebuilt_size = 0;
+
+  for (const SBEntry* entry = NULL; GetNextEntry(&entry); ) {
+    const bool sub_for_list = entry->IsSub() && entry->list_id() == list_id;
+
+    // A prefix-less sub covering exactly this chunk is simply left out.
+    if (sub_for_list &&
+        entry->prefix_count() == 0 &&
+        entry->chunk_id() == chunk_id) {
+      continue;
+    }
+
+    const SBEntry* to_write = entry;
+    scoped_array<char> scratch;
+    if (sub_for_list && entry->prefix_count()) {
+      // Edit a private copy; the stored entry stays untouched while it is
+      // still being enumerated.
+      scratch.reset(new char[entry->Size()]);
+      SBEntry* trimmed = reinterpret_cast<SBEntry*>(scratch.get());
+      memcpy(trimmed, entry, entry->Size());
+
+      int idx = 0;
+      while (idx < trimmed->prefix_count()) {
+        if (trimmed->ChunkIdAtPrefix(idx) == chunk_id)
+          trimmed->RemovePrefix(idx);  // Next record slides into idx.
+        else
+          ++idx;
+      }
+
+      if (trimmed->prefix_count() == 0)
+        continue;  // Its last record is gone, so the entry goes too.
+
+      to_write = trimmed;
+    }
+
+    const int bytes = to_write->Size();
+    memcpy(out, to_write, bytes);
+    rebuilt_size += bytes;
+    out += bytes;
+  }
+
+  size_ = rebuilt_size;
+  data_.reset(rebuilt.release());
+  DCHECK(IsValid());
+}
+
+// Applies the sub |sub_entry| to the stored add entries of the same list.
+//  - If |sub_entry| arrives with no prefixes, every add entry whose chunk id
+//    equals sub_entry->chunk_id() is dropped.
+//  - Otherwise each sub prefix cancels the matching add prefix; cancelled
+//    prefixes are removed from |sub_entry| as well, and add entries left
+//    without prefixes are dropped.
+// If |persist| is true and the stored size did not change, |sub_entry| is
+// stored for a later add.
+void SBHostInfo::RemovePrefixes(SBEntry* sub_entry, bool persist) {
+  DCHECK(sub_entry->IsSub());
+
+  // Latch the mode before anything is modified.  Prefixes are removed from
+  // |sub_entry| as they are consumed, so its count can reach 0 part way
+  // through the scan.  That must not turn a per-prefix sub into a
+  // whole-chunk removal, because chunk_id() is not the add chunk for subs
+  // that carry prefixes.
+  const bool remove_whole_chunk = sub_entry->prefix_count() == 0;
+
+  scoped_array<char> new_data(new char[size_]);
+  char* write_ptr = new_data.get();
+  int new_size = 0;
+  const SBEntry* add_entry = NULL;
+  // Remove any of the prefixes that are in the database.
+  while (GetNextEntry(&add_entry)) {
+    SBEntry* new_add_entry = const_cast<SBEntry*>(add_entry);
+    scoped_array<char> data;
+    if (add_entry->IsAdd() && add_entry->list_id() == sub_entry->list_id()) {
+      if (remove_whole_chunk) {
+        if (add_entry->chunk_id() == sub_entry->chunk_id()) {
+          // When prefixes are empty, that means we want to remove the entry
+          // for that host key completely.  No need to add this sub chunk to
+          // the db.
+          persist = false;
+          continue;
+        }
+      } else if (sub_entry->prefix_count()) {
+        // Create another entry that doesn't have these prefixes.
+        data.reset(new char[add_entry->Size()]);
+        new_add_entry = reinterpret_cast<SBEntry*>(data.get());
+        memcpy(new_add_entry, add_entry, add_entry->Size());
+
+        for (int i = 0; i < new_add_entry->prefix_count(); ++i) {
+          for (int j = 0; j < sub_entry->prefix_count(); ++j) {
+            if (!sub_entry->PrefixesMatch(j, new_add_entry, i))
+              continue;
+
+            // Both records are used up.  Stepping the indices back makes
+            // the enclosing loops revisit the slots the following records
+            // slide into.
+            new_add_entry->RemovePrefix(i--);
+            sub_entry->RemovePrefix(j--);
+            if (sub_entry->prefix_count() == 0)
+              persist = false;  // Sub entry is all used up.
+
+            break;
+          }
+        }
+      }
+    }
+
+    // If we didn't modify the entry, then add it.  Else if we modified it,
+    // then only add it if there are prefixes left.  Otherwise, if it had n
+    // prefixes and now it has 0, if we were to add it that would mean all
+    // prefixes from that host are in the database.
+    if (new_add_entry == add_entry || new_add_entry->prefix_count()) {
+      memcpy(write_ptr, new_add_entry, new_add_entry->Size());
+      new_size += new_add_entry->Size();
+      write_ptr += new_add_entry->Size();
+    }
+  }
+
+  if (persist && new_size == size_) {
+    // We didn't find any matches because the sub came before the add, so
+    // save it for later.
+    Add(sub_entry);
+    return;
+  }
+
+  size_ = new_size;
+  data_.reset(new_data.release());
+  DCHECK(IsValid());
+}
+
+// Checks the stored add entries against |prefixes|.  Returns true on any
+// match, or as soon as an add entry without prefixes is seen.  A full-hash
+// match stores the entry's list id in |list_id|; a prefix match appends the
+// stored prefix to |prefix_hits|.
+// NOTE(review): the early return for a prefix-less add entry leaves
+// |list_id| and |prefix_hits| as filled in by earlier entries - confirm
+// callers expect that.
+bool SBHostInfo::Contains(const std::vector<SBFullHash>& prefixes,
+                          int* list_id,
+                          std::vector<SBPrefix>* prefix_hits) {
+  prefix_hits->clear();
+  *list_id = -1;
+
+  bool found = false;
+  for (const SBEntry* entry = NULL; GetNextEntry(&entry); ) {
+    if (entry->IsSub())
+      continue;
+
+    const int count = entry->prefix_count();
+    if (count == 0) {
+      // This means all paths for this url are blacklisted.
+      return true;
+    }
+
+    const bool full_hash_entry = entry->HashLen() == sizeof(SBFullHash);
+    for (int i = 0; i < count; ++i) {
+      for (size_t j = 0; j < prefixes.size(); ++j) {
+        if (entry->AddPrefixMatches(i, prefixes[j])) {
+          found = true;
+          if (full_hash_entry)
+            *list_id = entry->list_id();
+          else
+            prefix_hits->push_back(entry->PrefixAt(i));
+        }
+      }
+    }
+  }
+
+  return found;
+}
+
+// Advances |*entry| to the entry that follows it inside |data_|, or to the
+// first entry when |*entry| is NULL.  Returns false, leaving |*entry|
+// untouched, when no further entry fits wholly inside the buffer.
+bool SBHostInfo::GetNextEntry(const SBEntry** entry) {
+  const char* current = reinterpret_cast<const char*>(*entry);
+
+  // It is an error to call this function with a |*entry| outside of |data_|.
+  DCHECK(!current || current >= data_.get());
+  DCHECK(!current || current + (*entry)->Size() <= data_.get() + size_);
+
+  // Compute the address of the next entry.
+  const char* next = current ? current + (*entry)->Size() : data_.get();
+  const SBEntry* next_entry = reinterpret_cast<const SBEntry*>(next);
+  const char* end = data_.get() + size_;
+
+  // The fixed header has to fit before any of its fields may be read.
+  if (!next || end - next < SBEntry::kMinSize)
+    return false;
+
+  // A negative count would make Size() shrink below the header (or to 0),
+  // so the walk could stall or move backwards.  Such data is corrupt.
+  const int prefix_count = next_entry->prefix_count();
+  if (prefix_count < 0)
+    return false;
+
+  // Check that the records fit by dividing rather than multiplying, so a
+  // huge count cannot overflow.  A record size of 0 (unknown type) needs no
+  // room beyond the header; IsValid() rejects such entries afterwards.
+  const int prefix_size = SBEntry::PrefixSize(next_entry->type());
+  const int payload_room = static_cast<int>(end - next) - SBEntry::kMinSize;
+  if (prefix_size > 0 && prefix_count > payload_room / prefix_size)
+    return false;
+
+  *entry = next_entry;
+  return true;
+}
diff --git a/chrome/browser/safe_browsing/safe_browsing_util.h b/chrome/browser/safe_browsing/safe_browsing_util.h
new file mode 100644
index 0000000..b1b8151
--- /dev/null
+++ b/chrome/browser/safe_browsing/safe_browsing_util.h
@@ -0,0 +1,324 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Utilities for the SafeBrowsing code.
+
+#ifndef CHROME_BROWSER_SAFE_BROWSING_UTIL_H__
+#define CHROME_BROWSER_SAFE_BROWSING_UTIL_H__
+
+#include <deque>
+#include <string>
+#include <vector>
+
+#include "base/scoped_ptr.h"
+#include "chrome/browser/safe_browsing/chunk_range.h"
+#include "googleurl/src/gurl.h"
+
+//#define SB_LOGGING_ENABLED
+#ifdef SB_LOGGING_ENABLED
+#define SB_DLOG(severity) DLOG_IF(INFO, 1)
+#else
+#define SB_DLOG(severity) DLOG_IF(INFO, 0)
+#endif
+
+// forward declaration
+class SBEntry;
+
+// Widely used typedefs -------------------------------------------------------
+
+// Pairs a chunk URL with the MAC of the contents of that URL.
+struct ChunkUrl {
+  std::string url;
+  std::string mac;
+};
+
+// A truncated hash: the leading sizeof(SBPrefix) bytes of an SBFullHash.
+typedef int SBPrefix;
+
+// A full 32-byte hash.
+struct SBFullHash {
+  char full_hash[32];
+};
+
+// Two full hashes are equal when all of their bytes are equal.
+inline bool operator==(const SBFullHash& lhs, const SBFullHash& rhs) {
+  const int difference =
+      memcmp(lhs.full_hash, rhs.full_hash, sizeof(SBFullHash));
+  return difference == 0;
+}
+
+// Container for information about a specific host in an add/sub chunk.
+struct SBChunkHost {
+ SBPrefix host;
+ SBEntry* entry;  // Released with SBEntry::Destroy() by safe_browsing_util::FreeChunks().
+};
+
+// Container for an add/sub chunk.
+struct SBChunk {
+ int chunk_number;
+ std::deque<SBChunkHost> hosts;  // Emptied, with every entry destroyed, by safe_browsing_util::FreeChunks().
+};
+
+// Used when we get a gethash response.
+struct SBFullHashResult {
+ SBFullHash hash;  // Compared with the hashes of a URL's host + path combinations in CompareFullHashes().
+ std::string list_name;
+ int add_chunk_id;
+};
+
+// Contains information about a list in the database.
+struct SBListChunkRanges {
+ std::string name; // The list name.
+ std::string adds; // The ranges for add chunks.
+ std::string subs; // The ranges for sub chunks.
+
+ SBListChunkRanges(const std::string& n) : name(n) { }  // Leaves |adds| and |subs| empty.
+};
+
+// Container for deleting chunks from the database.
+struct SBChunkDelete {
+ std::string list_name;
+ bool is_sub_del;  // NOTE(review): presumably true when sub chunks (not add chunks) are deleted - confirm.
+ std::vector<ChunkRange> chunk_del;
+};
+
+
+// Holds information about the prefixes for a hostkey. prefixes can either be
+// 4 bytes (truncated hash) or 32 bytes (full hash).
+// For adds:
+// [list id ][chunk id][prefix count (0..n)][prefix1][prefix2]
+// For subs:
+// [list id ][chunk id (only used if prefix count is 0)][prefix count (0..n)]
+// [add chunk][prefix][add chunk][prefix]
+class SBEntry {
+ public:
+ enum Type {
+ ADD_PREFIX, // 4 byte add entry.
+ SUB_PREFIX, // 4 byte sub entry.
+ ADD_FULL_HASH, // 32 byte add entry.
+ SUB_FULL_HASH, // 32 byte sub entry.
+ };
+
+ // The minimum size of an SBEntry.
+ static const int kMinSize;
+
+ // Creates a SBEntry with the necessary size for the given number of prefixes.
+ // Caller owns the object and needs to free it by calling Destroy.
+ static SBEntry* Create(Type type, int prefix_count);
+
+ // Frees the entry's memory.
+ void Destroy();
+
+ // Returns whether this entry's type is one of the known Type values.
+ bool IsValid() const;
+
+ // Returns how many bytes this entry is.
+ int Size() const;
+
+ // Helper to return how much memory a given Entry would require.
+ static int Size(Type type, int prefix_count);
+
+ void set_list_id(int list_id) { data_.list_id = list_id; }
+ int list_id() const { return data_.list_id; }
+ void set_chunk_id(int chunk_id) { data_.chunk_id = chunk_id; }
+ int chunk_id() const { return data_.chunk_id; }
+ int prefix_count() const { return data_.prefix_count; }
+ Type type() const { return data_.type; }
+
+ // Returns a new entry that is larger by the given number of prefixes, with
+ // all the existing data already copied over. The old entry is destroyed.
+ SBEntry* Enlarge(int extra_prefixes);
+
+ // Removes the prefix at the given index.
+ void RemovePrefix(int index);
+
+ // Returns true if the prefix/hash at the given index is equal to a
+ // prefix/hash at another entry's index. Only an add and a sub of the same
+ // hash size can match, and their add chunk ids must be equal as well.
+ bool PrefixesMatch(int index, const SBEntry* that, int that_index) const;
+
+ // Returns true if the add prefix/hash at the given index is equal to the
+ // given full hash.
+ bool AddPrefixMatches(int index, const SBFullHash& full_hash) const;
+
+ // Returns true if this is an add entry.
+ bool IsAdd() const;
+
+ // Returns true if this is a sub entry.
+ bool IsSub() const;
+
+ // Helper to return the size of the prefixes.
+ int HashLen() const;
+
+ // Helper to return the size of each prefix entry (i.e. for subs this
+ // includes an add chunk id).
+ static int PrefixSize(Type type);
+
+ // For add entries, returns the add chunk id. For sub entries, returns the
+ // add_chunk id for the prefix at the given index.
+ int ChunkIdAtPrefix(int index) const;
+
+ // Used for sub chunks to set the chunk id at a given index.
+ void SetChunkIdAtPrefix(int index, int chunk_id);
+
+ // Return the prefix/full hash at the given index. Caller is expected to
+ // call the right function based on the hash length.
+ const SBPrefix& PrefixAt(int index) const;
+ const SBFullHash& FullHashAt(int index) const;
+
+ // Set the prefix/full hash at the given index. Caller is expected to
+ // call the right function based on the hash length.
+ void SetPrefixAt(int index, const SBPrefix& prefix);
+ void SetFullHashAt(int index, const SBFullHash& full_hash);
+
+ private:
+ SBEntry();
+ ~SBEntry();
+
+ void set_prefix_count(int count) { data_.prefix_count = count; }
+ void set_type(Type type) { data_.type = type; }
+
+ // Container for a sub prefix.
+ struct SBSubPrefix {
+ int add_chunk;
+ SBPrefix prefix;
+ };
+
+ // Container for a sub full hash.
+ struct SBSubFullHash {
+ int add_chunk;
+ SBFullHash prefix;
+ };
+
+ // Keep the fixed data together in one struct so that we can get its size
+ // easily. If any of this is modified, the database will have to be cleared.
+ struct Data {
+ int list_id;
+ // For adds, this is the add chunk number.
+ // For subs: if prefix_count is 0 then this is the add chunk that this sub
+ // refers to. Otherwise it's ignored, and the add_chunk in sub_prefixes
+ // or sub_full_hashes is used for each corresponding prefix.
+ int chunk_id;
+ Type type;
+ int prefix_count;
+ };
+
+ // The prefixes union must follow the fixed data so that they're contiguous
+ // in memory.
+ Data data_;
+ union {
+ SBPrefix add_prefixes_[1];
+ SBSubPrefix sub_prefixes_[1];
+ SBFullHash add_full_hashes_[1];
+ SBSubFullHash sub_full_hashes_[1];
+ };
+};
+
+
+// Holds the hostkey specific information in the database. This is basically a
+// collection of SBEntry objects.
+class SBHostInfo {
+ public:
+ SBHostInfo();
+ // By default, an empty SBHostInfo is created. Call this to deserialize from
+ // the database. Returns false if |data| is not internally consistent.
+ bool Initialize(const void* data, int size);
+
+ // Adds the given prefixes to the unsafe list. Note that the prefixes array
+ // might be modified internally.
+ void AddPrefixes(SBEntry* entry);
+
+ // Removes the prefixes named by the sub |entry|. If |entry| has no prefixes,
+ // every add entry of its list whose chunk id equals entry->chunk_id() is
+ // removed. Otherwise entry->chunk_id() is ignored and the add chunk id of
+ // each prefix is checked; matched prefixes are removed from |entry| too. If
+ // |persist| is true and nothing changed, |entry| is stored for a later add.
+ void RemovePrefixes(SBEntry* entry, bool persist);
+
+ // Returns true if the host entry contains any of the prefixes. If a full
+ // hash matched, then list_id contains the list id. Otherwise list_id is -1
+ // and prefix_hits contains the matching prefixes if any are matched, or is
+ // empty if the entire host is blacklisted.
+ bool Contains(const std::vector<SBFullHash>& prefixes,
+ int* list_id,
+ std::vector<SBPrefix>* prefix_hits);
+
+ // Used for serialization.
+ const void* data() const { return data_.get(); }
+ const int size() const { return size_; }
+
+ private:
+ // Checks data_ for internal consistency.
+ bool IsValid();
+
+ // Allows enumeration of Entry structs. To start off, pass NULL for *entry,
+ // and afterwards pass back the pointer set by the previous call.
+ bool GetNextEntry(const SBEntry** entry);
+
+ void Add(const SBEntry* entry);  // Appends a byte copy of |entry| to data_.
+
+ void RemoveSubEntry(int list_id, int chunk_id);  // Drops stored sub data referring to add chunk |chunk_id| of list |list_id|.
+
+ // Collection of SBEntry objects.
+ scoped_array<char> data_;
+ int size_;  // Number of bytes held in data_.
+};
+
+
+// Helper functions -----------------------------------------------------------
+
+namespace safe_browsing_util {
+
+void FreeChunks(std::deque<SBChunk>* chunks);  // Calls Destroy() on every host's SBEntry and empties |chunks|.
+
+// Given a URL, returns all the hosts we need to check. They are returned
+// in order of size (i.e. b.c is first, then a.b.c).
+void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts);
+
+// Given a URL, returns all the paths we need to check.
+void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths);
+
+// Given a URL, compare all the possible host + path full hashes to the set of
+// provided full hashes. Returns the index of the match if one is found, or -1
+// otherwise.
+int CompareFullHashes(const GURL& url,
+ const std::vector<SBFullHashResult>& full_hashes);
+
+bool IsPhishingList(const std::string& list_name);  // True if |list_name| contains "-phish-".
+bool IsMalwareList(const std::string& list_name);  // True if |list_name| contains "-malware-".
+
+// Returns 'true' if 'mac' can be verified using 'key' and 'data'.
+bool VerifyMAC(const std::string& key,
+ const std::string& mac,
+ const char* data,
+ int data_length);
+
+GURL GeneratePhishingReportUrl(const std::string& report_page,
+ const std::string& url_to_report);  // |report_page| plus the query carrying the escaped |url_to_report|.
+
+} // namespace safe_browsing_util
+
+#endif // CHROME_BROWSER_SAFE_BROWSING_UTIL_H__
diff --git a/chrome/browser/safe_browsing/safe_browsing_util_unittest.cc b/chrome/browser/safe_browsing/safe_browsing_util_unittest.cc
new file mode 100644
index 0000000..c67e554
--- /dev/null
+++ b/chrome/browser/safe_browsing/safe_browsing_util_unittest.cc
@@ -0,0 +1,260 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+
+#include "base/logging.h"
+#include "base/sha2.h"
+#include "chrome/browser/safe_browsing/safe_browsing_util.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace {
+// Returns true if 'str' equals at least one element of 'data'.
+bool VectorContains(const std::vector<std::string>& data,
+                    const std::string& str) {
+  for (size_t i = 0; i < data.size(); ++i) {
+    if (data[i] == str) return true;
+  }
+  return false;
+}
+
+// Returns a full hash whose leading bytes hold 'prefix'; the rest stay zero.
+SBFullHash CreateFullHash(SBPrefix prefix) {
+  SBFullHash result;
+  memset(&result, 0, sizeof(result));
+  memcpy(&result, &prefix, sizeof(prefix));  // Copy only the prefix.
+  return result;
+}
+}  // namespace
+
+// Verifies that GenerateHostsToCheck() and GeneratePathsToCheck() produce the
+// host/path combinations that the Safe Browsing spec requires us to test.
+// See section 6.2 in
+// http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec.
+TEST(SafeBrowsingUtilTest, UrlParsing) {
+  std::vector<std::string> hosts, paths;
+
+  GURL url("http://a.b.c/1/2.html?param=1");
+  safe_browsing_util::GenerateHostsToCheck(url, &hosts);
+  safe_browsing_util::GeneratePathsToCheck(url, &paths);
+  ASSERT_EQ(hosts.size(), 2U);  // Fatal: 'hosts' is indexed right below.
+  EXPECT_EQ(paths.size(), 4U);
+  EXPECT_EQ(hosts[0], "b.c");
+  EXPECT_EQ(hosts[1], "a.b.c");
+
+  EXPECT_TRUE(VectorContains(paths, "/1/2.html?param=1"));
+  EXPECT_TRUE(VectorContains(paths, "/1/2.html"));
+  EXPECT_TRUE(VectorContains(paths, "/1/"));
+  EXPECT_TRUE(VectorContains(paths, "/"));
+
+  url = GURL("http://a.b.c.d.e.f.g/1.html");
+  safe_browsing_util::GenerateHostsToCheck(url, &hosts);
+  safe_browsing_util::GeneratePathsToCheck(url, &paths);
+  ASSERT_EQ(hosts.size(), 5U);  // Fatal: 'hosts' is indexed right below.
+  EXPECT_EQ(paths.size(), 2U);
+  EXPECT_EQ(hosts[0], "f.g");
+  EXPECT_EQ(hosts[1], "e.f.g");
+  EXPECT_EQ(hosts[2], "d.e.f.g");
+  EXPECT_EQ(hosts[3], "c.d.e.f.g");
+  EXPECT_EQ(hosts[4], "a.b.c.d.e.f.g");
+  EXPECT_TRUE(VectorContains(paths, "/1.html"));
+  EXPECT_TRUE(VectorContains(paths, "/"));
+
+  url = GURL("http://a.b/saw-cgi/eBayISAPI.dll/");
+  safe_browsing_util::GeneratePathsToCheck(url, &paths);
+  EXPECT_EQ(paths.size(), 3U);
+  EXPECT_TRUE(VectorContains(paths, "/saw-cgi/eBayISAPI.dll/"));
+  EXPECT_TRUE(VectorContains(paths, "/saw-cgi/"));
+  EXPECT_TRUE(VectorContains(paths, "/"));
+}
+
+
+TEST(SafeBrowsingUtilTest, FullHashCompare) {
+  // The only known hash is SHA-256(host + path) of the phishing page.
+  const GURL phishing_url("http://www.evil.com/phish.html");
+  SBFullHashResult known_bad;
+  base::SHA256HashString(phishing_url.host() + phishing_url.path(),
+                         &known_bad.hash, sizeof(SBFullHash));
+  const std::vector<SBFullHashResult> known_hashes(1, known_bad);
+
+  // The phishing page matches entry 0; another path on the same host misses.
+  const GURL other_url("http://www.evil.com/okay_path.html");
+  using safe_browsing_util::CompareFullHashes;
+  EXPECT_EQ(CompareFullHashes(phishing_url, known_hashes), 0);
+  EXPECT_EQ(CompareFullHashes(other_url, known_hashes), -1);
+}
+
+// Checks how SBHostInfo tracks add and sub prefixes for a single hostkey.
+TEST(SafeBrowsing, HostInfo) {
+ // Simple case: add one prefix (list 1, chunk 1) to an empty SBHostInfo.
+ SBEntry* entry = SBEntry::Create(SBEntry::ADD_PREFIX, 1);
+ entry->SetPrefixAt(0, 0x01000000);
+ entry->set_list_id(1);
+ entry->set_chunk_id(1);
+
+ SBHostInfo info;
+ info.AddPrefixes(entry);
+ entry->Destroy();
+
+ int list_id;  // Out-param of Contains(); its value is never checked here.
+ std::vector<SBFullHash> full_hashes;
+ full_hashes.push_back(CreateFullHash(0x01000000));
+ std::vector<SBPrefix> prefix_hits;
+ EXPECT_TRUE(info.Contains(full_hashes, &list_id, &prefix_hits));
+
+ // Append add chunk 2 (two prefixes); chunk 1's prefix must still match.
+ entry = SBEntry::Create(SBEntry::ADD_PREFIX, 2);
+ entry->SetPrefixAt(0, 0x02000000);
+ entry->SetPrefixAt(1, 0x02000001);
+ entry->set_list_id(1);
+ entry->set_chunk_id(2);
+ info.AddPrefixes(entry);
+ entry->Destroy();
+
+ full_hashes.clear();
+ full_hashes.push_back(CreateFullHash(0x01000000));
+ EXPECT_TRUE(info.Contains(full_hashes, &list_id, &prefix_hits));
+
+ full_hashes.clear();
+ full_hashes.push_back(CreateFullHash(0x02000000));
+ EXPECT_TRUE(info.Contains(full_hashes, &list_id, &prefix_hits));
+
+ full_hashes.clear();
+ full_hashes.push_back(CreateFullHash(0x02000001));
+ EXPECT_TRUE(info.Contains(full_hashes, &list_id, &prefix_hits));
+
+
+ // Remove all of chunk 1 with a sub entry that carries no prefixes.
+ entry = SBEntry::Create(SBEntry::SUB_PREFIX, 0);
+ entry->set_list_id(1);
+ entry->set_chunk_id(1);
+ info.RemovePrefixes(entry, false);
+ entry->Destroy();
+
+ full_hashes.clear();
+ full_hashes.push_back(CreateFullHash(0x01000000));
+ EXPECT_FALSE(info.Contains(full_hashes, &list_id, &prefix_hits));
+
+ full_hashes.clear();
+ full_hashes.push_back(CreateFullHash(0x02000000));
+ EXPECT_TRUE(info.Contains(full_hashes, &list_id, &prefix_hits));
+
+ full_hashes.clear();
+ full_hashes.push_back(CreateFullHash(0x02000001));
+ EXPECT_TRUE(info.Contains(full_hashes, &list_id, &prefix_hits));
+
+ // Remove a single prefix (0x02000000) of chunk 2; its sibling must survive.
+ entry = SBEntry::Create(SBEntry::SUB_PREFIX, 1);
+ entry->SetPrefixAt(0,0x02000000);
+ entry->SetChunkIdAtPrefix(0, 2);
+ entry->set_list_id(1);
+ info.RemovePrefixes(entry, false);
+ entry->Destroy();
+
+ full_hashes.clear();
+ full_hashes.push_back(CreateFullHash(0x02000000));
+ EXPECT_FALSE(info.Contains(full_hashes, &list_id, &prefix_hits));
+
+ full_hashes.clear();
+ full_hashes.push_back(CreateFullHash(0x02000001));
+ EXPECT_TRUE(info.Contains(full_hashes, &list_id, &prefix_hits));
+
+ // Test a sub that names a prefix before its add chunk (100) has arrived.
+ entry = SBEntry::Create(SBEntry::SUB_PREFIX, 1);
+ entry->SetPrefixAt(0, 0x1000);
+ entry->SetChunkIdAtPrefix(0, 100);
+ entry->set_list_id(1);
+ info.RemovePrefixes(entry, true);  // true: keep the sub? TODO confirm
+ entry->Destroy();
+
+ // A sub on its own must never produce a match.
+ full_hashes.clear();
+ full_hashes.push_back(CreateFullHash(0x1000));
+ EXPECT_FALSE(info.Contains(full_hashes, &list_id, &prefix_hits));
+
+ // Now add chunk 100; the prefix subbed above (0x1000) must stay hidden.
+ entry = SBEntry::Create(SBEntry::ADD_PREFIX, 3);
+ entry->SetPrefixAt(0, 0x10000);
+ entry->SetPrefixAt(1, 0x1000);
+ entry->SetPrefixAt(2, 0x100000);
+ entry->set_list_id(1);
+ entry->set_chunk_id(100);
+ info.AddPrefixes(entry);
+ entry->Destroy();
+
+ full_hashes.clear();
+ full_hashes.push_back(CreateFullHash(0x10000));
+ EXPECT_TRUE(info.Contains(full_hashes, &list_id, &prefix_hits));
+
+ full_hashes.clear();
+ full_hashes.push_back(CreateFullHash(0x1000));
+ EXPECT_FALSE(info.Contains(full_hashes, &list_id, &prefix_hits));
+
+ full_hashes.clear();
+ full_hashes.push_back(CreateFullHash(0x100000));
+ EXPECT_TRUE(info.Contains(full_hashes, &list_id, &prefix_hits));
+
+ // A sub with no prefixes for chunk 100 must remove everything left in it.
+ entry = SBEntry::Create(SBEntry::SUB_PREFIX, 0);
+ entry->set_list_id(1);
+ entry->set_chunk_id(100);
+ info.RemovePrefixes(entry, true);
+ entry->Destroy();
+
+ full_hashes.clear();
+ full_hashes.push_back(CreateFullHash(0x10000));
+ EXPECT_FALSE(info.Contains(full_hashes, &list_id, &prefix_hits));
+
+ full_hashes.clear();
+ full_hashes.push_back(CreateFullHash(0x100000));
+ EXPECT_FALSE(info.Contains(full_hashes, &list_id, &prefix_hits));
+
+ // Sub all of chunk 200 (no prefixes listed) before its add arrives.
+ entry = SBEntry::Create(SBEntry::SUB_PREFIX, 0);
+ entry->set_list_id(1);
+ entry->set_chunk_id(200);
+ info.RemovePrefixes(entry, true);
+ entry->Destroy();
+
+ // Now add chunk 200; the earlier whole-chunk sub must cancel it.
+ entry = SBEntry::Create(SBEntry::ADD_PREFIX, 3);
+ entry->SetPrefixAt(0, 0x2000);
+ entry->SetPrefixAt(1, 0x20000);
+ entry->SetPrefixAt(2, 0x200000);
+ entry->set_list_id(1);
+ entry->set_chunk_id(200);
+ info.AddPrefixes(entry);
+ entry->Destroy();
+
+ // None of the prefixes should be found.
+ full_hashes.clear();
+ full_hashes.push_back(CreateFullHash(0x2000));
+ full_hashes.push_back(CreateFullHash(0x20000));
+ full_hashes.push_back(CreateFullHash(0x200000));
+ EXPECT_FALSE(info.Contains(full_hashes, &list_id, &prefix_hits));
+}