diff options
-rw-r--r-- | chrome/browser/privacy_blacklist/blacklist.cc | 93 | ||||
-rw-r--r-- | chrome/browser/privacy_blacklist/blacklist.h | 43 | ||||
-rw-r--r-- | chrome/browser/privacy_blacklist/blacklist_io.cc | 183 | ||||
-rw-r--r-- | chrome/browser/privacy_blacklist/blacklist_io.h | 43 | ||||
-rw-r--r-- | chrome/browser/privacy_blacklist/blacklist_io_unittest.cc | 47 | ||||
-rw-r--r-- | chrome/browser/privacy_blacklist/blacklist_store.cc | 114 | ||||
-rw-r--r-- | chrome/browser/privacy_blacklist/blacklist_store.h | 95 | ||||
-rw-r--r-- | chrome/browser/privacy_blacklist/blacklist_unittest.cc | 80 | ||||
-rw-r--r-- | chrome/chrome.gyp | 5 | ||||
-rw-r--r-- | chrome/test/data/blacklist_small.pbl | 19 | ||||
-rw-r--r-- | chrome/test/data/blacklist_small.pbr | bin | 0 -> 262 bytes | |||
-rw-r--r-- | chrome/tools/pbl_tool/pbl_tool.cc | 49 |
12 files changed, 753 insertions, 18 deletions
diff --git a/chrome/browser/privacy_blacklist/blacklist.cc b/chrome/browser/privacy_blacklist/blacklist.cc index 8b575a8..e937ad2 100644 --- a/chrome/browser/privacy_blacklist/blacklist.cc +++ b/chrome/browser/privacy_blacklist/blacklist.cc @@ -7,19 +7,48 @@ #include <algorithm> #include <string> +#include "base/file_path.h" +#include "base/file_util.h" +#include "chrome/browser/privacy_blacklist/blacklist_store.h" +#include "net/http/http_util.h" + +#define STRINGIZE(s) #s + namespace { -bool matches(std::string pattern, std::string url) { +bool matches(const std::string& pattern, const std::string& url) { return url.find(pattern) != std::string::npos; } -} +const char* const cookie_headers[2] = { "cookie", "set-cookie" }; + +} // namespace // Value is not important, here just that the object has an address. const void* const Blacklist::kRequestDataKey = 0; +unsigned int Blacklist::String2Attribute(const std::string& s) { + if (s == STRINGIZE(kBlockAll)) + return kBlockAll; + else if (s == STRINGIZE(kDontSendCookies)) + return kDontSendCookies; + else if (s == STRINGIZE(kDontStoreCookies)) + return kDontStoreCookies; + else if (s == STRINGIZE(kDontPersistCookies)) + return kDontPersistCookies; + else if (s == STRINGIZE(kDontSendReferrer)) + return kDontSendReferrer; + else if (s == STRINGIZE(kDontSendUserAgent)) + return kDontSendUserAgent; + else if (s == STRINGIZE(kBlockByType)) + return kBlockByType; + else if (s == STRINGIZE(kBlockUnsecure)) + return kBlockUnsecure; + return 0; +} + bool Blacklist::Entry::MatchType(const std::string& type) const { - return std::find(types_->begin(), types_->end(), type) != types_->end(); + return std::find(types_.begin(), types_.end(), type) != types_.end(); } bool Blacklist::Entry::IsBlocked(const GURL& url) const { @@ -27,21 +56,68 @@ bool Blacklist::Entry::IsBlocked(const GURL& url) const { ((attributes_ & kBlockUnsecure) && !url.SchemeIsSecure()); } -Blacklist::Entry::Entry(const std::string& pattern, unsigned int attributes) - : pattern_(pattern), attributes_(attributes) {} +Blacklist::Entry::Entry(const std::string& pattern, const Provider* provider) + : pattern_(pattern), attributes_(0), provider_(provider) {} + +void Blacklist::Entry::AddAttributes(unsigned int attributes) { + attributes_ |= attributes; +} void Blacklist::Entry::AddType(const std::string& type) { - types_->push_back(type); + types_.push_back(type); +} + +void Blacklist::Entry::Merge(const Entry& entry) { + attributes_ |= entry.attributes_; + + std::copy(entry.types_.begin(), entry.types_.end(), + std::back_inserter(types_)); +} + +void Blacklist::Entry::SwapTypes(std::vector<std::string>* types) { + if (types && types->size()) { + types->swap(types_); + } } Blacklist::Blacklist(const FilePath& file) { - // TODO(idanan): Do something here. + // No blacklist, nothing to load. + if (file.value().empty()) + return; + + BlacklistStoreInput input(file_util::OpenFile(file, "rb")); + + // Read the providers + std::size_t n = input.ReadNumProviders(); + providers_.reserve(n); + std::string name; + std::string url; + for (std::size_t i = 0; i < n; ++i) { + input.ReadProvider(&name, &url); + providers_.push_back(new Provider(name.c_str(), url.c_str())); + } + + // Read the entries + n = input.ReadNumEntries(); + std::string pattern; + unsigned int attributes, provider; + std::vector<std::string> types; + for (unsigned int i = 0; i < n; ++i) { + input.ReadEntry(&pattern, &attributes, &types, &provider); + Entry* entry = new Entry(pattern, providers_[provider]); + entry->AddAttributes(attributes); + entry->SwapTypes(&types); + blacklist_.push_back(entry); + } } Blacklist::~Blacklist() { for (std::vector<Entry*>::iterator i = blacklist_.begin(); i != blacklist_.end(); ++i) delete *i; + for (std::vector<Provider*>::iterator i = providers_.begin(); + i != providers_.end(); ++i) + delete *i; } // Returns a pointer to the Blacklist-owned entry which matches the given @@ -55,8 +131,7 @@ const Blacklist::Entry* Blacklist::findMatch(const GURL& url) const { } std::string Blacklist::StripCookies(const std::string& header) { - // TODO(idanan): Implement this. - return header; + return net::HttpUtil::StripHeaders(header, cookie_headers, 2); } std::string Blacklist::StripCookieExpiry(const std::string& cookie) { diff --git a/chrome/browser/privacy_blacklist/blacklist.h b/chrome/browser/privacy_blacklist/blacklist.h index f3f49ea..9d2b2e6 100644 --- a/chrome/browser/privacy_blacklist/blacklist.h +++ b/chrome/browser/privacy_blacklist/blacklist.h @@ -9,7 +9,6 @@ #include <vector> #include "base/basictypes.h" -#include "base/scoped_ptr.h" #include "googleurl/src/gurl.h" #include "net/url_request/url_request.h" @@ -55,6 +54,23 @@ class Blacklist { // Key used to access data attached to URLRequest objects. static const void* const kRequestDataKey; + // Takes a string an returns the matching attribute, 0 if none matches. + static unsigned int String2Attribute(const std::string&); + + // Blacklist entries come from a provider, defined by a name and source URL. + class Provider { + public: + Provider() {} + Provider(const char* name, const char* url) : name_(name), url_(url) {} + const std::string& name() const { return name_; } + const std::string& url() const { return url_; } + void set_name(const std::string& name) { name_ = name; } + void set_url(const std::string& url) { url_ = url; } + private: + std::string name_; + std::string url_; + }; + // A single blacklist entry which is returned when a URL matches one of // the patterns. Entry objects are owned by the Blacklist that stores them. class Entry { @@ -65,6 +81,9 @@ class Blacklist { // Bitfield of filter-attributes matching the pattern. unsigned int attributes() const { return attributes_; } + // Provider of this blacklist entry, used for assigning blame ;) + const Provider* provider() const { return provider_; } + // Returns true if the given type matches one of the types for which // the filter-attributes of this pattern apply. This needs only to be // checked for content-type specific rules, as determined by calling @@ -76,14 +95,28 @@ class Blacklist { bool IsBlocked(const GURL&) const; private: - Entry(const std::string& pattern, unsigned int attributes); + // Construct with given pattern. + explicit Entry(const std::string& pattern, const Provider* provider); + + void AddAttributes(unsigned int attributes); void AddType(const std::string& type); + // Merge the attributes and types of the given entry with this one. + void Merge(const Entry& entry); + + // Swap the given vector content for the type vector for quick loading. + void SwapTypes(std::vector<std::string>* types); + std::string pattern_; unsigned int attributes_; - scoped_ptr< std::vector<std::string> > types_; + std::vector<std::string> types_; + + // Points to the provider of this entry, the providers are all + // owned by the blacklist. + const Provider* provider_; - friend class Blacklist; // Only Blacklist can create an entry. + friend class Blacklist; + friend class BlacklistIO; }; // When a request matches a Blacklist rule but the rule must be applied @@ -123,7 +156,9 @@ class Blacklist { private: std::vector<Entry*> blacklist_; + std::vector<Provider*> providers_; + FRIEND_TEST(BlacklistTest, Generic); DISALLOW_COPY_AND_ASSIGN(Blacklist); }; diff --git a/chrome/browser/privacy_blacklist/blacklist_io.cc b/chrome/browser/privacy_blacklist/blacklist_io.cc new file mode 100644 index 0000000..52ce0df --- /dev/null +++ b/chrome/browser/privacy_blacklist/blacklist_io.cc @@ -0,0 +1,183 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/privacy_blacklist/blacklist_io.h" + +#include <algorithm> +#include <string> + +#include "base/file_path.h" +#include "base/file_util.h" +#include "base/string_util.h" +#include "base/string_tokenizer.h" +#include "chrome/browser/privacy_blacklist/blacklist_store.h" + +namespace { + +const char header[] = "[Chromium::PrivacyBlacklist]"; +const char name_tag[] = "Name:"; +const char url_tag[] = "URL:"; +const char arrow_tag[] = "=>"; + +class IsWhiteSpace { + public: + bool operator()(const char& c) const { + return IsAsciiWhitespace(c); + } +}; + +class IsNotWhiteSpace { + public: + bool operator()(const char& c) const { + return !IsAsciiWhitespace(c); + } +}; + +bool StartsWith(const char* cur, const char* end, + const char* tag, std::size_t size) { + return cur+size <= end && std::equal(tag, tag+size-1, cur); +} + +} // namespace + +BlacklistIO::BlacklistIO() {} + +BlacklistIO::~BlacklistIO() { + for (std::list<Blacklist::Entry*>::iterator i = blacklist_.begin(); + i != blacklist_.end(); ++i) { + delete *i; + } + for (std::list<Blacklist::Provider*>::iterator i = providers_.begin(); + i != providers_.end(); ++i) { + delete *i; + } +} + +bool BlacklistIO::Read(const FilePath& file) { + // Memory map for efficient parsing. If the file cannot fit in available + // memory it would be the least of our worries. Typical blacklist files + // are less than 200K. + file_util::MemoryMappedFile input; + if (!input.Initialize(file) || !input.data()) + return false; + + const char* cur = reinterpret_cast<const char*>(input.data()); + const char* end = cur + input.length(); + + // Check header. + if (!StartsWith(cur, end, header, arraysize(header))) + return false; + + Blacklist::Provider* provider = new Blacklist::Provider; + providers_.push_back(provider); + + cur = std::find(cur, end, '\n') + 1; // Skip past EOL. + + // Each loop iteration takes care of one input line. + while (cur < end) { + // Skip whitespace at beginning of line. + cur = std::find_if(cur, end, IsNotWhiteSpace()); + if (cur == end) + break; + + if (*cur == '#') { + cur = std::find(cur, end, '\n') + 1; + continue; + } + + if (*cur == '|') { + ++cur; + if (StartsWith(cur, end, name_tag, arraysize(name_tag))) { + // Edge condition: if the find below fails, the next one will too, + // so we'll just skip to the EOF below. + cur = std::find_if(cur+arraysize(name_tag), end, IsNotWhiteSpace()); + const char* skip = std::find_if(cur, end, IsWhiteSpace()); + if (skip < end) + provider->set_name(std::string(cur, skip)); + } else if (StartsWith(cur, end, url_tag, arraysize(url_tag))) { + cur = std::find_if(cur+arraysize(url_tag), end, IsNotWhiteSpace()); + const char* skip = std::find_if(cur, end, IsWhiteSpace()); + if (skip < end) + provider->set_url(std::string(cur, skip)); + } + cur = std::find(cur, end, '\n') + 1; + continue; + } + + const char* skip = std::find_if(cur, end, IsWhiteSpace()); + std::string pattern(cur, skip); + + cur = std::find_if(cur+pattern.size(), end, IsNotWhiteSpace()); + if (!StartsWith(cur, end, arrow_tag, arraysize(arrow_tag))) + return false; + + scoped_ptr<Blacklist::Entry> entry(new Blacklist::Entry(pattern, provider)); + + cur = std::find_if(cur+arraysize(arrow_tag), end, IsNotWhiteSpace()); + skip = std::find(cur, end, '\n'); + std::string buf(cur, skip); + cur = skip + 1; + + StringTokenizer tokenier(buf, " (),"); + tokenier.set_options(StringTokenizer::RETURN_DELIMS); + + bool in_attribute = false; + unsigned int last_attribute = 0; + + while (tokenier.GetNext()) { + if (tokenier.token_is_delim()) { + switch (*tokenier.token_begin()) { + case '(': + if (in_attribute) return false; + in_attribute = true; + continue; + case ')': + if (!in_attribute) return false; + in_attribute = false; + continue; + default: + // No state change for other delimiters. + continue; + } + } + + if (in_attribute) { + // The only attribute to support sub_tokens is kBlockByType, for now. + if (last_attribute == Blacklist::kBlockByType) + entry->AddType(tokenier.token()); + } else { + // Filter attribute. Unrecognized attributes are ignored. + last_attribute = Blacklist::String2Attribute(tokenier.token()); + entry->AddAttributes(last_attribute); + } + } + blacklist_.push_back(entry.release()); + } + return true; +} + +bool BlacklistIO::Write(const FilePath& file) { + BlacklistStoreOutput output(file_util::OpenFile(file, "wb")); + + // Output providers, give each one an index. + std::map<const Blacklist::Provider*, uint32> index; + uint32 current = 0; + output.ReserveProviders(providers_.size()); + for (std::list<Blacklist::Provider*>::const_iterator i = providers_.begin(); + i != providers_.end(); ++i, ++current) { + output.StoreProvider((*i)->name(), (*i)->url()); + index[*i] = current; + } + + // Output entries, replacing the provider with its index. + output.ReserveEntries(blacklist_.size()); + for (std::list<Blacklist::Entry*>::const_iterator i = blacklist_.begin(); + i != blacklist_.end(); ++i) { + output.StoreEntry((*i)->pattern_, + (*i)->attributes_, + (*i)->types_, + index[(*i)->provider_]); + } + return true; +} diff --git a/chrome/browser/privacy_blacklist/blacklist_io.h b/chrome/browser/privacy_blacklist/blacklist_io.h new file mode 100644 index 0000000..fd19ae6 --- /dev/null +++ b/chrome/browser/privacy_blacklist/blacklist_io.h @@ -0,0 +1,43 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CHROME_BROWSER_PRIVACY_BLACKLIST_BLACKLIST_IO_H_ +#define CHROME_BROWSER_PRIVACY_BLACKLIST_BLACKLIST_IO_H_ + +#include <list> + +#include "chrome/browser/privacy_blacklist/blacklist.h" + +class FilePath; + +// Helper class to keep state while reading multiple text blacklists to +// produce a single binary blacklist used by the Blacklist constructor. +class BlacklistIO { + public: + BlacklistIO(); + ~BlacklistIO(); + + // Reads a text blacklist, as downloaded from the blacklist provider. + bool Read(const FilePath& path); + + // Writes a binary blacklist with aggregated entries for all read blacklists. + bool Write(const FilePath& path); + + private: + // Introspection functions, for testing purposes. + const std::list<Blacklist::Entry*>& blacklist() const { + return blacklist_; + } + const std::list<Blacklist::Provider*>& providers() const { + return providers_; + } + + std::list<Blacklist::Entry*> blacklist_; + std::list<Blacklist::Provider*> providers_; + + FRIEND_TEST(BlacklistIOTest, Generic); + DISALLOW_COPY_AND_ASSIGN(BlacklistIO); +}; + +#endif // CHROME_BROWSER_PRIVACY_BLACKLIST_BLACKLIST_IO_H_ diff --git a/chrome/browser/privacy_blacklist/blacklist_io_unittest.cc b/chrome/browser/privacy_blacklist/blacklist_io_unittest.cc new file mode 100644 index 0000000..6899321 --- /dev/null +++ b/chrome/browser/privacy_blacklist/blacklist_io_unittest.cc @@ -0,0 +1,47 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/privacy_blacklist/blacklist_io.h" + +#include "base/file_path.h" +#include "base/file_util.h" +#include "base/path_service.h" +#include "base/string_util.h" +#include "chrome/common/chrome_paths.h" +#include "testing/gtest/include/gtest/gtest.h" + +TEST(BlacklistIOTest, Generic) { + // Testing data path. + std::wstring data_dir; + PathService::Get(chrome::DIR_TEST_DATA, &data_dir); + + std::wstring input(data_dir); + file_util::AppendToPath(&input, L"blacklist_small.pbl"); + + std::wstring expected(data_dir); + file_util::AppendToPath(&expected, L"blacklist_small.pbr"); + + BlacklistIO io; + EXPECT_TRUE(io.Read(FilePath::FromWStringHack(input))); + const std::list<Blacklist::Entry*>& blacklist = io.blacklist(); + EXPECT_EQ(5U, blacklist.size()); + + std::list<Blacklist::Entry*>::const_iterator i = blacklist.begin(); + EXPECT_EQ("@", (*i++)->pattern()); + EXPECT_EQ("@poor-security-site.com", (*i++)->pattern()); + EXPECT_EQ("@.ad-serving-place.com", (*i++)->pattern()); + EXPECT_EQ("www.site.com/anonymous/folder/@", (*i++)->pattern()); + EXPECT_EQ("www.site.com/bad/url", (*i++)->pattern()); + + EXPECT_EQ(1U, io.providers().size()); + EXPECT_EQ("Sample", io.providers().front()->name()); + EXPECT_EQ("http://www.google.com", io.providers().front()->url()); + + std::wstring output; + PathService::Get(base::DIR_TEMP, &output); + file_util::AppendToPath(&output, L"blacklist_small.pbr"); + CHECK(io.Write(FilePath::FromWStringHack(output))); + EXPECT_TRUE(file_util::ContentsEqual(output, expected)); + EXPECT_TRUE(file_util::Delete(output, false)); +} diff --git a/chrome/browser/privacy_blacklist/blacklist_store.cc b/chrome/browser/privacy_blacklist/blacklist_store.cc new file mode 100644 index 0000000..88ce48b --- /dev/null +++ b/chrome/browser/privacy_blacklist/blacklist_store.cc @@ -0,0 +1,114 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/privacy_blacklist/blacklist_store.h" + +#include <cstdio> + +#include "base/basictypes.h" +#include "base/file_util.h" + +namespace { + +const char cookie[] = "GCPBL100"; + +} + +void BlacklistStoreOutput::WriteUInt(uint32 i) { + fwrite(reinterpret_cast<char*>(&i), 1, sizeof(uint32), file_); +} + +void BlacklistStoreOutput::WriteString(const std::string& s) { + uint32 n = s.size(); + fwrite(reinterpret_cast<char*>(&n), 1, sizeof(uint32), file_); + fwrite(s.c_str(), 1, s.size(), file_); +} + +BlacklistStoreOutput::BlacklistStoreOutput(FILE* file) : file_(file) { + fwrite(cookie, 1, sizeof(cookie), file_); +} + +BlacklistStoreOutput::~BlacklistStoreOutput() { + file_util::CloseFile(file_); +} + +void BlacklistStoreOutput::ReserveProviders(uint32 num) { + WriteUInt(num); +} + +void BlacklistStoreOutput::StoreProvider(const std::string& name, + const std::string& url) { + WriteString(name); + WriteString(url); +} + +void BlacklistStoreOutput::ReserveEntries(uint32 num) { + WriteUInt(num); +} + +void BlacklistStoreOutput::StoreEntry(const std::string& pattern, + uint32 attributes, + const std::vector<std::string>& types, + uint32 provider) { + WriteString(pattern); + WriteUInt(attributes); + WriteUInt(types.size()); + for (uint32 i = 0; i < types.size(); ++i) + WriteString(types[i]); + WriteUInt(provider); +} + +uint32 BlacklistStoreInput::ReadUInt() { + char buf[sizeof(uint32)]; + fread(buf, 1, sizeof(uint32), file_); + return *reinterpret_cast<uint32*>(buf); +} + +std::string BlacklistStoreInput::ReadString() { + uint32 size = ReadUInt(); + + // Too long strings are not allowed. + if (size > 8192) { + return std::string(); + } + + char buf[8192]; + fread(buf, 1, size, file_); + return std::string(buf, size); +} + +BlacklistStoreInput::BlacklistStoreInput(FILE* file) : file_(file) { + char buf[sizeof(cookie)]; + fread(buf, 1, sizeof(cookie), file_); +} + +BlacklistStoreInput::~BlacklistStoreInput() { + file_util::CloseFile(file_); +} + +uint32 BlacklistStoreInput::ReadNumProviders() { + return ReadUInt(); +} + +void BlacklistStoreInput::ReadProvider(std::string* name, std::string* url) { + *name = ReadString(); + *url = ReadString(); +} + +uint32 BlacklistStoreInput::ReadNumEntries() { + return ReadUInt(); +} + +void BlacklistStoreInput::ReadEntry(std::string* pattern, + uint32* attributes, + std::vector<std::string>* types, + uint32* provider) { + *pattern = ReadString(); + *attributes = ReadUInt(); + if (uint32 n = ReadUInt()) { + while (n--) + types->push_back(ReadString()); + } + *provider = ReadUInt(); +} diff --git a/chrome/browser/privacy_blacklist/blacklist_store.h b/chrome/browser/privacy_blacklist/blacklist_store.h new file mode 100644 index 0000000..8ea96f6 --- /dev/null +++ b/chrome/browser/privacy_blacklist/blacklist_store.h @@ -0,0 +1,95 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CHROME_BROWSER_PRIVACY_BLACKLIST_BLACKLIST_DB_H_ +#define CHROME_BROWSER_PRIVACY_BLACKLIST_BLACKLIST_DB_H_ + +#include <cstdio> +#include <string> +#include <vector> + +#include "base/basictypes.h" + +class FilePath; + +// TODO(idanan): Error handling needed. I/O errors can always happen! + +//////////////////////////////////////////////////////////////////////////////// +// +// Blacklist Binary Storage Output Class +// +// Stores aggregate Privacy Blacklists efficiently on disk. The public +// functions below must be called in the order they are declared, as +// the input class is expected to read them in that order. The provider +// and entry output functions must be called the number of times set. +// +//////////////////////////////////////////////////////////////////////////////// +class BlacklistStoreOutput { + public: + explicit BlacklistStoreOutput(FILE* file); + ~BlacklistStoreOutput(); + + // Sets the number of providers stored. + void ReserveProviders(uint32); + + // Stores a provider. + void StoreProvider(const std::string& name, const std::string& url); + + // Sets the number of entries stored. + void ReserveEntries(uint32); + + // Stores an entry. + void StoreEntry(const std::string& pattern, + uint32 attributes, + const std::vector<std::string>& types, + uint32 provider); + + private: + // Writes basic types to the stream. + void WriteUInt(uint32); + void WriteString(const std::string&); + + FILE* file_; + DISALLOW_COPY_AND_ASSIGN(BlacklistStoreOutput); +}; + +//////////////////////////////////////////////////////////////////////////////// +// +// Blacklist Binary Storage Input Class +// +// Stores aggregate Privacy Blacklists efficiently on disk. The public +// functions below must be called in the order they are declared, as +// the output class is expected to write them in that order. The provider +// entries read functions must be called the correct number of times. +// +//////////////////////////////////////////////////////////////////////////////// +class BlacklistStoreInput { + public: + explicit BlacklistStoreInput(FILE* file); + ~BlacklistStoreInput(); + + // Reads the number of providers. + uint32 ReadNumProviders(); + + // Reads a provider. + void ReadProvider(std::string* name, std::string* url); + + // Reads the number of entries. + uint32 ReadNumEntries(); + + // Reads an entry. + void ReadEntry(std::string* pattern, + uint32* attributes, + std::vector<std::string>* types, + uint32* provider); + + private: + uint32 ReadUInt(); + std::string ReadString(); + + FILE* file_; + DISALLOW_COPY_AND_ASSIGN(BlacklistStoreInput); +}; + +#endif diff --git a/chrome/browser/privacy_blacklist/blacklist_unittest.cc b/chrome/browser/privacy_blacklist/blacklist_unittest.cc index ad363ad..4df5f7c 100644 --- a/chrome/browser/privacy_blacklist/blacklist_unittest.cc +++ b/chrome/browser/privacy_blacklist/blacklist_unittest.cc @@ -3,27 +3,105 @@ // found in the LICENSE file. #include "chrome/browser/privacy_blacklist/blacklist.h" + #include "base/file_path.h" +#include "base/file_util.h" +#include "base/path_service.h" +#include "base/string_util.h" +#include "chrome/common/chrome_paths.h" #include "testing/gtest/include/gtest/gtest.h" TEST(BlacklistTest, Generic) { - FilePath path; + // Get path relative to test data dir. + std::wstring input; + PathService::Get(chrome::DIR_TEST_DATA, &input); + file_util::AppendToPath(&input, L"blacklist_small.pbr"); + + FilePath path = FilePath::FromWStringHack(input); Blacklist blacklist(path); + // This test is a friend, so inspect the internal structures. + EXPECT_EQ(5U, blacklist.blacklist_.size()); + std::vector<Blacklist::Entry*>::const_iterator i = + blacklist.blacklist_.begin(); + + EXPECT_EQ(Blacklist::kBlockByType|Blacklist::kDontPersistCookies, + (*i)->attributes()); + EXPECT_TRUE((*i)->MatchType("application/x-shockwave-flash")); + EXPECT_FALSE((*i)->MatchType("image/jpeg")); + EXPECT_EQ("@", (*i++)->pattern()); + + // All entries include global attributes. + // NOTE: Silly bitwise-or with zero to workaround a Mac compiler bug. + EXPECT_EQ(Blacklist::kBlockUnsecure|0, (*i)->attributes()); + EXPECT_FALSE((*i)->MatchType("application/x-shockwave-flash")); + EXPECT_FALSE((*i)->MatchType("image/jpeg")); + EXPECT_EQ("@poor-security-site.com", (*i++)->pattern()); + + EXPECT_EQ(Blacklist::kDontSendCookies|Blacklist::kDontStoreCookies, + (*i)->attributes()); + EXPECT_FALSE((*i)->MatchType("application/x-shockwave-flash")); + EXPECT_FALSE((*i)->MatchType("image/jpeg")); + EXPECT_EQ("@.ad-serving-place.com", (*i++)->pattern()); + + EXPECT_EQ(Blacklist::kDontSendUserAgent|Blacklist::kDontSendReferrer, + (*i)->attributes()); + EXPECT_FALSE((*i)->MatchType("application/x-shockwave-flash")); + EXPECT_FALSE((*i)->MatchType("image/jpeg")); + EXPECT_EQ("www.site.com/anonymous/folder/@", (*i++)->pattern()); + + // NOTE: Silly bitwise-or with zero to workaround a Mac compiler bug. + EXPECT_EQ(Blacklist::kBlockAll|0, (*i)->attributes()); + EXPECT_FALSE((*i)->MatchType("application/x-shockwave-flash")); + EXPECT_FALSE((*i)->MatchType("image/jpeg")); + EXPECT_EQ("www.site.com/bad/url", (*i++)->pattern()); + + EXPECT_EQ(1U, blacklist.providers_.size()); + EXPECT_EQ("Sample", blacklist.providers_.front()->name()); + EXPECT_EQ("http://www.google.com", blacklist.providers_.front()->url()); + // Empty blacklist should not match any URL. EXPECT_FALSE(blacklist.findMatch(GURL())); EXPECT_FALSE(blacklist.findMatch(GURL("http://www.google.com"))); + // StripCookieExpiry Tests std::string cookie1( "PREF=ID=14a549990453e42a:TM=1245183232:LM=1245183232:S=Occ7khRVIEE36Ao5;" " expires=Thu, 16-Jun-2011 20:13:52 GMT; path=/; domain=.google.com"); std::string cookie2( "PREF=ID=14a549990453e42a:TM=1245183232:LM=1245183232:S=Occ7khRVIEE36Ao5;" " path=/; domain=.google.com"); + std::string cookie3( + "PREF=ID=14a549990453e42a:TM=1245183232:LM=1245183232:S=Occ7khRVIEE36Ao5;" + " expires=Thu, 17-Jun-2011 02:13:52 GMT; path=/; domain=.google.com"); // No expiry, should be equal to itself after stripping. EXPECT_TRUE(cookie2 == Blacklist::StripCookieExpiry(cookie2)); // Expiry, should be equal to non-expiry version after stripping. EXPECT_TRUE(cookie2 == Blacklist::StripCookieExpiry(cookie1)); + + // Edge cases. + EXPECT_TRUE(std::string() == Blacklist::StripCookieExpiry(std::string())); + EXPECT_TRUE(Blacklist::StripCookieExpiry(cookie2) == + Blacklist::StripCookieExpiry(cookie3)); + + // StripCookies Test. Note that "\r\n" line terminators are used + // because the underlying net util uniformizes those when stripping + // headers. + std::string header1("Host: www.example.com\r\n"); + std::string header2("Upgrade: TLS/1.0, HTTP/1.1\r\n" + "Connection: Upgrade\r\n"); + std::string header3("Date: Mon, 12 Mar 2001 19:20:33 GMT\r\n" + "Expires: Mon, 12 Mar 2001 19:20:33 GMT\r\n" + "Content-Type: text/html\r\n" + "Set-Cookie: B=460soc0taq8c1&b=2; " + "expires=Thu, 15 Apr 2010 20:00:00 GMT; path=/;\r\n"); + std::string header4("Date: Mon, 12 Mar 2001 19:20:33 GMT\r\n" + "Expires: Mon, 12 Mar 2001 19:20:33 GMT\r\n" + "Content-Type: text/html\r\n"); + + EXPECT_TRUE(header1 == Blacklist::StripCookies(header1)); + EXPECT_TRUE(header2 == Blacklist::StripCookies(header2)); + EXPECT_TRUE(header4 == Blacklist::StripCookies(header3)); } diff --git a/chrome/chrome.gyp b/chrome/chrome.gyp index 3731471..623d25e 100644 --- a/chrome/chrome.gyp +++ b/chrome/chrome.gyp @@ -1312,6 +1312,10 @@ 'browser/printing/printer_query.h', 'browser/privacy_blacklist/blacklist.h', 'browser/privacy_blacklist/blacklist.cc', + 'browser/privacy_blacklist/blacklist_io.h', + 'browser/privacy_blacklist/blacklist_io.cc', + 'browser/privacy_blacklist/blacklist_store.h', + 'browser/privacy_blacklist/blacklist_store.cc', 'browser/process_singleton.h', 'browser/process_singleton_linux.cc', 'browser/process_singleton_mac.cc', @@ -3680,6 +3684,7 @@ 'browser/password_manager/password_store_mac_unittest.cc', 'browser/printing/print_job_unittest.cc', 'browser/privacy_blacklist/blacklist_unittest.cc', + 'browser/privacy_blacklist/blacklist_io_unittest.cc', 'browser/profile_manager_unittest.cc', 'browser/renderer_host/audio_renderer_host_unittest.cc', 'browser/renderer_host/file_system_accessor_unittest.cc', diff --git a/chrome/test/data/blacklist_small.pbl b/chrome/test/data/blacklist_small.pbl new file mode 100644 index 0000000..c753231 --- /dev/null +++ b/chrome/test/data/blacklist_small.pbl @@ -0,0 +1,19 @@ +[Chromium::PrivacyBlacklist] +|Name: Sample +|URL: http://www.google.com Text here is ignored +|Icon: Unsupported feature !!!!! + +# Default match attributes (matches everything) +@ => kBlockByType(application/x-shockwave-flash), kDontPersistCookies + +# Affect an entire site +@poor-security-site.com => kBlockUnsecure + +# Affect subdomains of an entire site +@.ad-serving-place.com => kDontSendCookies, kDontStoreCookies + +# Affect site files under a subfolder +www.site.com/anonymous/folder/@ => kDontSendUserAgent, kDontSendReferrer + +# Affect a specific URL +www.site.com/bad/url => kBlockAll diff --git a/chrome/test/data/blacklist_small.pbr b/chrome/test/data/blacklist_small.pbr Binary files differnew file mode 100644 index 0000000..c3f8571 --- /dev/null +++ b/chrome/test/data/blacklist_small.pbr diff --git a/chrome/tools/pbl_tool/pbl_tool.cc b/chrome/tools/pbl_tool/pbl_tool.cc index 6c0cdde..355c7ea 100644 --- a/chrome/tools/pbl_tool/pbl_tool.cc +++ b/chrome/tools/pbl_tool/pbl_tool.cc @@ -3,14 +3,55 @@ // found in the LICENSE file. // This tool manages privacy blacklists. Primarily for loading a text -// blacklist into the binary agregate blacklist. +// blacklist into the binary aggregate blacklist. #include <iostream> #include "base/process_util.h" +#include "base/string_util.h" #include "chrome/browser/privacy_blacklist/blacklist.h" +#include "chrome/browser/privacy_blacklist/blacklist_io.h" -int main(int argc, char* argv[]) { +#ifdef OS_POSIX +#define ICHAR char +#define ICERR std::cerr +#define IMAIN main +#else +#define ICHAR wchar_t +#define ICERR std::wcerr +#define IMAIN wmain +#endif + +namespace { + +int PrintUsage(int argc, ICHAR* argv[]) { + ICERR << "Usage: " << argv[0] << " <source> <target>\n" + " <source> is the text blacklist (.pbl) to load.\n" + " <target> is the binary output blacklist repository.\n\n" + "Adds all entries from <source> to <target>.\n" + "Creates <target> if it does not exist.\n"; + return 1; +} + +} + +int IMAIN(int argc, ICHAR* argv[]) { base::EnableTerminationOnHeapCorruption(); - std::cout << "Aw, Snap! This is not implemented yet." << std::endl; - CHECK(std::string() == Blacklist::StripCookies(std::string())); + + if (argc < 3) + return PrintUsage(argc, argv); + + FilePath input(argv[1]); + FilePath output(argv[2]); + + BlacklistIO io; + if (io.Read(input)) { + if (io.Write(output)) { + return 0; + } else { + ICERR << "Error writing output file " << argv[2] << "\n"; + } + } else { + ICERR << "Error reading input file " << argv[1] << "\n"; + } + return -1; } |