diff options
author | idanan@chromium.org <idanan@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-07-15 17:05:49 +0000 |
---|---|---|
committer | idanan@chromium.org <idanan@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-07-15 17:05:49 +0000 |
commit | 8d090ca8c25795d526761bcae9fb301b51b2ee43 (patch) | |
tree | 75eba8a5fe8e8f6e82dea3974faec5f9bf57f079 | |
parent | e5bf919de97cab42aa719a13a939487cf9f18242 (diff) | |
download | chromium_src-8d090ca8c25795d526761bcae9fb301b51b2ee43.zip chromium_src-8d090ca8c25795d526761bcae9fb301b51b2ee43.tar.gz chromium_src-8d090ca8c25795d526761bcae9fb301b51b2ee43.tar.bz2 |
Privacy Blacklist IO. Implemented parsing of input (text) blacklists. This is the format in which we will download privacy blacklists. Implemented storing and reading of aggregated blacklists in a binary format. This is the repository which Chromium will read on startup, which is more efficient to load than the downloaded blacklists. Added concept of providers to blacklist entries so that we can determine the source of a pattern match, for future display in the UI. Unit tests added for the new IO classes. Privacy Blacklist conversion tool implemented to convert a single text blacklist into the binary format. Still needs options for storing multiple blacklists (TODO).
BUG=none
TEST=Blacklist*
Review URL: http://codereview.chromium.org/155298
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@20734 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r-- | chrome/browser/privacy_blacklist/blacklist.cc | 93 | ||||
-rw-r--r-- | chrome/browser/privacy_blacklist/blacklist.h | 43 | ||||
-rw-r--r-- | chrome/browser/privacy_blacklist/blacklist_io.cc | 183 | ||||
-rw-r--r-- | chrome/browser/privacy_blacklist/blacklist_io.h | 43 | ||||
-rw-r--r-- | chrome/browser/privacy_blacklist/blacklist_io_unittest.cc | 47 | ||||
-rw-r--r-- | chrome/browser/privacy_blacklist/blacklist_store.cc | 114 | ||||
-rw-r--r-- | chrome/browser/privacy_blacklist/blacklist_store.h | 95 | ||||
-rw-r--r-- | chrome/browser/privacy_blacklist/blacklist_unittest.cc | 80 | ||||
-rw-r--r-- | chrome/chrome.gyp | 5 | ||||
-rw-r--r-- | chrome/test/data/blacklist_small.pbl | 19 | ||||
-rw-r--r-- | chrome/test/data/blacklist_small.pbr | bin | 0 -> 262 bytes | |||
-rw-r--r-- | chrome/tools/pbl_tool/pbl_tool.cc | 49 |
12 files changed, 753 insertions, 18 deletions
diff --git a/chrome/browser/privacy_blacklist/blacklist.cc b/chrome/browser/privacy_blacklist/blacklist.cc index 8b575a8..e937ad2 100644 --- a/chrome/browser/privacy_blacklist/blacklist.cc +++ b/chrome/browser/privacy_blacklist/blacklist.cc @@ -7,19 +7,48 @@ #include <algorithm> #include <string> +#include "base/file_path.h" +#include "base/file_util.h" +#include "chrome/browser/privacy_blacklist/blacklist_store.h" +#include "net/http/http_util.h" + +#define STRINGIZE(s) #s + namespace { -bool matches(std::string pattern, std::string url) { +bool matches(const std::string& pattern, const std::string& url) { return url.find(pattern) != std::string::npos; } -} +const char* const cookie_headers[2] = { "cookie", "set-cookie" }; + +} // namespace // Value is not important, here just that the object has an address. const void* const Blacklist::kRequestDataKey = 0; +unsigned int Blacklist::String2Attribute(const std::string& s) { + if (s == STRINGIZE(kBlockAll)) + return kBlockAll; + else if (s == STRINGIZE(kDontSendCookies)) + return kDontSendCookies; + else if (s == STRINGIZE(kDontStoreCookies)) + return kDontStoreCookies; + else if (s == STRINGIZE(kDontPersistCookies)) + return kDontPersistCookies; + else if (s == STRINGIZE(kDontSendReferrer)) + return kDontSendReferrer; + else if (s == STRINGIZE(kDontSendUserAgent)) + return kDontSendUserAgent; + else if (s == STRINGIZE(kBlockByType)) + return kBlockByType; + else if (s == STRINGIZE(kBlockUnsecure)) + return kBlockUnsecure; + return 0; +} + bool Blacklist::Entry::MatchType(const std::string& type) const { - return std::find(types_->begin(), types_->end(), type) != types_->end(); + return std::find(types_.begin(), types_.end(), type) != types_.end(); } bool Blacklist::Entry::IsBlocked(const GURL& url) const { @@ -27,21 +56,68 @@ bool Blacklist::Entry::IsBlocked(const GURL& url) const { ((attributes_ & kBlockUnsecure) && !url.SchemeIsSecure()); } -Blacklist::Entry::Entry(const std::string& pattern, unsigned int 
attributes) - : pattern_(pattern), attributes_(attributes) {} +Blacklist::Entry::Entry(const std::string& pattern, const Provider* provider) + : pattern_(pattern), attributes_(0), provider_(provider) {} + +void Blacklist::Entry::AddAttributes(unsigned int attributes) { + attributes_ |= attributes; +} void Blacklist::Entry::AddType(const std::string& type) { - types_->push_back(type); + types_.push_back(type); +} + +void Blacklist::Entry::Merge(const Entry& entry) { + attributes_ |= entry.attributes_; + + std::copy(entry.types_.begin(), entry.types_.end(), + std::back_inserter(types_)); +} + +void Blacklist::Entry::SwapTypes(std::vector<std::string>* types) { + if (types && types->size()) { + types->swap(types_); + } } Blacklist::Blacklist(const FilePath& file) { - // TODO(idanan): Do something here. + // No blacklist, nothing to load. + if (file.value().empty()) + return; + + BlacklistStoreInput input(file_util::OpenFile(file, "rb")); + + // Read the providers + std::size_t n = input.ReadNumProviders(); + providers_.reserve(n); + std::string name; + std::string url; + for (std::size_t i = 0; i < n; ++i) { + input.ReadProvider(&name, &url); + providers_.push_back(new Provider(name.c_str(), url.c_str())); + } + + // Read the entries + n = input.ReadNumEntries(); + std::string pattern; + unsigned int attributes, provider; + std::vector<std::string> types; + for (unsigned int i = 0; i < n; ++i) { + input.ReadEntry(&pattern, &attributes, &types, &provider); + Entry* entry = new Entry(pattern, providers_[provider]); + entry->AddAttributes(attributes); + entry->SwapTypes(&types); + blacklist_.push_back(entry); + } } Blacklist::~Blacklist() { for (std::vector<Entry*>::iterator i = blacklist_.begin(); i != blacklist_.end(); ++i) delete *i; + for (std::vector<Provider*>::iterator i = providers_.begin(); + i != providers_.end(); ++i) + delete *i; } // Returns a pointer to the Blacklist-owned entry which matches the given @@ -55,8 +131,7 @@ const Blacklist::Entry* 
Blacklist::findMatch(const GURL& url) const { } std::string Blacklist::StripCookies(const std::string& header) { - // TODO(idanan): Implement this. - return header; + return net::HttpUtil::StripHeaders(header, cookie_headers, 2); } std::string Blacklist::StripCookieExpiry(const std::string& cookie) { diff --git a/chrome/browser/privacy_blacklist/blacklist.h b/chrome/browser/privacy_blacklist/blacklist.h index f3f49ea..9d2b2e6 100644 --- a/chrome/browser/privacy_blacklist/blacklist.h +++ b/chrome/browser/privacy_blacklist/blacklist.h @@ -9,7 +9,6 @@ #include <vector> #include "base/basictypes.h" -#include "base/scoped_ptr.h" #include "googleurl/src/gurl.h" #include "net/url_request/url_request.h" @@ -55,6 +54,23 @@ class Blacklist { // Key used to access data attached to URLRequest objects. static const void* const kRequestDataKey; + // Takes a string an returns the matching attribute, 0 if none matches. + static unsigned int String2Attribute(const std::string&); + + // Blacklist entries come from a provider, defined by a name and source URL. + class Provider { + public: + Provider() {} + Provider(const char* name, const char* url) : name_(name), url_(url) {} + const std::string& name() const { return name_; } + const std::string& url() const { return url_; } + void set_name(const std::string& name) { name_ = name; } + void set_url(const std::string& url) { url_ = url; } + private: + std::string name_; + std::string url_; + }; + // A single blacklist entry which is returned when a URL matches one of // the patterns. Entry objects are owned by the Blacklist that stores them. class Entry { @@ -65,6 +81,9 @@ class Blacklist { // Bitfield of filter-attributes matching the pattern. 
unsigned int attributes() const { return attributes_; } + // Provider of this blacklist entry, used for assigning blame ;) + const Provider* provider() const { return provider_; } + // Returns true if the given type matches one of the types for which // the filter-attributes of this pattern apply. This needs only to be // checked for content-type specific rules, as determined by calling @@ -76,14 +95,28 @@ class Blacklist { bool IsBlocked(const GURL&) const; private: - Entry(const std::string& pattern, unsigned int attributes); + // Construct with given pattern. + explicit Entry(const std::string& pattern, const Provider* provider); + + void AddAttributes(unsigned int attributes); void AddType(const std::string& type); + // Merge the attributes and types of the given entry with this one. + void Merge(const Entry& entry); + + // Swap the given vector content for the type vector for quick loading. + void SwapTypes(std::vector<std::string>* types); + std::string pattern_; unsigned int attributes_; - scoped_ptr< std::vector<std::string> > types_; + std::vector<std::string> types_; + + // Points to the provider of this entry, the providers are all + // owned by the blacklist. + const Provider* provider_; - friend class Blacklist; // Only Blacklist can create an entry. + friend class Blacklist; + friend class BlacklistIO; }; // When a request matches a Blacklist rule but the rule must be applied @@ -123,7 +156,9 @@ class Blacklist { private: std::vector<Entry*> blacklist_; + std::vector<Provider*> providers_; + FRIEND_TEST(BlacklistTest, Generic); DISALLOW_COPY_AND_ASSIGN(Blacklist); }; diff --git a/chrome/browser/privacy_blacklist/blacklist_io.cc b/chrome/browser/privacy_blacklist/blacklist_io.cc new file mode 100644 index 0000000..52ce0df --- /dev/null +++ b/chrome/browser/privacy_blacklist/blacklist_io.cc @@ -0,0 +1,183 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/privacy_blacklist/blacklist_io.h" + +#include <algorithm> +#include <string> + +#include "base/file_path.h" +#include "base/file_util.h" +#include "base/string_util.h" +#include "base/string_tokenizer.h" +#include "chrome/browser/privacy_blacklist/blacklist_store.h" + +namespace { + +const char header[] = "[Chromium::PrivacyBlacklist]"; +const char name_tag[] = "Name:"; +const char url_tag[] = "URL:"; +const char arrow_tag[] = "=>"; + +class IsWhiteSpace { + public: + bool operator()(const char& c) const { + return IsAsciiWhitespace(c); + } +}; + +class IsNotWhiteSpace { + public: + bool operator()(const char& c) const { + return !IsAsciiWhitespace(c); + } +}; + +bool StartsWith(const char* cur, const char* end, + const char* tag, std::size_t size) { + return cur+size <= end && std::equal(tag, tag+size-1, cur); +} + +} // namespace + +BlacklistIO::BlacklistIO() {} + +BlacklistIO::~BlacklistIO() { + for (std::list<Blacklist::Entry*>::iterator i = blacklist_.begin(); + i != blacklist_.end(); ++i) { + delete *i; + } + for (std::list<Blacklist::Provider*>::iterator i = providers_.begin(); + i != providers_.end(); ++i) { + delete *i; + } +} + +bool BlacklistIO::Read(const FilePath& file) { + // Memory map for efficient parsing. If the file cannot fit in available + // memory it would be the least of our worries. Typical blacklist files + // are less than 200K. + file_util::MemoryMappedFile input; + if (!input.Initialize(file) || !input.data()) + return false; + + const char* cur = reinterpret_cast<const char*>(input.data()); + const char* end = cur + input.length(); + + // Check header. + if (!StartsWith(cur, end, header, arraysize(header))) + return false; + + Blacklist::Provider* provider = new Blacklist::Provider; + providers_.push_back(provider); + + cur = std::find(cur, end, '\n') + 1; // Skip past EOL. 
+ + // Each loop iteration takes care of one input line. + while (cur < end) { + // Skip whitespace at beginning of line. + cur = std::find_if(cur, end, IsNotWhiteSpace()); + if (cur == end) + break; + + if (*cur == '#') { + cur = std::find(cur, end, '\n') + 1; + continue; + } + + if (*cur == '|') { + ++cur; + if (StartsWith(cur, end, name_tag, arraysize(name_tag))) { + // Edge condition: if the find below fails, the next one will too, + // so we'll just skip to the EOF below. + cur = std::find_if(cur+arraysize(name_tag), end, IsNotWhiteSpace()); + const char* skip = std::find_if(cur, end, IsWhiteSpace()); + if (skip < end) + provider->set_name(std::string(cur, skip)); + } else if (StartsWith(cur, end, url_tag, arraysize(url_tag))) { + cur = std::find_if(cur+arraysize(url_tag), end, IsNotWhiteSpace()); + const char* skip = std::find_if(cur, end, IsWhiteSpace()); + if (skip < end) + provider->set_url(std::string(cur, skip)); + } + cur = std::find(cur, end, '\n') + 1; + continue; + } + + const char* skip = std::find_if(cur, end, IsWhiteSpace()); + std::string pattern(cur, skip); + + cur = std::find_if(cur+pattern.size(), end, IsNotWhiteSpace()); + if (!StartsWith(cur, end, arrow_tag, arraysize(arrow_tag))) + return false; + + scoped_ptr<Blacklist::Entry> entry(new Blacklist::Entry(pattern, provider)); + + cur = std::find_if(cur+arraysize(arrow_tag), end, IsNotWhiteSpace()); + skip = std::find(cur, end, '\n'); + std::string buf(cur, skip); + cur = skip + 1; + + StringTokenizer tokenier(buf, " (),"); + tokenier.set_options(StringTokenizer::RETURN_DELIMS); + + bool in_attribute = false; + unsigned int last_attribute = 0; + + while (tokenier.GetNext()) { + if (tokenier.token_is_delim()) { + switch (*tokenier.token_begin()) { + case '(': + if (in_attribute) return false; + in_attribute = true; + continue; + case ')': + if (!in_attribute) return false; + in_attribute = false; + continue; + default: + // No state change for other delimiters. 
+ continue; + } + } + + if (in_attribute) { + // The only attribute to support sub_tokens is kBlockByType, for now. + if (last_attribute == Blacklist::kBlockByType) + entry->AddType(tokenier.token()); + } else { + // Filter attribute. Unrecognized attributes are ignored. + last_attribute = Blacklist::String2Attribute(tokenier.token()); + entry->AddAttributes(last_attribute); + } + } + blacklist_.push_back(entry.release()); + } + return true; +} + +bool BlacklistIO::Write(const FilePath& file) { + BlacklistStoreOutput output(file_util::OpenFile(file, "wb")); + + // Output providers, give each one an index. + std::map<const Blacklist::Provider*, uint32> index; + uint32 current = 0; + output.ReserveProviders(providers_.size()); + for (std::list<Blacklist::Provider*>::const_iterator i = providers_.begin(); + i != providers_.end(); ++i, ++current) { + output.StoreProvider((*i)->name(), (*i)->url()); + index[*i] = current; + } + + // Output entries, replacing the provider with its index. + output.ReserveEntries(blacklist_.size()); + for (std::list<Blacklist::Entry*>::const_iterator i = blacklist_.begin(); + i != blacklist_.end(); ++i) { + output.StoreEntry((*i)->pattern_, + (*i)->attributes_, + (*i)->types_, + index[(*i)->provider_]); + } + return true; +} diff --git a/chrome/browser/privacy_blacklist/blacklist_io.h b/chrome/browser/privacy_blacklist/blacklist_io.h new file mode 100644 index 0000000..fd19ae6 --- /dev/null +++ b/chrome/browser/privacy_blacklist/blacklist_io.h @@ -0,0 +1,43 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef CHROME_BROWSER_PRIVACY_BLACKLIST_BLACKLIST_IO_H_ +#define CHROME_BROWSER_PRIVACY_BLACKLIST_BLACKLIST_IO_H_ + +#include <list> + +#include "chrome/browser/privacy_blacklist/blacklist.h" + +class FilePath; + +// Helper class to keep state while reading multiple text blacklists to +// produce a single binary blacklist used by the Blacklist constructor. +class BlacklistIO { + public: + BlacklistIO(); + ~BlacklistIO(); + + // Reads a text blacklist, as downloaded from the blacklist provider. + bool Read(const FilePath& path); + + // Writes a binary blacklist with aggregated entries for all read blacklists. + bool Write(const FilePath& path); + + private: + // Introspection functions, for testing purposes. + const std::list<Blacklist::Entry*>& blacklist() const { + return blacklist_; + } + const std::list<Blacklist::Provider*>& providers() const { + return providers_; + } + + std::list<Blacklist::Entry*> blacklist_; + std::list<Blacklist::Provider*> providers_; + + FRIEND_TEST(BlacklistIOTest, Generic); + DISALLOW_COPY_AND_ASSIGN(BlacklistIO); +}; + +#endif // CHROME_BROWSER_PRIVACY_BLACKLIST_BLACKLIST_IO_H_ diff --git a/chrome/browser/privacy_blacklist/blacklist_io_unittest.cc b/chrome/browser/privacy_blacklist/blacklist_io_unittest.cc new file mode 100644 index 0000000..6899321 --- /dev/null +++ b/chrome/browser/privacy_blacklist/blacklist_io_unittest.cc @@ -0,0 +1,47 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/privacy_blacklist/blacklist_io.h" + +#include "base/file_path.h" +#include "base/file_util.h" +#include "base/path_service.h" +#include "base/string_util.h" +#include "chrome/common/chrome_paths.h" +#include "testing/gtest/include/gtest/gtest.h" + +TEST(BlacklistIOTest, Generic) { + // Testing data path. 
+ std::wstring data_dir; + PathService::Get(chrome::DIR_TEST_DATA, &data_dir); + + std::wstring input(data_dir); + file_util::AppendToPath(&input, L"blacklist_small.pbl"); + + std::wstring expected(data_dir); + file_util::AppendToPath(&expected, L"blacklist_small.pbr"); + + BlacklistIO io; + EXPECT_TRUE(io.Read(FilePath::FromWStringHack(input))); + const std::list<Blacklist::Entry*>& blacklist = io.blacklist(); + EXPECT_EQ(5U, blacklist.size()); + + std::list<Blacklist::Entry*>::const_iterator i = blacklist.begin(); + EXPECT_EQ("@", (*i++)->pattern()); + EXPECT_EQ("@poor-security-site.com", (*i++)->pattern()); + EXPECT_EQ("@.ad-serving-place.com", (*i++)->pattern()); + EXPECT_EQ("www.site.com/anonymous/folder/@", (*i++)->pattern()); + EXPECT_EQ("www.site.com/bad/url", (*i++)->pattern()); + + EXPECT_EQ(1U, io.providers().size()); + EXPECT_EQ("Sample", io.providers().front()->name()); + EXPECT_EQ("http://www.google.com", io.providers().front()->url()); + + std::wstring output; + PathService::Get(base::DIR_TEMP, &output); + file_util::AppendToPath(&output, L"blacklist_small.pbr"); + CHECK(io.Write(FilePath::FromWStringHack(output))); + EXPECT_TRUE(file_util::ContentsEqual(output, expected)); + EXPECT_TRUE(file_util::Delete(output, false)); +} diff --git a/chrome/browser/privacy_blacklist/blacklist_store.cc b/chrome/browser/privacy_blacklist/blacklist_store.cc new file mode 100644 index 0000000..88ce48b --- /dev/null +++ b/chrome/browser/privacy_blacklist/blacklist_store.cc @@ -0,0 +1,114 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "chrome/browser/privacy_blacklist/blacklist_store.h" + +#include <cstdio> + +#include "base/basictypes.h" +#include "base/file_util.h" + +namespace { + +const char cookie[] = "GCPBL100"; + +} + +void BlacklistStoreOutput::WriteUInt(uint32 i) { + fwrite(reinterpret_cast<char*>(&i), 1, sizeof(uint32), file_); +} + +void BlacklistStoreOutput::WriteString(const std::string& s) { + uint32 n = s.size(); + fwrite(reinterpret_cast<char*>(&n), 1, sizeof(uint32), file_); + fwrite(s.c_str(), 1, s.size(), file_); +} + +BlacklistStoreOutput::BlacklistStoreOutput(FILE* file) : file_(file) { + fwrite(cookie, 1, sizeof(cookie), file_); +} + +BlacklistStoreOutput::~BlacklistStoreOutput() { + file_util::CloseFile(file_); +} + +void BlacklistStoreOutput::ReserveProviders(uint32 num) { + WriteUInt(num); +} + +void BlacklistStoreOutput::StoreProvider(const std::string& name, + const std::string& url) { + WriteString(name); + WriteString(url); +} + +void BlacklistStoreOutput::ReserveEntries(uint32 num) { + WriteUInt(num); +} + +void BlacklistStoreOutput::StoreEntry(const std::string& pattern, + uint32 attributes, + const std::vector<std::string>& types, + uint32 provider) { + WriteString(pattern); + WriteUInt(attributes); + WriteUInt(types.size()); + for (uint32 i = 0; i < types.size(); ++i) + WriteString(types[i]); + WriteUInt(provider); +} + +uint32 BlacklistStoreInput::ReadUInt() { + char buf[sizeof(uint32)]; + fread(buf, 1, sizeof(uint32), file_); + return *reinterpret_cast<uint32*>(buf); +} + +std::string BlacklistStoreInput::ReadString() { + uint32 size = ReadUInt(); + + // Too long strings are not allowed. 
+ if (size > 8192) { + return std::string(); + } + + char buf[8192]; + fread(buf, 1, size, file_); + return std::string(buf, size); +} + +BlacklistStoreInput::BlacklistStoreInput(FILE* file) : file_(file) { + char buf[sizeof(cookie)]; + fread(buf, 1, sizeof(cookie), file_); +} + +BlacklistStoreInput::~BlacklistStoreInput() { + file_util::CloseFile(file_); +} + +uint32 BlacklistStoreInput::ReadNumProviders() { + return ReadUInt(); +} + +void BlacklistStoreInput::ReadProvider(std::string* name, std::string* url) { + *name = ReadString(); + *url = ReadString(); +} + +uint32 BlacklistStoreInput::ReadNumEntries() { + return ReadUInt(); +} + +void BlacklistStoreInput::ReadEntry(std::string* pattern, + uint32* attributes, + std::vector<std::string>* types, + uint32* provider) { + *pattern = ReadString(); + *attributes = ReadUInt(); + if (uint32 n = ReadUInt()) { + while (n--) + types->push_back(ReadString()); + } + *provider = ReadUInt(); +} diff --git a/chrome/browser/privacy_blacklist/blacklist_store.h b/chrome/browser/privacy_blacklist/blacklist_store.h new file mode 100644 index 0000000..8ea96f6 --- /dev/null +++ b/chrome/browser/privacy_blacklist/blacklist_store.h @@ -0,0 +1,95 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CHROME_BROWSER_PRIVACY_BLACKLIST_BLACKLIST_DB_H_ +#define CHROME_BROWSER_PRIVACY_BLACKLIST_BLACKLIST_DB_H_ + +#include <cstdio> +#include <string> +#include <vector> + +#include "base/basictypes.h" + +class FilePath; + +// TODO(idanan): Error handling needed. I/O errors can always happen! + +//////////////////////////////////////////////////////////////////////////////// +// +// Blacklist Binary Storage Output Class +// +// Stores aggregate Privacy Blacklists efficiently on disk. 
The public +// functions below must be called in the order they are declared, as +// the input class is expected to read them in that order. The provider +// and entry output functions must be called the number of times set. +// +//////////////////////////////////////////////////////////////////////////////// +class BlacklistStoreOutput { + public: + explicit BlacklistStoreOutput(FILE* file); + ~BlacklistStoreOutput(); + + // Sets the number of providers stored. + void ReserveProviders(uint32); + + // Stores a provider. + void StoreProvider(const std::string& name, const std::string& url); + + // Sets the number of entries stored. + void ReserveEntries(uint32); + + // Stores an entry. + void StoreEntry(const std::string& pattern, + uint32 attributes, + const std::vector<std::string>& types, + uint32 provider); + + private: + // Writes basic types to the stream. + void WriteUInt(uint32); + void WriteString(const std::string&); + + FILE* file_; + DISALLOW_COPY_AND_ASSIGN(BlacklistStoreOutput); +}; + +//////////////////////////////////////////////////////////////////////////////// +// +// Blacklist Binary Storage Input Class +// +// Stores aggregate Privacy Blacklists efficiently on disk. The public +// functions below must be called in the order they are declared, as +// the output class is expected to write them in that order. The provider +// entries read functions must be called the correct number of times. +// +//////////////////////////////////////////////////////////////////////////////// +class BlacklistStoreInput { + public: + explicit BlacklistStoreInput(FILE* file); + ~BlacklistStoreInput(); + + // Reads the number of providers. + uint32 ReadNumProviders(); + + // Reads a provider. + void ReadProvider(std::string* name, std::string* url); + + // Reads the number of entries. + uint32 ReadNumEntries(); + + // Reads an entry. 
+ void ReadEntry(std::string* pattern, + uint32* attributes, + std::vector<std::string>* types, + uint32* provider); + + private: + uint32 ReadUInt(); + std::string ReadString(); + + FILE* file_; + DISALLOW_COPY_AND_ASSIGN(BlacklistStoreInput); +}; + +#endif diff --git a/chrome/browser/privacy_blacklist/blacklist_unittest.cc b/chrome/browser/privacy_blacklist/blacklist_unittest.cc index ad363ad..4df5f7c 100644 --- a/chrome/browser/privacy_blacklist/blacklist_unittest.cc +++ b/chrome/browser/privacy_blacklist/blacklist_unittest.cc @@ -3,27 +3,105 @@ // found in the LICENSE file. #include "chrome/browser/privacy_blacklist/blacklist.h" + #include "base/file_path.h" +#include "base/file_util.h" +#include "base/path_service.h" +#include "base/string_util.h" +#include "chrome/common/chrome_paths.h" #include "testing/gtest/include/gtest/gtest.h" TEST(BlacklistTest, Generic) { - FilePath path; + // Get path relative to test data dir. + std::wstring input; + PathService::Get(chrome::DIR_TEST_DATA, &input); + file_util::AppendToPath(&input, L"blacklist_small.pbr"); + + FilePath path = FilePath::FromWStringHack(input); Blacklist blacklist(path); + // This test is a friend, so inspect the internal structures. + EXPECT_EQ(5U, blacklist.blacklist_.size()); + std::vector<Blacklist::Entry*>::const_iterator i = + blacklist.blacklist_.begin(); + + EXPECT_EQ(Blacklist::kBlockByType|Blacklist::kDontPersistCookies, + (*i)->attributes()); + EXPECT_TRUE((*i)->MatchType("application/x-shockwave-flash")); + EXPECT_FALSE((*i)->MatchType("image/jpeg")); + EXPECT_EQ("@", (*i++)->pattern()); + + // All entries include global attributes. + // NOTE: Silly bitwise-or with zero to workaround a Mac compiler bug. 
+ EXPECT_EQ(Blacklist::kBlockUnsecure|0, (*i)->attributes()); + EXPECT_FALSE((*i)->MatchType("application/x-shockwave-flash")); + EXPECT_FALSE((*i)->MatchType("image/jpeg")); + EXPECT_EQ("@poor-security-site.com", (*i++)->pattern()); + + EXPECT_EQ(Blacklist::kDontSendCookies|Blacklist::kDontStoreCookies, + (*i)->attributes()); + EXPECT_FALSE((*i)->MatchType("application/x-shockwave-flash")); + EXPECT_FALSE((*i)->MatchType("image/jpeg")); + EXPECT_EQ("@.ad-serving-place.com", (*i++)->pattern()); + + EXPECT_EQ(Blacklist::kDontSendUserAgent|Blacklist::kDontSendReferrer, + (*i)->attributes()); + EXPECT_FALSE((*i)->MatchType("application/x-shockwave-flash")); + EXPECT_FALSE((*i)->MatchType("image/jpeg")); + EXPECT_EQ("www.site.com/anonymous/folder/@", (*i++)->pattern()); + + // NOTE: Silly bitwise-or with zero to workaround a Mac compiler bug. + EXPECT_EQ(Blacklist::kBlockAll|0, (*i)->attributes()); + EXPECT_FALSE((*i)->MatchType("application/x-shockwave-flash")); + EXPECT_FALSE((*i)->MatchType("image/jpeg")); + EXPECT_EQ("www.site.com/bad/url", (*i++)->pattern()); + + EXPECT_EQ(1U, blacklist.providers_.size()); + EXPECT_EQ("Sample", blacklist.providers_.front()->name()); + EXPECT_EQ("http://www.google.com", blacklist.providers_.front()->url()); + // Empty blacklist should not match any URL. EXPECT_FALSE(blacklist.findMatch(GURL())); EXPECT_FALSE(blacklist.findMatch(GURL("http://www.google.com"))); + // StripCookieExpiry Tests std::string cookie1( "PREF=ID=14a549990453e42a:TM=1245183232:LM=1245183232:S=Occ7khRVIEE36Ao5;" " expires=Thu, 16-Jun-2011 20:13:52 GMT; path=/; domain=.google.com"); std::string cookie2( "PREF=ID=14a549990453e42a:TM=1245183232:LM=1245183232:S=Occ7khRVIEE36Ao5;" " path=/; domain=.google.com"); + std::string cookie3( + "PREF=ID=14a549990453e42a:TM=1245183232:LM=1245183232:S=Occ7khRVIEE36Ao5;" + " expires=Thu, 17-Jun-2011 02:13:52 GMT; path=/; domain=.google.com"); // No expiry, should be equal to itself after stripping. 
EXPECT_TRUE(cookie2 == Blacklist::StripCookieExpiry(cookie2)); // Expiry, should be equal to non-expiry version after stripping. EXPECT_TRUE(cookie2 == Blacklist::StripCookieExpiry(cookie1)); + + // Edge cases. + EXPECT_TRUE(std::string() == Blacklist::StripCookieExpiry(std::string())); + EXPECT_TRUE(Blacklist::StripCookieExpiry(cookie2) == + Blacklist::StripCookieExpiry(cookie3)); + + // StripCookies Test. Note that "\r\n" line terminators are used + // because the underlying net util uniformizes those when stripping + // headers. + std::string header1("Host: www.example.com\r\n"); + std::string header2("Upgrade: TLS/1.0, HTTP/1.1\r\n" + "Connection: Upgrade\r\n"); + std::string header3("Date: Mon, 12 Mar 2001 19:20:33 GMT\r\n" + "Expires: Mon, 12 Mar 2001 19:20:33 GMT\r\n" + "Content-Type: text/html\r\n" + "Set-Cookie: B=460soc0taq8c1&b=2; " + "expires=Thu, 15 Apr 2010 20:00:00 GMT; path=/;\r\n"); + std::string header4("Date: Mon, 12 Mar 2001 19:20:33 GMT\r\n" + "Expires: Mon, 12 Mar 2001 19:20:33 GMT\r\n" + "Content-Type: text/html\r\n"); + + EXPECT_TRUE(header1 == Blacklist::StripCookies(header1)); + EXPECT_TRUE(header2 == Blacklist::StripCookies(header2)); + EXPECT_TRUE(header4 == Blacklist::StripCookies(header3)); } diff --git a/chrome/chrome.gyp b/chrome/chrome.gyp index 3731471..623d25e 100644 --- a/chrome/chrome.gyp +++ b/chrome/chrome.gyp @@ -1312,6 +1312,10 @@ 'browser/printing/printer_query.h', 'browser/privacy_blacklist/blacklist.h', 'browser/privacy_blacklist/blacklist.cc', + 'browser/privacy_blacklist/blacklist_io.h', + 'browser/privacy_blacklist/blacklist_io.cc', + 'browser/privacy_blacklist/blacklist_store.h', + 'browser/privacy_blacklist/blacklist_store.cc', 'browser/process_singleton.h', 'browser/process_singleton_linux.cc', 'browser/process_singleton_mac.cc', @@ -3680,6 +3684,7 @@ 'browser/password_manager/password_store_mac_unittest.cc', 'browser/printing/print_job_unittest.cc', 'browser/privacy_blacklist/blacklist_unittest.cc', + 
'browser/privacy_blacklist/blacklist_io_unittest.cc', 'browser/profile_manager_unittest.cc', 'browser/renderer_host/audio_renderer_host_unittest.cc', 'browser/renderer_host/file_system_accessor_unittest.cc', diff --git a/chrome/test/data/blacklist_small.pbl b/chrome/test/data/blacklist_small.pbl new file mode 100644 index 0000000..c753231 --- /dev/null +++ b/chrome/test/data/blacklist_small.pbl @@ -0,0 +1,19 @@ +[Chromium::PrivacyBlacklist] +|Name: Sample +|URL: http://www.google.com Text here is ignored +|Icon: Unsupported feature !!!!! + +# Default match attributes (matches everything) +@ => kBlockByType(application/x-shockwave-flash), kDontPersistCookies + +# Affect an entire site +@poor-security-site.com => kBlockUnsecure + +# Affect subdomains of an entire site +@.ad-serving-place.com => kDontSendCookies, kDontStoreCookies + +# Affect site files under a subfolder +www.site.com/anonymous/folder/@ => kDontSendUserAgent, kDontSendReferrer + +# Affect a specific URL +www.site.com/bad/url => kBlockAll diff --git a/chrome/test/data/blacklist_small.pbr b/chrome/test/data/blacklist_small.pbr Binary files differnew file mode 100644 index 0000000..c3f8571 --- /dev/null +++ b/chrome/test/data/blacklist_small.pbr diff --git a/chrome/tools/pbl_tool/pbl_tool.cc b/chrome/tools/pbl_tool/pbl_tool.cc index 6c0cdde..355c7ea 100644 --- a/chrome/tools/pbl_tool/pbl_tool.cc +++ b/chrome/tools/pbl_tool/pbl_tool.cc @@ -3,14 +3,55 @@ // found in the LICENSE file. // This tool manages privacy blacklists. Primarily for loading a text -// blacklist into the binary agregate blacklist. +// blacklist into the binary aggregate blacklist. 
#include <iostream> #include "base/process_util.h" +#include "base/string_util.h" #include "chrome/browser/privacy_blacklist/blacklist.h" +#include "chrome/browser/privacy_blacklist/blacklist_io.h" -int main(int argc, char* argv[]) { +#ifdef OS_POSIX +#define ICHAR char +#define ICERR std::cerr +#define IMAIN main +#else +#define ICHAR wchar_t +#define ICERR std::wcerr +#define IMAIN wmain +#endif + +namespace { + +int PrintUsage(int argc, ICHAR* argv[]) { + ICERR << "Usage: " << argv[0] << " <source> <target>\n" + " <source> is the text blacklist (.pbl) to load.\n" + " <target> is the binary output blacklist repository.\n\n" + "Adds all entries from <source> to <target>.\n" + "Creates <target> if it does not exist.\n"; + return 1; +} + +} + +int IMAIN(int argc, ICHAR* argv[]) { base::EnableTerminationOnHeapCorruption(); - std::cout << "Aw, Snap! This is not implemented yet." << std::endl; - CHECK(std::string() == Blacklist::StripCookies(std::string())); + + if (argc < 3) + return PrintUsage(argc, argv); + + FilePath input(argv[1]); + FilePath output(argv[2]); + + BlacklistIO io; + if (io.Read(input)) { + if (io.Write(output)) { + return 0; + } else { + ICERR << "Error writing output file " << argv[2] << "\n"; + } + } else { + ICERR << "Error reading input file " << argv[1] << "\n"; + } + return -1; } |