diff options
author | kaliamoorthi@chromium.org <kaliamoorthi@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2014-04-11 16:44:38 +0000 |
---|---|---|
committer | kaliamoorthi@chromium.org <kaliamoorthi@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2014-04-11 16:44:38 +0000 |
commit | b3a495f23fa99d04526aced6d1519b52ed9bfa57 (patch) | |
tree | be1949d8036b8254f0eeac8fe722e4e7ae2a128e | |
parent | 23edced5b7db3aa1fe129f60e70d8a0221f65664 (diff) | |
download | chromium_src-b3a495f23fa99d04526aced6d1519b52ed9bfa57.zip chromium_src-b3a495f23fa99d04526aced6d1519b52ed9bfa57.tar.gz chromium_src-b3a495f23fa99d04526aced6d1519b52ed9bfa57.tar.bz2 |
Adds support in URLBlacklist to include query parameters.
This CL adds support in URLBlacklist for blacklist or whitelist an url based on query parameters.
This CL extends crrev.com/219613002
BUG=349997
Review URL: https://codereview.chromium.org/225023026
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@263261 0039d316-1c4b-4281-b951-d872f2087c98
4 files changed, 255 insertions, 19 deletions
diff --git a/chrome/browser/managed_mode/managed_mode_url_filter.cc b/chrome/browser/managed_mode/managed_mode_url_filter.cc index 75fb303..d34a71e 100644 --- a/chrome/browser/managed_mode/managed_mode_url_filter.cc +++ b/chrome/browser/managed_mode/managed_mode_url_filter.cc @@ -89,11 +89,13 @@ bool FilterBuilder::AddPattern(const std::string& pattern, int site_id) { std::string host; uint16 port; std::string path; + std::string query; bool match_subdomains = true; URLBlacklist::SegmentURLCallback callback = static_cast<URLBlacklist::SegmentURLCallback>(URLFixerUpper::SegmentURL); if (!URLBlacklist::FilterToComponents( - callback, pattern, &scheme, &host, &match_subdomains, &port, &path)) { + callback, pattern, + &scheme, &host, &match_subdomains, &port, &path, &query)) { LOG(ERROR) << "Invalid pattern " << pattern; return false; } @@ -101,7 +103,7 @@ bool FilterBuilder::AddPattern(const std::string& pattern, int site_id) { scoped_refptr<URLMatcherConditionSet> condition_set = URLBlacklist::CreateConditionSet( &contents_->url_matcher, ++matcher_id_, - scheme, host, match_subdomains, port, path); + scheme, host, match_subdomains, port, path, query, true); all_conditions_.push_back(condition_set); contents_->matcher_site_map[matcher_id_] = site_id; return true; diff --git a/chrome/browser/policy/url_blacklist_manager_unittest.cc b/chrome/browser/policy/url_blacklist_manager_unittest.cc index 834b308..b4a4232 100644 --- a/chrome/browser/policy/url_blacklist_manager_unittest.cc +++ b/chrome/browser/policy/url_blacklist_manager_unittest.cc @@ -174,9 +174,14 @@ TEST_P(URLBlacklistFilterToComponentsTest, FilterToComponents) { uint16 port = 42; std::string path; - URLBlacklist::FilterToComponents(GetSegmentURLCallback(), GetParam().filter(), - &scheme, &host, &match_subdomains, &port, - &path); + URLBlacklist::FilterToComponents(GetSegmentURLCallback(), + GetParam().filter(), + &scheme, + &host, + &match_subdomains, + &port, + &path, + NULL); EXPECT_EQ(GetParam().scheme(), scheme); EXPECT_EQ(GetParam().host(), host); EXPECT_EQ(GetParam().match_subdomains(), match_subdomains); @@ -466,6 +471,142 @@ TEST_F(URLBlacklistManagerTest, Filtering) { EXPECT_FALSE(blacklist.IsURLBlocked(GURL("http://example.com"))); } +TEST_F(URLBlacklistManagerTest, QueryParameters) { + URLBlacklist blacklist(GetSegmentURLCallback()); + scoped_ptr<base::ListValue> blocked(new base::ListValue); + scoped_ptr<base::ListValue> allowed(new base::ListValue); + + // Block domain and all subdomains, for any filtered scheme. + blocked->AppendString("youtube.com"); + allowed->AppendString("youtube.com/watch?v=XYZ"); + blacklist.Block(blocked.get()); + blacklist.Allow(allowed.get()); + + EXPECT_TRUE(blacklist.IsURLBlocked(GURL("http://youtube.com"))); + EXPECT_TRUE(blacklist.IsURLBlocked(GURL("http://youtube.com/watch?v=123"))); + EXPECT_TRUE( + blacklist.IsURLBlocked(GURL("http://youtube.com/watch?v=123&v=XYZ"))); + EXPECT_TRUE( + blacklist.IsURLBlocked(GURL("http://youtube.com/watch?v=XYZ&v=123"))); + EXPECT_FALSE(blacklist.IsURLBlocked(GURL("http://youtube.com/watch?v=XYZ"))); + EXPECT_FALSE( + blacklist.IsURLBlocked(GURL("http://youtube.com/watch?v=XYZ&foo=bar"))); + EXPECT_FALSE( + blacklist.IsURLBlocked(GURL("http://youtube.com/watch?foo=bar&v=XYZ"))); + + allowed.reset(new base::ListValue); + allowed->AppendString("youtube.com/watch?av=XYZ&ag=123"); + blacklist.Allow(allowed.get()); + EXPECT_TRUE(blacklist.IsURLBlocked(GURL("http://youtube.com"))); + EXPECT_TRUE(blacklist.IsURLBlocked(GURL("http://youtube.com/watch?av=123"))); + EXPECT_TRUE(blacklist.IsURLBlocked(GURL("http://youtube.com/watch?av=XYZ"))); + EXPECT_TRUE( + blacklist.IsURLBlocked(GURL("http://youtube.com/watch?av=123&ag=XYZ"))); + EXPECT_TRUE( + blacklist.IsURLBlocked(GURL("http://youtube.com/watch?ag=XYZ&av=123"))); + EXPECT_FALSE( + blacklist.IsURLBlocked(GURL("http://youtube.com/watch?av=XYZ&ag=123"))); + EXPECT_FALSE( + blacklist.IsURLBlocked(GURL("http://youtube.com/watch?ag=123&av=XYZ"))); + EXPECT_TRUE(blacklist.IsURLBlocked( + GURL("http://youtube.com/watch?av=XYZ&ag=123&av=123"))); + EXPECT_TRUE(blacklist.IsURLBlocked( + GURL("http://youtube.com/watch?av=XYZ&ag=123&ag=1234"))); + + allowed.reset(new base::ListValue); + allowed->AppendString("youtube.com/watch?foo=bar*&vid=2*"); + blacklist.Allow(allowed.get()); + EXPECT_TRUE(blacklist.IsURLBlocked(GURL("http://youtube.com"))); + EXPECT_TRUE(blacklist.IsURLBlocked(GURL("http://youtube.com/watch?vid=2"))); + EXPECT_TRUE(blacklist.IsURLBlocked(GURL("http://youtube.com/watch?foo=bar"))); + EXPECT_FALSE( + blacklist.IsURLBlocked(GURL("http://youtube.com/watch?vid=2&foo=bar"))); + EXPECT_FALSE( + blacklist.IsURLBlocked(GURL("http://youtube.com/watch?vid=2&foo=bar1"))); + EXPECT_FALSE( + blacklist.IsURLBlocked(GURL("http://youtube.com/watch?vid=234&foo=bar"))); + EXPECT_FALSE(blacklist.IsURLBlocked( + GURL("http://youtube.com/watch?vid=234&foo=bar23"))); + + blocked.reset(new base::ListValue); + blocked->AppendString("youtube1.com/disallow?v=44678"); + blacklist.Block(blocked.get()); + EXPECT_FALSE(blacklist.IsURLBlocked(GURL("http://youtube1.com"))); + EXPECT_FALSE(blacklist.IsURLBlocked(GURL("http://youtube1.com?v=123"))); + // Path does not match + EXPECT_FALSE(blacklist.IsURLBlocked(GURL("http://youtube1.com?v=44678"))); + EXPECT_TRUE( + blacklist.IsURLBlocked(GURL("http://youtube1.com/disallow?v=44678"))); + EXPECT_FALSE( + blacklist.IsURLBlocked(GURL("http://youtube1.com/disallow?v=4467"))); + EXPECT_FALSE(blacklist.IsURLBlocked( + GURL("http://youtube1.com/disallow?v=4467&v=123"))); + EXPECT_TRUE(blacklist.IsURLBlocked( + GURL("http://youtube1.com/disallow?v=4467&v=123&v=44678"))); + + blocked.reset(new base::ListValue); + blocked->AppendString("youtube1.com/disallow?g=*"); + blacklist.Block(blocked.get()); + EXPECT_FALSE(blacklist.IsURLBlocked(GURL("http://youtube1.com"))); + EXPECT_FALSE(blacklist.IsURLBlocked(GURL("http://youtube1.com?ag=123"))); + EXPECT_TRUE( + blacklist.IsURLBlocked(GURL("http://youtube1.com/disallow?g=123"))); + EXPECT_TRUE( + blacklist.IsURLBlocked(GURL("http://youtube1.com/disallow?ag=13&g=123"))); + + blocked.reset(new base::ListValue); + blocked->AppendString("youtube2.com/disallow?a*"); + blacklist.Block(blocked.get()); + EXPECT_FALSE(blacklist.IsURLBlocked(GURL("http://youtube2.com"))); + EXPECT_TRUE(blacklist.IsURLBlocked( + GURL("http://youtube2.com/disallow?b=123&a21=467"))); + EXPECT_TRUE( + blacklist.IsURLBlocked(GURL("http://youtube2.com/disallow?abba=true"))); + EXPECT_FALSE( + blacklist.IsURLBlocked(GURL("http://youtube2.com/disallow?baba=true"))); + + allowed.reset(new base::ListValue); + blocked.reset(new base::ListValue); + blocked->AppendString("youtube3.com"); + allowed->AppendString("youtube3.com/watch?fo*"); + blacklist.Block(blocked.get()); + blacklist.Allow(allowed.get()); + EXPECT_TRUE(blacklist.IsURLBlocked(GURL("http://youtube3.com"))); + EXPECT_TRUE( + blacklist.IsURLBlocked(GURL("http://youtube3.com/watch?b=123&a21=467"))); + EXPECT_FALSE(blacklist.IsURLBlocked( + GURL("http://youtube3.com/watch?b=123&a21=467&foo1"))); + EXPECT_FALSE(blacklist.IsURLBlocked( + GURL("http://youtube3.com/watch?b=123&a21=467&foo=bar"))); + EXPECT_FALSE(blacklist.IsURLBlocked( + GURL("http://youtube3.com/watch?b=123&a21=467&fo=ba"))); + EXPECT_FALSE( + blacklist.IsURLBlocked(GURL("http://youtube3.com/watch?foriegn=true"))); + EXPECT_FALSE(blacklist.IsURLBlocked(GURL("http://youtube3.com/watch?fold"))); + + allowed.reset(new base::ListValue); + blocked.reset(new base::ListValue); + blocked->AppendString("youtube4.com"); + allowed->AppendString("youtube4.com?*"); + blacklist.Block(blocked.get()); + blacklist.Allow(allowed.get()); + EXPECT_TRUE(blacklist.IsURLBlocked(GURL("http://youtube4.com"))); + EXPECT_FALSE(blacklist.IsURLBlocked(GURL("http://youtube4.com/?hello"))); + EXPECT_FALSE(blacklist.IsURLBlocked(GURL("http://youtube4.com/?foo"))); + + allowed.reset(new base::ListValue); + blocked.reset(new base::ListValue); + blocked->AppendString("youtube5.com?foo=bar"); + allowed->AppendString("youtube5.com?foo1=bar1&foo2=bar2&"); + blacklist.Block(blocked.get()); + blacklist.Allow(allowed.get()); + EXPECT_FALSE(blacklist.IsURLBlocked(GURL("http://youtube5.com"))); + EXPECT_TRUE(blacklist.IsURLBlocked(GURL("http://youtube5.com/?foo=bar&a=b"))); + // More specific filter is given precedence. + EXPECT_FALSE(blacklist.IsURLBlocked( + GURL("http://youtube5.com/?a=b&foo=bar&foo1=bar1&foo2=bar2"))); +} + TEST_F(URLBlacklistManagerTest, BlockAllWithExceptions) { URLBlacklist blacklist(GetSegmentURLCallback()); diff --git a/components/policy/core/browser/url_blacklist_manager.cc b/components/policy/core/browser/url_blacklist_manager.cc index ff42adb..1da4adf 100644 --- a/components/policy/core/browser/url_blacklist_manager.cc +++ b/components/policy/core/browser/url_blacklist_manager.cc @@ -20,6 +20,7 @@ #include "net/base/load_flags.h" #include "net/base/net_errors.h" #include "net/url_request/url_request.h" +#include "url/url_parse.h" using url_matcher::URLMatcher; using url_matcher::URLMatcherCondition; @@ -27,6 +28,7 @@ using url_matcher::URLMatcherConditionFactory; using url_matcher::URLMatcherConditionSet; using url_matcher::URLMatcherPortFilter; using url_matcher::URLMatcherSchemeFilter; +using url_matcher::URLQueryElementMatcherCondition; namespace policy { @@ -48,6 +50,54 @@ scoped_ptr<URLBlacklist> BuildBlacklist( return blacklist.Pass(); } +// Tokenise the parameter |query| and add appropriate query element matcher +// conditions to the |query_conditions|. +void ProcessQueryToConditions( + url_matcher::URLMatcherConditionFactory* condition_factory, + const std::string& query, + bool allow, + std::set<URLQueryElementMatcherCondition>* query_conditions) { + url_parse::Component query_left = url_parse::MakeRange(0, query.length()); + url_parse::Component key; + url_parse::Component value; + // Depending on the filter type being black-list or white-list, the matcher + // choose any or every match. The idea is a URL should be black-listed if + // there is any occurrence of the key value pair. It should be white-listed + // only if every occurrence of the key is followed by the value. This avoids + // situations such as a user appending a white-listed video parameter in the + // end of the query and watching a video of his choice (the last parameter is + // ignored by some web servers like youtube's). + URLQueryElementMatcherCondition::Type match_type = + allow ? URLQueryElementMatcherCondition::MATCH_ALL + : URLQueryElementMatcherCondition::MATCH_ANY; + + while (ExtractQueryKeyValue(query.data(), &query_left, &key, &value)) { + URLQueryElementMatcherCondition::QueryElementType query_element_type = + value.len ? URLQueryElementMatcherCondition::ELEMENT_TYPE_KEY_VALUE + : URLQueryElementMatcherCondition::ELEMENT_TYPE_KEY; + URLQueryElementMatcherCondition::QueryValueMatchType query_value_match_type; + if (!value.len && key.len && query[key.end() - 1] == '*') { + --key.len; + query_value_match_type = + URLQueryElementMatcherCondition::QUERY_VALUE_MATCH_PREFIX; + } else if (value.len && query[value.end() - 1] == '*') { + --value.len; + query_value_match_type = + URLQueryElementMatcherCondition::QUERY_VALUE_MATCH_PREFIX; + } else { + query_value_match_type = + URLQueryElementMatcherCondition::QUERY_VALUE_MATCH_EXACT; + } + query_conditions->insert( + URLQueryElementMatcherCondition(query.substr(key.begin, key.len), + query.substr(value.begin, value.len), + query_value_match_type, + query_element_type, + match_type, + condition_factory)); + } +} + } // namespace struct URLBlacklist::FilterComponents { @@ -58,6 +108,8 @@ struct URLBlacklist::FilterComponents { std::string host; uint16 port; std::string path; + std::string query; + int number_of_key_value_pairs; bool match_subdomains; bool allow; }; @@ -77,17 +129,31 @@ void URLBlacklist::AddFilters(bool allow, DCHECK(success); FilterComponents components; components.allow = allow; - if (!FilterToComponents(segment_url_, pattern, &components.scheme, - &components.host, &components.match_subdomains, - &components.port, &components.path)) { + if (!FilterToComponents(segment_url_, + pattern, + &components.scheme, + &components.host, + &components.match_subdomains, + &components.port, + &components.path, + &components.query)) { LOG(ERROR) << "Invalid pattern " << pattern; continue; } - all_conditions.push_back( - CreateConditionSet(url_matcher_.get(), ++id_, components.scheme, - components.host, components.match_subdomains, - components.port, components.path)); + scoped_refptr<URLMatcherConditionSet> condition_set = + CreateConditionSet(url_matcher_.get(), + ++id_, + components.scheme, + components.host, + components.match_subdomains, + components.port, + components.path, + components.query, + allow); + components.number_of_key_value_pairs = + condition_set->query_conditions().size(); + all_conditions.push_back(condition_set); filters_[id_] = components; } url_matcher_->AddConditionSets(all_conditions); @@ -133,7 +199,8 @@ bool URLBlacklist::FilterToComponents(SegmentURLCallback segment_url, std::string* host, bool* match_subdomains, uint16* port, - std::string* path) { + std::string* path, + std::string* query) { url_parse::Parsed parsed; if (segment_url(filter, &parsed) == kFileScheme) { @@ -207,6 +274,13 @@ bool URLBlacklist::FilterToComponents(SegmentURLCallback segment_url, else path->clear(); + if (query) { + if (parsed.query.is_nonempty()) + query->assign(filter, parsed.query.begin, parsed.query.len); + else + query->clear(); + } + return true; } @@ -218,7 +292,9 @@ scoped_refptr<URLMatcherConditionSet> URLBlacklist::CreateConditionSet( const std::string& host, bool match_subdomains, uint16 port, - const std::string& path) { + const std::string& path, + const std::string& query, + bool allow) { URLMatcherConditionFactory* condition_factory = url_matcher->condition_factory(); std::set<URLMatcherCondition> conditions; @@ -226,6 +302,12 @@ scoped_refptr<URLMatcherConditionSet> URLBlacklist::CreateConditionSet( condition_factory->CreateHostSuffixPathPrefixCondition(host, path) : condition_factory->CreateHostEqualsPathPrefixCondition(host, path)); + std::set<URLQueryElementMatcherCondition> query_conditions; + if (!query.empty()) { + ProcessQueryToConditions( + condition_factory, query, allow, &query_conditions); + } + scoped_ptr<URLMatcherSchemeFilter> scheme_filter; if (!scheme.empty()) scheme_filter.reset(new URLMatcherSchemeFilter(scheme)); @@ -237,8 +319,11 @@ scoped_refptr<URLMatcherConditionSet> URLBlacklist::CreateConditionSet( port_filter.reset(new URLMatcherPortFilter(ranges)); } - return new URLMatcherConditionSet(id, conditions, - scheme_filter.Pass(), port_filter.Pass()); + return new URLMatcherConditionSet(id, + conditions, + query_conditions, + scheme_filter.Pass(), + port_filter.Pass()); } // static @@ -259,6 +344,9 @@ bool URLBlacklist::FilterTakesPrecedence(const FilterComponents& lhs, if (path_length != other_path_length) return path_length > other_path_length; + if (lhs.number_of_key_value_pairs != rhs.number_of_key_value_pairs) + return lhs.number_of_key_value_pairs > rhs.number_of_key_value_pairs; + if (lhs.allow && !rhs.allow) return true; diff --git a/components/policy/core/browser/url_blacklist_manager.h b/components/policy/core/browser/url_blacklist_manager.h index c910e70..9052e95 100644 --- a/components/policy/core/browser/url_blacklist_manager.h +++ b/components/policy/core/browser/url_blacklist_manager.h @@ -77,17 +77,20 @@ class POLICY_EXPORT URLBlacklist { // of the hostname (if it is one.) // |port| is 0 if none is explicitly defined. // |path| does not include query parameters. + // |query| contains the query parameters ('?' not included). static bool FilterToComponents(SegmentURLCallback segment_url, const std::string& filter, std::string* scheme, std::string* host, bool* match_subdomains, uint16* port, - std::string* path); + std::string* path, + std::string* query); // Creates a condition set that can be used with the |url_matcher|. |id| needs // to be a unique number that will be returned by the |url_matcher| if the URL - // matches that condition set. + // matches that condition set. |allow| indicates if it is a white-list (true) + // or black-list (false) filter. static scoped_refptr<url_matcher::URLMatcherConditionSet> CreateConditionSet( url_matcher::URLMatcher* url_matcher, url_matcher::URLMatcherConditionSet::ID id, @@ -95,7 +98,9 @@ class POLICY_EXPORT URLBlacklist { const std::string& host, bool match_subdomains, uint16 port, - const std::string& path); + const std::string& path, + const std::string& query, + bool allow); private: struct FilterComponents; |