// Copyright (c) 2012 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "chrome/renderer/safe_browsing/phishing_classifier.h" #include #include "base/bind.h" #include "base/command_line.h" #include "base/memory/scoped_ptr.h" #include "base/strings/string16.h" #include "base/strings/utf_string_conversions.h" #include "chrome/common/chrome_switches.h" #include "chrome/common/safe_browsing/client_model.pb.h" #include "chrome/common/safe_browsing/csd.pb.h" #include "chrome/renderer/safe_browsing/features.h" #include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h" #include "chrome/renderer/safe_browsing/murmurhash3_util.h" #include "chrome/renderer/safe_browsing/scorer.h" #include "chrome/test/base/in_process_browser_test.h" #include "chrome/test/base/ui_test_utils.h" #include "content/public/renderer/render_view.h" #include "crypto/sha2.h" #include "net/dns/mock_host_resolver.h" #include "net/test/embedded_test_server/embedded_test_server.h" #include "net/test/embedded_test_server/http_response.h" #include "testing/gmock/include/gmock/gmock.h" #include "url/gurl.h" using ::testing::AllOf; using ::testing::Contains; using ::testing::Not; using ::testing::Pair; namespace { // The first RenderFrame is routing ID 1, and the first RenderView is 2. const int kRenderViewRoutingId = 2; } namespace safe_browsing { class PhishingClassifierTest : public InProcessBrowserTest { protected: PhishingClassifierTest() : url_tld_token_net_(features::kUrlTldToken + std::string("net")), page_link_domain_phishing_(features::kPageLinkDomain + std::string("phishing.com")), page_term_login_(features::kPageTerm + std::string("login")) { } void SetUpCommandLine(base::CommandLine* command_line) override { command_line->AppendSwitch(switches::kSingleProcess); #if defined(OS_WIN) // Don't want to try to create a GPU process. command_line->AppendSwitch(switches::kDisableGpu); #endif } void SetUpOnMainThread() override { // Construct a model to test with. We include one feature from each of // the feature extractors, which allows us to verify that they all ran. ClientSideModel model; model.add_hashes(crypto::SHA256HashString(url_tld_token_net_)); model.add_hashes(crypto::SHA256HashString(page_link_domain_phishing_)); model.add_hashes(crypto::SHA256HashString(page_term_login_)); model.add_hashes(crypto::SHA256HashString("login")); model.add_hashes(crypto::SHA256HashString(features::kUrlTldToken + std::string("net"))); model.add_hashes(crypto::SHA256HashString(features::kPageLinkDomain + std::string("phishing.com"))); model.add_hashes(crypto::SHA256HashString(features::kPageTerm + std::string("login"))); model.add_hashes(crypto::SHA256HashString("login")); // Add a default rule with a non-phishy weight. ClientSideModel::Rule* rule = model.add_rule(); rule->set_weight(-1.0); // To give a phishy score, the total weight needs to be >= 0 // (0.5 when converted to a probability). This will only happen // if all of the listed features are present. rule = model.add_rule(); rule->add_feature(0); rule->add_feature(1); rule->add_feature(2); rule->set_weight(1.0); model.add_page_term(3); model.set_murmur_hash_seed(2777808611U); model.add_page_word(MurmurHash3String("login", model.murmur_hash_seed())); model.set_max_words_per_term(1); model.set_max_shingles_per_page(100); model.set_shingle_size(3); clock_ = new MockFeatureExtractorClock; scorer_.reset(Scorer::Create(model.SerializeAsString())); ASSERT_TRUE(scorer_.get()); classifier_.reset(new PhishingClassifier( content::RenderView::FromRoutingID(kRenderViewRoutingId), clock_)); } void TearDownOnMainThread() override { content::RunAllPendingInMessageLoop(); } // Helper method to start phishing classification and wait for it to // complete. Returns the true if the page is classified as phishy and // false otherwise. bool RunPhishingClassifier(const base::string16* page_text, float* phishy_score, FeatureMap* features) { ClientPhishingRequest verdict; // The classifier accesses the RenderView and must run in the RenderThread. PostTaskToInProcessRendererAndWait( base::Bind(&PhishingClassifierTest::DoRunPhishingClassifier, base::Unretained(this), page_text, phishy_score, features, &verdict)); return verdict.is_phishing(); } void DoRunPhishingClassifier(const base::string16* page_text, float* phishy_score, FeatureMap* features, ClientPhishingRequest* verdict) { *phishy_score = PhishingClassifier::kInvalidScore; features->Clear(); // Force synchronous behavior for ease of unittesting. base::RunLoop run_loop; classifier_->BeginClassification( page_text, base::Bind(&PhishingClassifierTest::ClassificationFinished, base::Unretained(this), &run_loop, verdict)); content::RunThisRunLoop(&run_loop); *phishy_score = verdict->client_score(); for (int i = 0; i < verdict->feature_map_size(); ++i) { features->AddRealFeature(verdict->feature_map(i).name(), verdict->feature_map(i).value()); } } // Completion callback for classification. void ClassificationFinished(base::RunLoop* run_loop, ClientPhishingRequest* verdict_out, const ClientPhishingRequest& verdict) { *verdict_out = verdict; // Copy the verdict. run_loop->Quit(); } scoped_ptr embedded_test_server_; net::test_server::EmbeddedTestServer* embedded_test_server() { // TODO(ajwong): Merge this into BrowserTestBase. if (!embedded_test_server_) { embedded_test_server_.reset(new net::test_server::EmbeddedTestServer()); embedded_test_server_->RegisterRequestHandler( base::Bind(&PhishingClassifierTest::HandleRequest, base::Unretained(this))); CHECK(embedded_test_server_->InitializeAndWaitUntilReady()); } return embedded_test_server_.get(); } void LoadHtml(const std::string& host, const std::string& content) { GURL::Replacements replace_host; replace_host.SetHostStr(host); response_content_ = content; ui_test_utils::NavigateToURL( browser(), embedded_test_server()->base_url().ReplaceComponents(replace_host)); } void LoadHtmlPost(const std::string& host, const std::string& content) { GURL::Replacements replace_host; replace_host.SetHostStr(host); response_content_ = content; ui_test_utils::NavigateToURLWithPost( browser(), embedded_test_server()->base_url().ReplaceComponents(replace_host)); } scoped_ptr HandleRequest(const net::test_server::HttpRequest& request) { scoped_ptr http_response( new net::test_server::BasicHttpResponse()); http_response->set_code(net::HTTP_OK); http_response->set_content_type("text/html"); http_response->set_content(response_content_); return http_response.Pass(); } std::string response_content_; scoped_ptr scorer_; scoped_ptr classifier_; MockFeatureExtractorClock* clock_; // Owned by classifier_. // Features that are in the model. const std::string url_tld_token_net_; const std::string page_link_domain_phishing_; const std::string page_term_login_; }; // This test flakes on Mac with force compositing mode. // http://crbug.com/316709 #if defined(OS_MACOSX) #define MAYBE_TestClassification DISABLED_TestClassification #else #define MAYBE_TestClassification TestClassification #endif IN_PROC_BROWSER_TEST_F(PhishingClassifierTest, MAYBE_TestClassification) { host_resolver()->AddRule("*", "127.0.0.1"); // No scorer yet, so the classifier is not ready. ASSERT_FALSE(classifier_->is_ready()); // Now set the scorer. classifier_->set_phishing_scorer(scorer_.get()); ASSERT_TRUE(classifier_->is_ready()); // This test doesn't exercise the extraction timing. EXPECT_CALL(*clock_, Now()) .WillRepeatedly(::testing::Return(base::TimeTicks::Now())); base::string16 page_text = base::ASCIIToUTF16("login"); float phishy_score; FeatureMap features; LoadHtml("host.net", "login"); EXPECT_TRUE(RunPhishingClassifier(&page_text, &phishy_score, &features)); // Note: features.features() might contain other features that simply aren't // in the model. EXPECT_THAT(features.features(), AllOf(Contains(Pair(url_tld_token_net_, 1.0)), Contains(Pair(page_link_domain_phishing_, 1.0)), Contains(Pair(page_term_login_, 1.0)))); EXPECT_FLOAT_EQ(0.5, phishy_score); // Change the link domain to something non-phishy. LoadHtml("host.net", "login"); EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features)); EXPECT_THAT(features.features(), AllOf(Contains(Pair(url_tld_token_net_, 1.0)), Contains(Pair(page_term_login_, 1.0)))); EXPECT_THAT(features.features(), Not(Contains(Pair(page_link_domain_phishing_, 1.0)))); EXPECT_GE(phishy_score, 0.0); EXPECT_LT(phishy_score, 0.5); // Extraction should fail for this case since there is no TLD. LoadHtml("localhost", "content"); EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features)); EXPECT_EQ(0U, features.features().size()); EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score); // Extraction should also fail for this case because the URL is not http. net::SpawnedTestServer https_server( net::SpawnedTestServer::TYPE_HTTPS, net::SpawnedTestServer::kLocalhost, base::FilePath(FILE_PATH_LITERAL("chrome/test/data"))); ASSERT_TRUE(https_server.Start()); GURL::Replacements replace_host; replace_host.SetHostStr("host.net"); GURL test_url = https_server.GetURL("/files/title1.html"); ui_test_utils::NavigateToURL(browser(), test_url.ReplaceComponents(replace_host)); EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features)); EXPECT_EQ(0U, features.features().size()); EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score); // Extraction should fail for this case because the URL is a POST request. LoadHtmlPost("host.net", "content"); EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features)); EXPECT_EQ(0U, features.features().size()); EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score); } // Test flakes with LSAN enabled. See http://crbug.com/373155. #if defined(LEAK_SANITIZER) #define MAYBE_DisableDetection DISABLED_DisableDetection #else #define MAYBE_DisableDetection DisableDetection #endif IN_PROC_BROWSER_TEST_F(PhishingClassifierTest, MAYBE_DisableDetection) { // No scorer yet, so the classifier is not ready. EXPECT_FALSE(classifier_->is_ready()); // Now set the scorer. classifier_->set_phishing_scorer(scorer_.get()); EXPECT_TRUE(classifier_->is_ready()); // Set a NULL scorer, which turns detection back off. classifier_->set_phishing_scorer(NULL); EXPECT_FALSE(classifier_->is_ready()); } } // namespace safe_browsing