diff options
author | rsesek@chromium.org <rsesek@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-06-16 01:25:17 +0000 |
---|---|---|
committer | rsesek@chromium.org <rsesek@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-06-16 01:25:17 +0000 |
commit | d865c551d9f6f642f22395a1a73c2b7efb71714f (patch) | |
tree | acd1f08ec1ec9b2b3498e9eeff0f4534c7b0153b /chrome | |
parent | 7ac389e1f2ba30d5d30c71ae99ec577af9baab0f (diff) | |
download | chromium_src-d865c551d9f6f642f22395a1a73c2b7efb71714f.zip chromium_src-d865c551d9f6f642f22395a1a73c2b7efb71714f.tar.gz chromium_src-d865c551d9f6f642f22395a1a73c2b7efb71714f.tar.bz2 |
Revert 89098 - Initial CL to update the client model more frequently.
BUG=None
TEST=None
Review URL: http://codereview.chromium.org/7057025
TBR=noelutz@google.com
Review URL: http://codereview.chromium.org/7185004
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@89283 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome')
18 files changed, 241 insertions, 516 deletions
diff --git a/chrome/browser/safe_browsing/client_side_detection_service.cc b/chrome/browser/safe_browsing/client_side_detection_service.cc index 66e0567..7122bd6 100644 --- a/chrome/browser/safe_browsing/client_side_detection_service.cc +++ b/chrome/browser/safe_browsing/client_side_detection_service.cc @@ -8,7 +8,6 @@ #include "base/file_path.h" #include "base/file_util_proxy.h" #include "base/logging.h" -#include "base/time.h" #include "base/memory/scoped_ptr.h" #include "base/message_loop.h" #include "base/metrics/histogram.h" @@ -17,7 +16,6 @@ #include "base/task.h" #include "base/time.h" #include "chrome/common/net/http_return.h" -#include "chrome/common/safe_browsing/client_model.pb.h" #include "chrome/common/safe_browsing/csd.pb.h" #include "chrome/common/safe_browsing/safebrowsing_messages.h" #include "content/browser/browser_thread.h" @@ -27,7 +25,6 @@ #include "googleurl/src/gurl.h" #include "ipc/ipc_platform_file.h" #include "net/base/load_flags.h" -#include "net/http/http_response_headers.h" #include "net/url_request/url_request_context_getter.h" #include "net/url_request/url_request_status.h" @@ -37,12 +34,7 @@ namespace safe_browsing { -const size_t ClientSideDetectionService::kMaxModelSizeBytes = 90 * 1024; const int ClientSideDetectionService::kMaxReportsPerInterval = 3; -// TODO(noelutz): once we know this mechanism works as intended we should fetch -// the model much more frequently. E.g., every 5 minutes or so. -const int ClientSideDetectionService::kClientModelFetchIntervalMs = 3600 * 1000; -const int ClientSideDetectionService::kInitialClientModelFetchDelayMs = 10000; const base::TimeDelta ClientSideDetectionService::kReportsInterval = base::TimeDelta::FromDays(1); @@ -56,8 +48,11 @@ const char ClientSideDetectionService::kClientReportPhishingUrl[] = // Note: when updatng the model version, don't forget to change the filename // in chrome/common/chrome_constants.cc as well, or else existing users won't // download the new model. +// +// TODO(bryner): add version metadata so that clients can download new models +// without needing a new model filename. const char ClientSideDetectionService::kClientModelUrl[] = - "https://ssl.gstatic.com/safebrowsing/csd/client_model_v2.pb"; + "https://ssl.gstatic.com/safebrowsing/csd/client_model_v1.pb"; struct ClientSideDetectionService::ClientReportInfo { scoped_ptr<ClientReportPhishingRequestCallback> callback; @@ -72,10 +67,8 @@ ClientSideDetectionService::ClientSideDetectionService( const FilePath& model_path, net::URLRequestContextGetter* request_context_getter) : model_path_(model_path), + model_status_(UNKNOWN_STATUS), model_file_(base::kInvalidPlatformFileValue), - model_version_(-1), - tmp_model_file_(base::kInvalidPlatformFileValue), - tmp_model_version_(-1), ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)), ALLOW_THIS_IN_INITIALIZER_LIST(callback_factory_(this)), request_context_getter_(request_context_getter) { @@ -88,7 +81,7 @@ ClientSideDetectionService::~ClientSideDetectionService() { STLDeleteContainerPairPointers(client_phishing_reports_.begin(), client_phishing_reports_.end()); client_phishing_reports_.clear(); - CloseModelFile(&model_file_); + CloseModelFile(); } /* static */ @@ -102,27 +95,28 @@ ClientSideDetectionService* ClientSideDetectionService::Create( UMA_HISTOGRAM_COUNTS("SBClientPhishing.InitPrivateNetworksFailed", 1); return NULL; } - // We fetch the model at every browser restart. In a lot of cases the model - // will be in the cache so it won't actually be fetched from the network. - // We delay the first model fetch to avoid slowing down browser startup. - MessageLoop::current()->PostDelayedTask( - FROM_HERE, - service->method_factory_.NewRunnableMethod( - &ClientSideDetectionService::StartFetchModel), - kInitialClientModelFetchDelayMs); - // Delete the previous-version model files. + // We try to open the model file right away and start fetching it if + // it does not already exist on disk. + base::FileUtilProxy::CreateOrOpenCallback* cb = + service.get()->callback_factory_.NewCallback( + &ClientSideDetectionService::OpenModelFileDone); + if (!base::FileUtilProxy::CreateOrOpen( + BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE), + model_path, + base::PLATFORM_FILE_OPEN | base::PLATFORM_FILE_READ, + cb)) { + delete cb; + return NULL; + } + + // Delete the previous-version model file. // TODO(bryner): Remove this for M14. base::FileUtilProxy::Delete( BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE), model_path.DirName().AppendASCII("Safe Browsing Phishing Model"), false /* not recursive */, NULL /* not interested in result */); - base::FileUtilProxy::Delete( - BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE), - model_path.DirName().AppendASCII("Safe Browsing Phishing Model v1"), - false /* not recursive */, - NULL /* not interested in result */); return service.release(); } @@ -176,8 +170,12 @@ void ClientSideDetectionService::OnURLFetchComplete( void ClientSideDetectionService::Observe(NotificationType type, const NotificationSource& source, const NotificationDetails& details) { - DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); DCHECK(type == NotificationType::RENDERER_PROCESS_CREATED); + if (model_status_ == UNKNOWN_STATUS) { + // The model isn't ready. When it's known, we'll call all renderers. + return; + } + RenderProcessHost* process = Source<RenderProcessHost>(source).ptr(); SendModelToProcess(process); } @@ -185,8 +183,8 @@ void ClientSideDetectionService::Observe(NotificationType type, void ClientSideDetectionService::SendModelToProcess( RenderProcessHost* process) { if (model_file_ == base::kInvalidPlatformFileValue) - // Model might not be ready or maybe there was an error. return; + IPC::PlatformFileForTransit file; #if defined(OS_POSIX) file = base::FileDescriptor(model_file_, false); @@ -198,184 +196,96 @@ void ClientSideDetectionService::SendModelToProcess( process->Send(new SafeBrowsingMsg_SetPhishingModel(file)); } -void ClientSideDetectionService::SendModelToRenderers() { +void ClientSideDetectionService::SetModelStatus(ModelStatus status) { + DCHECK_NE(READY_STATUS, model_status_); + model_status_ = status; + for (RenderProcessHost::iterator i(RenderProcessHost::AllHostsIterator()); !i.IsAtEnd(); i.Advance()) { RenderProcessHost* process = i.GetCurrentValue(); - if (process->GetHandle()) { + if (process->GetHandle()) SendModelToProcess(process); - } } } -void ClientSideDetectionService::StartFetchModel() { - // Start fetching the model either from the cache or possibly from the - // network if the model isn't in the cache. - model_fetcher_.reset(URLFetcher::Create(0 /* ID is not used */, - GURL(kClientModelUrl), - URLFetcher::GET, - this)); - model_fetcher_->set_request_context(request_context_getter_.get()); - model_fetcher_->Start(); -} - -void ClientSideDetectionService::EndFetchModel(ClientModelStatus status) { - UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.ClientModelStatus", - status, - MODEL_STATUS_MAX); - // If there is already a valid model but we're unable to reload one - // we leave the old model. - if (status == MODEL_SUCCESS) { - base::PlatformFile old_file = model_file_; - // Replace the model file and version; - model_file_ = tmp_model_file_; - model_version_ = tmp_model_version_; - // Now we can safely close the old model file. - CloseModelFile(&old_file); - SendModelToRenderers(); - } else { - CloseModelFile(&tmp_model_file_); - // Delete the temporary model file if necessay. - if (!tmp_model_path_.empty()) { - base::FileUtilProxy::Delete( - BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE), - tmp_model_path_, - false /* recursive */, - NULL); - } - } - // Delete up the temporary state. - tmp_model_path_.clear(); - tmp_model_string_.clear(); - - int delay_ms = kClientModelFetchIntervalMs; - // If the most recently fetched model had a valid max-age and the model was - // valid we're scheduling the next model update for after the max-age expired. - if (tmp_model_max_age_.get() && - (status == MODEL_SUCCESS || status == MODEL_NOT_CHANGED)) { - // We're adding 60s of additional delay to make sure we're past - // the model's age. - *tmp_model_max_age_ += base::TimeDelta::FromMinutes(1); - delay_ms = tmp_model_max_age_->InMilliseconds(); - } - tmp_model_max_age_.reset(); - - // Schedule the next model reload. - MessageLoop::current()->PostDelayedTask( - FROM_HERE, - method_factory_.NewRunnableMethod( - &ClientSideDetectionService::StartFetchModel), - delay_ms); -} - -void ClientSideDetectionService::CreateTmpModelFileDone( +void ClientSideDetectionService::OpenModelFileDone( base::PlatformFileError error_code, base::PassPlatformFile file, - FilePath tmp_model_path) { - if (base::PLATFORM_FILE_OK != error_code) { - EndFetchModel(MODEL_CREATE_TMP_FILE_FAILED); - } else { - // Keep this around because we need to rename it later. - tmp_model_path_ = tmp_model_path; - tmp_model_file_ = file.ReleaseValue(); - base::FileUtilProxy::WriteCallback* cb = callback_factory_.NewCallback( - &ClientSideDetectionService::WriteTmpModelFileDone); - if (!base::FileUtilProxy::Write( - BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE), - tmp_model_file_, - 0 /* offset */, - tmp_model_string_.data(), tmp_model_string_.size(), - cb)) { - delete cb; - EndFetchModel(MODEL_FILE_UTIL_PROXY_ERROR); - } - } -} - -void ClientSideDetectionService::WriteTmpModelFileDone( - base::PlatformFileError error_code, - int bytes_written) { - if (base::PLATFORM_FILE_OK != error_code || - bytes_written != static_cast<int>(tmp_model_string_.size())) { - EndFetchModel(MODEL_WRITE_TMP_FILE_FAILED); - } else { - // Now we close the writable temporary model file and then we - // reopen it in read only mode. We don't want to send a writable - // file descriptor to the renderer. - base::FileUtilProxy::StatusCallback* cb = - callback_factory_.NewCallback( - &ClientSideDetectionService::CloseTmpModelFileDone); - if (!base::FileUtilProxy::Close( - BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE), - tmp_model_file_, - cb)) { - delete cb; - EndFetchModel(MODEL_FILE_UTIL_PROXY_ERROR); - } - } -} - -void ClientSideDetectionService::CloseTmpModelFileDone( - base::PlatformFileError error_code) { - if (base::PLATFORM_FILE_OK != error_code) { - EndFetchModel(MODEL_CLOSE_TMP_FILE_FAILED); + bool created) { + DCHECK(!created); + if (base::PLATFORM_FILE_OK == error_code) { + // The model file already exists. There is no need to fetch the model. + model_file_ = file.ReleaseValue(); + SetModelStatus(READY_STATUS); +#if defined(OS_MACOSX) + base::mac::SetFileBackupExclusion(model_path_); +#endif + } else if (base::PLATFORM_FILE_ERROR_NOT_FOUND == error_code) { + // We need to fetch the model since it does not exist yet. + model_fetcher_.reset(URLFetcher::Create(0 /* ID is not used */, + GURL(kClientModelUrl), + URLFetcher::GET, + this)); + model_fetcher_->set_request_context(request_context_getter_.get()); + model_fetcher_->Start(); } else { - base::FileUtilProxy::CreateOrOpenCallback* cb = - callback_factory_.NewCallback( - &ClientSideDetectionService::ReOpenTmpModelFileDone); - if (!base::FileUtilProxy::CreateOrOpen( - BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE), - tmp_model_path_, - base::PLATFORM_FILE_OPEN | base::PLATFORM_FILE_READ, - cb)) { - delete cb; - EndFetchModel(MODEL_FILE_UTIL_PROXY_ERROR); - } + // It is not clear what we should do in this case. For now we simply fail. + // Hopefully, we'll be able to read the model during the next browser + // restart. + SetModelStatus(ERROR_STATUS); } } -void ClientSideDetectionService::ReOpenTmpModelFileDone( +void ClientSideDetectionService::CreateModelFileDone( base::PlatformFileError error_code, base::PassPlatformFile file, bool created) { - if (base::PLATFORM_FILE_OK != error_code) { - EndFetchModel(MODEL_REOPEN_TMP_FILE_FAILED); + model_file_ = file.ReleaseValue(); + base::FileUtilProxy::WriteCallback* cb = callback_factory_.NewCallback( + &ClientSideDetectionService::WriteModelFileDone); + if (!created || + base::PLATFORM_FILE_OK != error_code || + !base::FileUtilProxy::Write( + BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE), + model_file_, + 0 /* offset */, tmp_model_string_->data(), tmp_model_string_->size(), + cb)) { + delete cb; + // An error occurred somewhere. We close the model file if necessary and + // then run all the pending callbacks giving them an invalid model file. + CloseModelFile(); + SetModelStatus(ERROR_STATUS); +#if defined(OS_MACOSX) } else { - CloseModelFile(&tmp_model_file_); // Close the writable file handle. - tmp_model_file_ = file.ReleaseValue(); - base::FileUtilProxy::StatusCallback* cb = callback_factory_.NewCallback( - &ClientSideDetectionService::MoveTmpModelFileDone); - if (!base::FileUtilProxy::Move( - BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE), - tmp_model_path_, - model_path_, - cb)) { - delete cb; - EndFetchModel(MODEL_FILE_UTIL_PROXY_ERROR); - } + base::mac::SetFileBackupExclusion(model_path_); +#endif } } -void ClientSideDetectionService::MoveTmpModelFileDone( - base::PlatformFileError error_code) { +void ClientSideDetectionService::WriteModelFileDone( + base::PlatformFileError error_code, + int bytes_written) { if (base::PLATFORM_FILE_OK == error_code) { -#if defined(OS_MACOSX) - base::mac::SetFileBackupExclusion(model_path_); -#endif - EndFetchModel(MODEL_SUCCESS); + SetModelStatus(READY_STATUS); } else { - EndFetchModel(MODEL_MOVE_TMP_FILE_ERROR); + // TODO(noelutz): maybe we should retry writing the model since we + // did already fetch the model? + CloseModelFile(); + SetModelStatus(ERROR_STATUS); } + // Delete the model string that we kept around while we were writing the + // string to disk - we don't need it anymore. + tmp_model_string_.reset(); } -void ClientSideDetectionService::CloseModelFile(base::PlatformFile* file) { - if (*file != base::kInvalidPlatformFileValue) { +void ClientSideDetectionService::CloseModelFile() { + if (model_file_ != base::kInvalidPlatformFileValue) { base::FileUtilProxy::Close( BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE), - *file, + model_file_, NULL); } - *file = base::kInvalidPlatformFileValue; + model_file_ = base::kInvalidPlatformFileValue; } void ClientSideDetectionService::StartClientReportPhishingRequest( @@ -420,45 +330,27 @@ void ClientSideDetectionService::HandleModelResponse( int response_code, const net::ResponseCookies& cookies, const std::string& data) { - base::TimeDelta max_age; - if (status.is_success() && RC_REQUEST_OK == response_code && - source->response_headers() && - source->response_headers()->GetMaxAgeValue(&max_age)) { - tmp_model_max_age_.reset(new base::TimeDelta(max_age)); - } - ClientSideModel model; - if (!status.is_success() || RC_REQUEST_OK != response_code) { - EndFetchModel(MODEL_FETCH_FAILED); - } else if (data.empty()) { - EndFetchModel(MODEL_EMPTY); - } else if (data.size() > kMaxModelSizeBytes) { - EndFetchModel(MODEL_TOO_LARGE); - } else if (!model.ParseFromString(data)) { - EndFetchModel(MODEL_PARSE_ERROR); - } else if (!model.IsInitialized() || !model.has_version()) { - EndFetchModel(MODEL_MISSING_FIELDS); - } else if (model.version() < 0 || - (model_version_ > 0 && model.version() < model_version_)) { - EndFetchModel(MODEL_INVALID_VERSION_NUMBER); - } else if (model.version() == model_version_) { - EndFetchModel(MODEL_NOT_CHANGED); - } else { - // The model will be written to a temporary file. In the mean time we - // copy the model string because it has to be accessible after this - // function returns. Once we have written the model to a file we will - // delete the temporary model string. - tmp_model_version_ = model.version(); - tmp_model_string_.assign(data); - base::FileUtilProxy::CreateTemporaryCallback* cb = + if (status.is_success() && RC_REQUEST_OK == response_code) { + // Copy the model because it has to be accessible after this function + // returns. Once we have written the model to a file we will delete the + // temporary model string. TODO(noelutz): don't store the model to disk if + // it's invalid. + tmp_model_string_.reset(new std::string(data)); + base::FileUtilProxy::CreateOrOpenCallback* cb = callback_factory_.NewCallback( - &ClientSideDetectionService::CreateTmpModelFileDone); - if (!base::FileUtilProxy::CreateTemporary( + &ClientSideDetectionService::CreateModelFileDone); + if (!base::FileUtilProxy::CreateOrOpen( BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE), - 0 /* no additional file flags */, + model_path_, + base::PLATFORM_FILE_CREATE_ALWAYS | + base::PLATFORM_FILE_WRITE | + base::PLATFORM_FILE_READ, cb)) { delete cb; - EndFetchModel(MODEL_FILE_UTIL_PROXY_ERROR); + SetModelStatus(ERROR_STATUS); } + } else { + SetModelStatus(ERROR_STATUS); } } @@ -578,4 +470,5 @@ bool ClientSideDetectionService::InitializePrivateNetworks() { } return true; } + } // namespace safe_browsing diff --git a/chrome/browser/safe_browsing/client_side_detection_service.h b/chrome/browser/safe_browsing/client_side_detection_service.h index bc0697b..4bf515f 100644 --- a/chrome/browser/safe_browsing/client_side_detection_service.h +++ b/chrome/browser/safe_browsing/client_side_detection_service.h @@ -3,10 +3,9 @@ // found in the LICENSE file. // // Helper class which handles communication with the SafeBrowsing backends for -// client-side phishing detection. This class is used to fetch the client-side -// model and send a file descriptor that points to the model file to all -// renderers. This class is also used to send a ping back to Google to verify -// if a particular site is really phishing or not. +// client-side phishing detection. This class can be used to get a file +// descriptor to the client-side phishing model and also to send a ping back to +// Google to verify if a particular site is really phishing or not. // // This class is not thread-safe and expects all calls to be made on the UI // thread. We also expect that the calling thread runs a message loop and that @@ -41,10 +40,6 @@ class RenderProcessHost; -namespace base { -class TimeDelta; -} - namespace net { class URLRequestContextGetter; class URLRequestStatus; @@ -119,38 +114,17 @@ class ClientSideDetectionService : public URLFetcher::Delegate, const FilePath& model_path, net::URLRequestContextGetter* request_context_getter); - // Enum used to keep stats about why we fail to get the client model. - enum ClientModelStatus { - MODEL_SUCCESS, - MODEL_NOT_CHANGED, - MODEL_FETCH_FAILED, - MODEL_EMPTY, - MODEL_TOO_LARGE, - MODEL_PARSE_ERROR, - MODEL_MISSING_FIELDS, - MODEL_INVALID_VERSION_NUMBER, - MODEL_FILE_UTIL_PROXY_ERROR, - MODEL_CREATE_TMP_FILE_FAILED, - MODEL_WRITE_TMP_FILE_FAILED, - MODEL_CLOSE_TMP_FILE_FAILED, - MODEL_REOPEN_TMP_FILE_FAILED, - MODEL_MOVE_TMP_FILE_ERROR, - MODEL_STATUS_MAX // Always add new values before this one. - }; - - // Starts fetching the model from the network or the cache. This method - // is called periodically to check whether a new client model is available - // for download. - void StartFetchModel(); - - // This method is called when we're done fetching the model either because - // we hit an error somewhere or because we're actually done writing the model - // to the temporary file. - virtual void EndFetchModel(ClientModelStatus status); // Virtual for testing. - private: friend class ClientSideDetectionServiceTest; - FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest, FetchModelTest); + + enum ModelStatus { + // It's unclear whether or not the model was already fetched. + UNKNOWN_STATUS, + // Model is fetched and is stored on disk. + READY_STATUS, + // Error occured during fetching or writing. + ERROR_STATUS, + }; // CacheState holds all information necessary to respond to a caller without // actually making a HTTP request. @@ -168,45 +142,40 @@ class ClientSideDetectionService : public URLFetcher::Delegate, static const char kClientReportPhishingUrl[]; static const char kClientModelUrl[]; - static const size_t kMaxModelSizeBytes; static const int kMaxReportsPerInterval; - static const int kClientModelFetchIntervalMs; - static const int kInitialClientModelFetchDelayMs; static const base::TimeDelta kReportsInterval; static const base::TimeDelta kNegativeCacheInterval; static const base::TimeDelta kPositiveCacheInterval; - // Callback that is invoked once the attempt to create the temporary model + // Sets the model status and invokes all the pending callbacks in + // |open_callbacks_| with the current |model_file_| as parameter. + void SetModelStatus(ModelStatus status); + + // Called once the initial open() of the model file is done. If the file + // exists we're done and we can call all the pending callbacks. If the + // file doesn't exist this method will asynchronously fetch the model + // from the server by invoking StartFetchingModel(). + void OpenModelFileDone(base::PlatformFileError error_code, + base::PassPlatformFile file, + bool created); + + // Callback that is invoked once the attempt to create the model // file on disk is done. If the file was created successfully we // start writing the model to disk (asynchronously). Otherwise, we // give up and send an invalid platform file to all the pending callbacks. - void CreateTmpModelFileDone(base::PlatformFileError error_code, - base::PassPlatformFile file, - FilePath tmp_model_path); + void CreateModelFileDone(base::PlatformFileError error_code, + base::PassPlatformFile file, + bool created); // Callback is invoked once we're done writing the model file to disk. - // If everything went well then |tmp_model_file_| is a valid platform file. - // If an error occurs we give up and send an invalid platform file to all the - // pending callbacks. - void WriteTmpModelFileDone(base::PlatformFileError error_code, - int bytes_written); - - // Called when we're done closing the writable model file and we're - // ready to re-open the temporary model file in read-only mode. - void CloseTmpModelFileDone(base::PlatformFileError error_code); - - // Reopen the temporary model file in read-only mode to make sure we don't - // pass a writable file descritor to the renderer. - void ReOpenTmpModelFileDone(base::PlatformFileError error_code, - base::PassPlatformFile file, - bool created); + // If everything went well then |model_file_| is a valid platform file + // that can be sent to all the pending callbacks. If an error occurs + // we give up and send an invalid platform file to all the pending callbacks. + void WriteModelFileDone(base::PlatformFileError error_code, + int bytes_written); - // Callback is invoked when the temporary model file was moved to its final - // destination where the model is stored on disk. - void MoveTmpModelFileDone(base::PlatformFileError error_code); - - // Helper function which closes the given model file if necessary. - void CloseModelFile(base::PlatformFile* file); + // Helper function which closes the |model_file_| if necessary. + void CloseModelFile(); // Starts sending the request to the client-side detection frontends. // This method takes ownership of both pointers. @@ -245,19 +214,11 @@ class ClientSideDetectionService : public URLFetcher::Delegate, // Send the model to the given renderer. void SendModelToProcess(RenderProcessHost* process); - // Same as above but sends the model to all rendereres. - void SendModelToRenderers(); - FilePath model_path_; + ModelStatus model_status_; base::PlatformFile model_file_; - int model_version_; scoped_ptr<URLFetcher> model_fetcher_; - - FilePath tmp_model_path_; - base::PlatformFile tmp_model_file_; - int tmp_model_version_; - std::string tmp_model_string_; - scoped_ptr<base::TimeDelta> tmp_model_max_age_; + scoped_ptr<std::string> tmp_model_string_; // Map of client report phishing request to the corresponding callback that // has to be invoked when the request is done. diff --git a/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc b/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc index c466dd9..3b5d37f 100644 --- a/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc +++ b/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc @@ -18,36 +18,15 @@ #include "base/task.h" #include "base/time.h" #include "chrome/browser/safe_browsing/client_side_detection_service.h" -#include "chrome/common/safe_browsing/client_model.pb.h" #include "chrome/common/safe_browsing/csd.pb.h" #include "content/browser/browser_thread.h" #include "content/common/test_url_fetcher_factory.h" #include "content/common/url_fetcher.h" #include "googleurl/src/gurl.h" #include "net/url_request/url_request_status.h" -#include "testing/gmock/include/gmock/gmock.h" #include "testing/gtest/include/gtest/gtest.h" -using ::testing::Mock; - namespace safe_browsing { -namespace { -class MockClientSideDetectionService : public ClientSideDetectionService { - public: - explicit MockClientSideDetectionService(const FilePath& path) - : ClientSideDetectionService(path, NULL) {} - virtual ~MockClientSideDetectionService() {} - - MOCK_METHOD1(EndFetchModel, void(ClientModelStatus)); - - private: - DISALLOW_COPY_AND_ASSIGN(MockClientSideDetectionService); -}; - -ACTION(QuitCurrentMessageLoop) { - MessageLoop::current()->Quit(); -} -} // namespace class ClientSideDetectionServiceTest : public testing::Test { protected: @@ -180,105 +159,6 @@ class ClientSideDetectionServiceTest : public testing::Test { bool is_phishing_; }; -TEST_F(ClientSideDetectionServiceTest, FetchModelTest) { - ScopedTempDir tmp_dir; - ASSERT_TRUE(tmp_dir.CreateUniqueTempDir()); - FilePath model_path = tmp_dir.path().AppendASCII("model"); - // We don't want to use a real service class here because we can't call - // the real EndFetchModel. It would reschedule a reload which might - // make the test flaky. - MockClientSideDetectionService service(model_path); - - // The model fetch failed. - SetModelFetchResponse("blamodel", false /* failure */); - EXPECT_CALL(service, EndFetchModel( - ClientSideDetectionService::MODEL_FETCH_FAILED)) - .WillOnce(QuitCurrentMessageLoop()); - service.StartFetchModel(); - msg_loop_.Run(); // EndFetchModel will quit the message loop. - Mock::VerifyAndClearExpectations(&service); - - // Empty model file. - SetModelFetchResponse("", true /* success */); - EXPECT_CALL(service, EndFetchModel( - ClientSideDetectionService::MODEL_EMPTY)) - .WillOnce(QuitCurrentMessageLoop()); - service.StartFetchModel(); - msg_loop_.Run(); // EndFetchModel will quit the message loop. - Mock::VerifyAndClearExpectations(&service); - - // Model is too large. - SetModelFetchResponse( - std::string(ClientSideDetectionService::kMaxModelSizeBytes + 1, 'x'), - true /* success */); - EXPECT_CALL(service, EndFetchModel( - ClientSideDetectionService::MODEL_TOO_LARGE)) - .WillOnce(QuitCurrentMessageLoop()); - service.StartFetchModel(); - msg_loop_.Run(); // EndFetchModel will quit the message loop. - Mock::VerifyAndClearExpectations(&service); - - // Unable to parse the model file. - SetModelFetchResponse("Invalid model file", true /* success */); - EXPECT_CALL(service, EndFetchModel( - ClientSideDetectionService::MODEL_PARSE_ERROR)) - .WillOnce(QuitCurrentMessageLoop()); - service.StartFetchModel(); - msg_loop_.Run(); // EndFetchModel will quit the message loop. - Mock::VerifyAndClearExpectations(&service); - - // Model that is missing some required fields (missing the version field). - ClientSideModel model; - model.set_max_words_per_term(4); - SetModelFetchResponse(model.SerializePartialAsString(), true /* success */); - EXPECT_CALL(service, EndFetchModel( - ClientSideDetectionService::MODEL_MISSING_FIELDS)) - .WillOnce(QuitCurrentMessageLoop()); - service.StartFetchModel(); - msg_loop_.Run(); // EndFetchModel will quit the message loop. - Mock::VerifyAndClearExpectations(&service); - - // Model version number is wrong. - model.set_version(-1); - SetModelFetchResponse(model.SerializeAsString(), true /* success */); - EXPECT_CALL(service, EndFetchModel( - ClientSideDetectionService::MODEL_INVALID_VERSION_NUMBER)) - .WillOnce(QuitCurrentMessageLoop()); - service.StartFetchModel(); - msg_loop_.Run(); // EndFetchModel will quit the message loop. - Mock::VerifyAndClearExpectations(&service); - - // Normal model. - model.set_version(10); - SetModelFetchResponse(model.SerializeAsString(), true /* success */); - EXPECT_CALL(service, EndFetchModel( - ClientSideDetectionService::MODEL_SUCCESS)) - .WillOnce(QuitCurrentMessageLoop()); - service.StartFetchModel(); - msg_loop_.Run(); // EndFetchModel will quit the message loop. - Mock::VerifyAndClearExpectations(&service); - - // Model version number is decreasing. - service.model_version_ = 11; - SetModelFetchResponse(model.SerializeAsString(), true /* success */); - EXPECT_CALL(service, EndFetchModel( - ClientSideDetectionService::MODEL_INVALID_VERSION_NUMBER)) - .WillOnce(QuitCurrentMessageLoop()); - service.StartFetchModel(); - msg_loop_.Run(); // EndFetchModel will quit the message loop. - Mock::VerifyAndClearExpectations(&service); - - // Model version hasn't changed since the last reload. - service.model_version_ = 10; - SetModelFetchResponse(model.SerializeAsString(), true /* success */); - EXPECT_CALL(service, EndFetchModel( - ClientSideDetectionService::MODEL_NOT_CHANGED)) - .WillOnce(QuitCurrentMessageLoop()); - service.StartFetchModel(); - msg_loop_.Run(); // EndFetchModel will quit the message loop. - Mock::VerifyAndClearExpectations(&service); -} - TEST_F(ClientSideDetectionServiceTest, ServiceObjectDeletedBeforeCallbackDone) { SetModelFetchResponse("bogus model", true /* success */); ScopedTempDir tmp_dir; diff --git a/chrome/chrome_browser.gypi b/chrome/chrome_browser.gypi index 7985685..721a56a 100644 --- a/chrome/chrome_browser.gypi +++ b/chrome/chrome_browser.gypi @@ -22,7 +22,6 @@ 'platform_locale_settings', 'profile_import', 'safe_browsing_csd_proto', - 'safe_browsing_proto', 'safe_browsing_report_proto', # TODO(sync): Make browser not depend on syncapi_core directly. 'syncapi_core', diff --git a/chrome/chrome_common.gypi b/chrome/chrome_common.gypi index 7580c3d..ca0dbd0 100644 --- a/chrome/chrome_common.gypi +++ b/chrome/chrome_common.gypi @@ -92,9 +92,6 @@ 'direct_dependent_settings': { 'include_dirs': [ '..', - # Allow other targets to depend on common for the SafeBrowsing - # protobuf generated targets. - '<(protoc_out_dir)', ], }, 'dependencies': [ @@ -108,7 +105,6 @@ 'common_net', 'default_plugin/default_plugin.gyp:default_plugin', 'safe_browsing_csd_proto', - 'safe_browsing_proto', 'theme_resources', 'theme_resources_standard', '../app/app.gyp:app_base', @@ -126,7 +122,6 @@ '../third_party/icu/icu.gyp:icui18n', '../third_party/icu/icu.gyp:icuuc', '../third_party/libxml/libxml.gyp:libxml', - '../third_party/protobuf/protobuf.gyp:protobuf_lite', '../third_party/sqlite/sqlite.gyp:sqlite', '../third_party/zlib/zlib.gyp:zlib', '../webkit/support/webkit_support.gyp:glue', @@ -219,8 +214,6 @@ 'common/random.h', 'common/render_messages.cc', 'common/render_messages.h', - '<(protoc_out_dir)/chrome/common/safe_browsing/client_model.pb.cc', - '<(protoc_out_dir)/chrome/common/safe_browsing/client_model.pb.h', '<(protoc_out_dir)/chrome/common/safe_browsing/csd.pb.cc', '<(protoc_out_dir)/chrome/common/safe_browsing/csd.pb.h', 'common/search_provider.h', @@ -343,7 +336,6 @@ 'export_dependent_settings': [ '../app/app.gyp:app_base', '../base/base.gyp:base', - '../third_party/protobuf/protobuf.gyp:protobuf_lite', ], }, { @@ -409,52 +401,6 @@ ], }, { - # Protobuf compiler / generator for the safebrowsing client model proto. - 'target_name': 'safe_browsing_proto', - 'type': 'none', - 'sources': [ 'common/safe_browsing/client_model.proto' ], - 'rules': [ - { - 'rule_name': 'genproto', - 'extension': 'proto', - 'inputs': [ - '<(PRODUCT_DIR)/<(EXECUTABLE_PREFIX)protoc<(EXECUTABLE_SUFFIX)', - ], - 'variables': { - # The protoc compiler requires a proto_path argument with the - # directory containing the .proto file. - # There's no generator variable that corresponds to this, so fake - # it. - 'rule_input_relpath': 'common/safe_browsing', - }, - 'outputs': [ - '<(protoc_out_dir)/chrome/<(rule_input_relpath)/<(RULE_INPUT_ROOT).pb.h', - '<(protoc_out_dir)/chrome/<(rule_input_relpath)/<(RULE_INPUT_ROOT).pb.cc', - ], - 'action': [ - '<(PRODUCT_DIR)/<(EXECUTABLE_PREFIX)protoc<(EXECUTABLE_SUFFIX)', - '--proto_path=./<(rule_input_relpath)', - './<(rule_input_relpath)/<(RULE_INPUT_ROOT)<(RULE_INPUT_EXT)', - '--cpp_out=<(protoc_out_dir)/chrome/<(rule_input_relpath)', - ], - 'message': 'Generating C++ code from <(RULE_INPUT_PATH)', - }, - ], - 'dependencies': [ - '../third_party/protobuf/protobuf.gyp:protobuf_lite', - '../third_party/protobuf/protobuf.gyp:protoc#host', - ], - 'direct_dependent_settings': { - 'include_dirs': [ - '<(protoc_out_dir)', - ] - }, - 'export_dependent_settings': [ - '../third_party/protobuf/protobuf.gyp:protobuf_lite', - ], - 'hard_dependency': 1, - }, - { # Protobuf compiler / generator for the safebrowsing client-side detection # (csd) request protocol buffer which is used both in the renderer and in # the browser. diff --git a/chrome/chrome_renderer.gypi b/chrome/chrome_renderer.gypi index 09529f6..8f17d6b 100644 --- a/chrome/chrome_renderer.gypi +++ b/chrome/chrome_renderer.gypi @@ -13,7 +13,6 @@ 'common_net', 'chrome_resources', 'chrome_strings', - 'safe_browsing_csd_proto', 'safe_browsing_proto', '../content/content.gyp:content_renderer', '../content/content.gyp:content_plugin', @@ -137,6 +136,9 @@ 'renderer/print_web_view_helper_win.cc', 'renderer/renderer_histogram_snapshots.cc', 'renderer/renderer_histogram_snapshots.h', + # TODO(noelutz): Find a better way to include these files + '<(protoc_out_dir)/chrome/renderer/safe_browsing/client_model.pb.cc', + '<(protoc_out_dir)/chrome/renderer/safe_browsing/client_model.pb.h', 'renderer/safe_browsing/feature_extractor_clock.cc', 'renderer/safe_browsing/feature_extractor_clock.h', 'renderer/safe_browsing/features.cc', @@ -223,6 +225,50 @@ }], ], }, + { + # Protobuf compiler / generator for the safebrowsing client model proto. + 'target_name': 'safe_browsing_proto', + 'type': 'none', + 'sources': [ 'renderer/safe_browsing/client_model.proto' ], + 'rules': [ + { + 'rule_name': 'genproto', + 'extension': 'proto', + 'inputs': [ + '<(PRODUCT_DIR)/<(EXECUTABLE_PREFIX)protoc<(EXECUTABLE_SUFFIX)', + ], + 'variables': { + # The protoc compiler requires a proto_path argument with the + # directory containing the .proto file. + # There's no generator variable that corresponds to this, so fake it. + 'rule_input_relpath': 'renderer/safe_browsing', + }, + 'outputs': [ + '<(protoc_out_dir)/chrome/<(rule_input_relpath)/<(RULE_INPUT_ROOT).pb.h', + '<(protoc_out_dir)/chrome/<(rule_input_relpath)/<(RULE_INPUT_ROOT).pb.cc', + ], + 'action': [ + '<(PRODUCT_DIR)/<(EXECUTABLE_PREFIX)protoc<(EXECUTABLE_SUFFIX)', + '--proto_path=./<(rule_input_relpath)', + './<(rule_input_relpath)/<(RULE_INPUT_ROOT)<(RULE_INPUT_EXT)', + '--cpp_out=<(protoc_out_dir)/chrome/<(rule_input_relpath)', + ], + 'message': 'Generating C++ code from <(RULE_INPUT_PATH)', + }, + ], + 'dependencies': [ + '../third_party/protobuf/protobuf.gyp:protobuf_lite', + '../third_party/protobuf/protobuf.gyp:protoc#host', + ], + 'direct_dependent_settings': { + 'include_dirs': [ + '<(protoc_out_dir)', + ] + }, + 'export_dependent_settings': [ + '../third_party/protobuf/protobuf.gyp:protobuf_lite', + ], + }, ], } diff --git a/chrome/common/chrome_constants.cc b/chrome/common/chrome_constants.cc index ccf28fd..2a90072 100644 --- a/chrome/common/chrome_constants.cc +++ b/chrome/common/chrome_constants.cc @@ -92,7 +92,7 @@ const FilePath::CharType kLocalStateFilename[] = FPL("Local State"); const FilePath::CharType kPreferencesFilename[] = FPL("Preferences"); const FilePath::CharType kSafeBrowsingBaseFilename[] = FPL("Safe Browsing"); const FilePath::CharType kSafeBrowsingPhishingModelFilename[] = - FPL("Safe Browsing Phishing Model v2"); + FPL("Safe Browsing Phishing Model v1"); const FilePath::CharType kSingletonCookieFilename[] = FPL("SingletonCookie"); const FilePath::CharType kSingletonSocketFilename[] = FPL("SingletonSocket"); const FilePath::CharType kSingletonLockFilename[] = FPL("SingletonLock"); diff --git a/chrome/common/safe_browsing/csd.proto b/chrome/common/safe_browsing/csd.proto index e4b3fbe..6b737b30 100644 --- a/chrome/common/safe_browsing/csd.proto +++ b/chrome/common/safe_browsing/csd.proto @@ -43,10 +43,6 @@ message ClientPhishingRequest { // List of features that were extracted. Those are the features that were // sent to the scorer and which resulted in client_score being computed. repeated Feature feature_map = 5; - - // The version number of the model that was used to compute the client-score. - // Copied from ClientSideModel.version(). - optional int32 model_version = 6; } message ClientPhishingResponse { diff --git a/chrome/common/safe_browsing/client_model.proto b/chrome/renderer/safe_browsing/client_model.proto index aa704c3..0a224dc 100644 --- a/chrome/common/safe_browsing/client_model.proto +++ b/chrome/renderer/safe_browsing/client_model.proto @@ -68,9 +68,4 @@ message ClientSideModel { // Page terms in page_term contain at most this many page words. required int32 max_words_per_term = 5; - - // Model version number. Every model that we train should have a different - // version number and it should always be larger than the previous model - // version. - optional int32 version = 6; } diff --git a/chrome/renderer/safe_browsing/phishing_classifier.cc b/chrome/renderer/safe_browsing/phishing_classifier.cc index e64cfc3..12e073e 100644 --- a/chrome/renderer/safe_browsing/phishing_classifier.cc +++ b/chrome/renderer/safe_browsing/phishing_classifier.cc @@ -10,7 +10,6 @@ #include "base/compiler_specific.h" #include "base/logging.h" #include "base/message_loop.h" -#include "base/metrics/histogram.h" #include "base/string_util.h" #include "chrome/common/safe_browsing/csd.pb.h" #include "chrome/common/url_constants.h" @@ -50,7 +49,7 @@ PhishingClassifier::~PhishingClassifier() { } void PhishingClassifier::set_phishing_scorer(const Scorer* scorer) { - CheckNoPendingClassification(); + DCHECK(!scorer_); scorer_ = scorer; url_extractor_.reset(new PhishingUrlFeatureExtractor); dom_extractor_.reset( @@ -170,7 +169,6 @@ void PhishingClassifier::TermExtractionFinished(bool success) { // the score. FeatureMap hashed_features; ClientPhishingRequest verdict; - verdict.set_model_version(scorer_->model_version()); verdict.set_url(main_frame->url().spec()); for (base::hash_map<std::string, double>::const_iterator it = features_->features().begin(); @@ -198,8 +196,6 @@ void PhishingClassifier::CheckNoPendingClassification() { if (done_callback_.get() || page_text_) { LOG(ERROR) << "Classification in progress, missing call to " << "CancelPendingClassification"; - UMA_HISTOGRAM_COUNTS("SBClientPhishing.CheckNoPendingClassificationFailed", - 1); } } diff --git a/chrome/renderer/safe_browsing/phishing_classifier.h b/chrome/renderer/safe_browsing/phishing_classifier.h index fef9476..5a66451 100644 --- a/chrome/renderer/safe_browsing/phishing_classifier.h +++ b/chrome/renderer/safe_browsing/phishing_classifier.h @@ -56,9 +56,7 @@ class PhishingClassifier { virtual ~PhishingClassifier(); // Sets a scorer for the classifier to use in computing the phishiness score. - // This must live at least as long as the PhishingClassifier. The caller is - // expected to cancel any pending classification before setting a phishing - // scorer. + // This must live at least as long as the PhishingClassifier. void set_phishing_scorer(const Scorer* scorer); // Returns true if the classifier is ready to classify pages, i.e. it diff --git a/chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc b/chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc index e92ed70..9c6c60e 100644 --- a/chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc +++ b/chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc @@ -14,8 +14,8 @@ #include "base/memory/scoped_ptr.h" #include "base/string16.h" #include "base/utf_string_conversions.h" -#include "chrome/common/safe_browsing/client_model.pb.h" #include "chrome/common/safe_browsing/csd.pb.h" +#include "chrome/renderer/safe_browsing/client_model.pb.h" #include "chrome/renderer/safe_browsing/features.h" #include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h" #include "chrome/renderer/safe_browsing/render_view_fake_resources_test.h" diff --git a/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc b/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc index 0795d0e..8523e84 100644 --- a/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc +++ b/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc @@ -138,21 +138,14 @@ void PhishingClassifierDelegate::SetPhishingScorer( const safe_browsing::Scorer* scorer) { if (!render_view()->webview()) return; // RenderView is tearing down. - if (is_classifying_) { - // If there is a classification going on right now it means we're - // actually replacing an existing scorer with a new model. In - // this case we simply cancel the current classification. - // TODO(noelutz): if this happens too frequently we could also - // replace the old scorer with the new one once classification is done - // but this would complicate the code somewhat. - CancelPendingClassification(NEW_PHISHING_SCORER); - } + classifier_->set_phishing_scorer(scorer); // Start classifying the current page if all conditions are met. // See MaybeStartClassification() for details. MaybeStartClassification(); } + void PhishingClassifierDelegate::OnStartPhishingDetection(const GURL& url) { last_url_received_from_browser_ = StripRef(url); // Start classifying the current page if all conditions are met. diff --git a/chrome/renderer/safe_browsing/phishing_classifier_delegate.h b/chrome/renderer/safe_browsing/phishing_classifier_delegate.h index 113d592..f979bb1 100644 --- a/chrome/renderer/safe_browsing/phishing_classifier_delegate.h +++ b/chrome/renderer/safe_browsing/phishing_classifier_delegate.h @@ -75,7 +75,6 @@ class PhishingClassifierDelegate : public RenderViewObserver { NAVIGATE_WITHIN_PAGE, PAGE_RECAPTURED, SHUTDOWN, - NEW_PHISHING_SCORER, CANCEL_CLASSIFICATION_MAX // Always add new values before this one. }; diff --git a/chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc b/chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc index c50cb65..a59d205 100644 --- a/chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc +++ b/chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc @@ -253,12 +253,6 @@ TEST_F(PhishingClassifierDelegateTest, NoScorer) { delegate->SetPhishingScorer(&scorer); Mock::VerifyAndClearExpectations(classifier); - // If we set a new scorer while a classification is going on the - // classification should be cancelled. - EXPECT_CALL(*classifier, CancelPendingClassification()); - delegate->SetPhishingScorer(&scorer); - Mock::VerifyAndClearExpectations(classifier); - // The delegate will cancel pending classification on destruction. EXPECT_CALL(*classifier, CancelPendingClassification()); } diff --git a/chrome/renderer/safe_browsing/scorer.cc b/chrome/renderer/safe_browsing/scorer.cc index 6c22bde..6496dc0 100644 --- a/chrome/renderer/safe_browsing/scorer.cc +++ b/chrome/renderer/safe_browsing/scorer.cc @@ -11,7 +11,7 @@ #include "base/memory/scoped_ptr.h" #include "base/metrics/histogram.h" #include "base/string_piece.h" -#include "chrome/common/safe_browsing/client_model.pb.h" +#include "chrome/renderer/safe_browsing/client_model.pb.h" #include "chrome/renderer/safe_browsing/features.h" namespace { @@ -33,10 +33,10 @@ void RecordScorerCreationStatus(ScorerCreationStatus status) { } } // namespace -static const size_t kPhishingModelMaxSizeBytes = 512 * 1024; - namespace safe_browsing { +const int Scorer::kMaxPhishingModelSizeBytes = 90 * 1024; + // Helper function which converts log odds to a probability in the range // [0.0,1.0]. static double LogOdds2Prob(double log_odds) { @@ -63,6 +63,7 @@ class ScorerLoader { : file_thread_proxy_(file_thread_proxy), model_file_(model_file), creation_callback_(creation_callback) { + memset(buffer_, 0, sizeof(buffer_)); } ~ScorerLoader() {} @@ -74,7 +75,7 @@ class ScorerLoader { file_thread_proxy_, model_file_, 0, // offset - kPhishingModelMaxSizeBytes, + Scorer::kMaxPhishingModelSizeBytes, NewCallback(this, &ScorerLoader::ModelReadDone)); DCHECK(success) << "Unable to post a task to read the phishing model file"; } @@ -87,8 +88,15 @@ class ScorerLoader { if (error_code != base::PLATFORM_FILE_OK) { DLOG(ERROR) << "Error reading phishing model file: " << error_code; RecordScorerCreationStatus(SCORER_FAIL_MODEL_OPEN_FAIL); + } else if (bytes_read <= 0) { + DLOG(ERROR) << "Empty phishing model file"; + RecordScorerCreationStatus(SCORER_FAIL_MODEL_FILE_EMPTY); + } else if (bytes_read == Scorer::kMaxPhishingModelSizeBytes) { + DLOG(ERROR) << "Phishing model is too large, ignoring"; + RecordScorerCreationStatus(SCORER_FAIL_MODEL_FILE_TOO_LARGE); } else { - scorer = Scorer::Create(base::StringPiece(data, bytes_read)); + memcpy(buffer_, data, bytes_read); + scorer = Scorer::Create(base::StringPiece(buffer_, bytes_read)); } RunCallback(scorer); } @@ -102,6 +110,7 @@ class ScorerLoader { scoped_refptr<base::MessageLoopProxy> file_thread_proxy_; base::PlatformFile model_file_; scoped_ptr<Scorer::CreationCallback> creation_callback_; + char buffer_[Scorer::kMaxPhishingModelSizeBytes]; DISALLOW_COPY_AND_ASSIGN(ScorerLoader); }; @@ -115,9 +124,16 @@ Scorer::~Scorer() {} Scorer* Scorer::Create(const base::StringPiece& model_str) { scoped_ptr<Scorer> scorer(new Scorer()); ClientSideModel& model = scorer->model_; - if (!model.ParseFromArray(model_str.data(), model_str.size()) || - !model.IsInitialized()) { - DLOG(ERROR) << "Invalid client model"; + if (!model.ParseFromArray(model_str.data(), model_str.size())) { + DLOG(ERROR) << "Unable to parse phishing model. This Scorer object is " + << "invalid."; + RecordScorerCreationStatus(SCORER_FAIL_MODEL_PARSE_ERROR); + return NULL; + } + if (!model.IsInitialized()) { + DLOG(ERROR) << "Unable to parse phishing model. The model is missing " + << "some required fields. Maybe the .proto file changed?"; + RecordScorerCreationStatus(SCORER_FAIL_MODEL_MISSING_FIELDS); return NULL; } RecordScorerCreationStatus(SCORER_SUCCESS); @@ -149,10 +165,6 @@ double Scorer::ComputeScore(const FeatureMap& features) const { return LogOdds2Prob(logodds); } -int Scorer::model_version() const { - return model_.version(); -} - const base::hash_set<std::string>& Scorer::page_terms() const { return page_terms_; } diff --git a/chrome/renderer/safe_browsing/scorer.h b/chrome/renderer/safe_browsing/scorer.h index 24ebd93..adbf088c3 100644 --- a/chrome/renderer/safe_browsing/scorer.h +++ b/chrome/renderer/safe_browsing/scorer.h @@ -24,7 +24,7 @@ #include "base/message_loop_proxy.h" #include "base/platform_file.h" #include "base/string_piece.h" -#include "chrome/common/safe_browsing/client_model.pb.h" +#include "chrome/renderer/safe_browsing/client_model.pb.h" namespace safe_browsing { class FeatureMap; @@ -56,9 +56,6 @@ class Scorer { // (range is inclusive on both ends). virtual double ComputeScore(const FeatureMap& features) const; - // Returns the version number of the loaded client model. - int model_version() const; - // -- Accessors used by the page feature extractor --------------------------- // Returns a set of hashed page terms that appear in the model in binary @@ -72,6 +69,10 @@ class Scorer { // Return the maximum number of words per term for the loaded model. size_t max_words_per_term() const; + // The maximum size of a client-side phishing model file that we + // expect to load. + static const int kMaxPhishingModelSizeBytes; + protected: // Most clients should use the factory method. This constructor is public // to allow for mock implementations. diff --git a/chrome/renderer/safe_browsing/scorer_unittest.cc b/chrome/renderer/safe_browsing/scorer_unittest.cc index 20a3ff5..65349b4 100644 --- a/chrome/renderer/safe_browsing/scorer_unittest.cc +++ b/chrome/renderer/safe_browsing/scorer_unittest.cc @@ -12,7 +12,7 @@ #include "base/message_loop.h" #include "base/scoped_temp_dir.h" #include "base/threading/thread.h" -#include "chrome/common/safe_browsing/client_model.pb.h" +#include "chrome/renderer/safe_browsing/client_model.pb.h" #include "chrome/renderer/safe_browsing/features.h" #include "testing/gmock/include/gmock/gmock.h" #include "testing/gtest/include/gtest/gtest.h" @@ -146,6 +146,22 @@ TEST_F(PhishingScorerTest, CreateFromFile) { EXPECT_EQ(serialized_model, GetModel(*scorer).SerializeAsString()); base::ClosePlatformFile(model_file); + // Now try with an empty file. + model_file = WriteAndOpenModelFile(model_file_path, ""); + ASSERT_NE(base::kInvalidPlatformFileValue, model_file); + scorer.reset(CreateFromFile(model_file, loader_thread.message_loop_proxy())); + ASSERT_FALSE(scorer.get()); + base::ClosePlatformFile(model_file); + + // Try with a file that's too large. + model_.add_hashes(std::string(Scorer::kMaxPhishingModelSizeBytes, '0')); + model_file = WriteAndOpenModelFile(model_file_path, + model_.SerializeAsString()); + ASSERT_NE(base::kInvalidPlatformFileValue, model_file); + scorer.reset(CreateFromFile(model_file, loader_thread.message_loop_proxy())); + ASSERT_FALSE(scorer.get()); + base::ClosePlatformFile(model_file); + // Finally, try with an invalid file. scorer.reset(CreateFromFile(base::kInvalidPlatformFileValue, loader_thread.message_loop_proxy())); |