summaryrefslogtreecommitdiffstats
path: root/chrome/browser/safe_browsing/client_side_detection_service.h
blob: fe9ec69922767c1c245c107e3addc62e3442a988 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// Helper class which handles communication with the SafeBrowsing backends for
// client-side phishing detection.  This class is used to fetch the client-side
// model and send it to all renderers.  This class is also used to send a ping
// back to Google to verify if a particular site is really phishing or not.
//
// This class is not thread-safe and expects all calls to be made on the UI
// thread.  We also expect that the calling thread runs a message loop.

#ifndef CHROME_BROWSER_SAFE_BROWSING_CLIENT_SIDE_DETECTION_SERVICE_H_
#define CHROME_BROWSER_SAFE_BROWSING_CLIENT_SIDE_DETECTION_SERVICE_H_

#include <map>
#include <queue>
#include <set>
#include <string>
#include <utility>
#include <vector>

#include "base/basictypes.h"
#include "base/callback_forward.h"
#include "base/gtest_prod_util.h"
#include "base/memory/linked_ptr.h"
#include "base/memory/ref_counted.h"
#include "base/memory/scoped_ptr.h"
#include "base/memory/weak_ptr.h"
#include "base/time/time.h"
#include "content/public/browser/notification_observer.h"
#include "content/public/browser/notification_registrar.h"
#include "net/base/net_util.h"
#include "net/url_request/url_fetcher_delegate.h"
#include "url/gurl.h"

// Forward declarations of types that this header refers to by name.
//
// NOTE(review): SafeBrowsingService is not used by any declaration visible in
// this header (it is only mentioned in a comment) -- confirm whether this
// forward declaration is still needed.
class SafeBrowsingService;

namespace base {
// Used below as the pointee of a scoped_ptr member (model_max_age_).
class TimeDelta;
}

namespace content {
// Used below as a pointer parameter of SendModelToProcess().
class RenderProcessHost;
}

namespace net {
// Used below in pointer, reference, and smart-pointer positions.
class URLFetcher;
class URLRequestContextGetter;
class URLRequestStatus;
// Type of the |cookies| parameter taken by the Handle*() response handlers.
// NOTE(review): presumably this repeats a typedef owned by net/; if so the
// two must stay identical -- confirm.
typedef std::vector<std::string> ResponseCookies;
}  // namespace net

namespace safe_browsing {
// Forward declarations of types that ClientSideDetectionService takes by
// pointer or reference, or holds through a scoped_ptr.
// NOTE(review): presumably these are the client-side detection protocol
// message classes -- confirm.  ClientPhishingResponse is not referenced by
// any declaration visible in this header.
class ClientMalwareRequest;
class ClientPhishingRequest;
class ClientPhishingResponse;
class ClientSideModel;

// Service that talks to the SafeBrowsing backends on behalf of client-side
// detection.  It fetches the client-side model, sends it to renderer
// processes, and sends phishing / malware report requests whose verdicts are
// delivered through callbacks.  Fetch completions arrive through the
// net::URLFetcherDelegate interface and notifications through the
// content::NotificationObserver interface.
//
// Instances are obtained through Create(); the constructor is protected.
class ClientSideDetectionService : public net::URLFetcherDelegate,
                                   public content::NotificationObserver {
 public:
  // Callback type for phishing report verdicts.
  // Signature: void(GURL phishing_url, bool is_phishing).
  typedef base::Callback<void(GURL, bool)> ClientReportPhishingRequestCallback;
  // Callback type for malware report verdicts.
  // Signature: void(GURL original_url, GURL malware_url, bool is_malware).
  typedef base::Callback<void(GURL, GURL, bool)>
      ClientReportMalwareRequestCallback;

  virtual ~ClientSideDetectionService();

  // Creates a client-side detection service.  The service is initially
  // disabled, use SetEnabledAndRefreshState() to start it.  The caller takes
  // ownership of the returned object.  This function may return NULL, so the
  // result must be checked before use.
  static ClientSideDetectionService* Create(
      net::URLRequestContextGetter* request_context_getter);

  // Enables or disables the service, and refreshes the state of all renderers.
  // This is usually called by the SafeBrowsingService, which tracks whether
  // any profile uses these services at all.  Disabling cancels any pending
  // requests; existing ClientSideDetectionHosts will have their callbacks
  // called with "false" verdicts.  Enabling starts downloading the model after
  // a delay.  In all cases, each render process is updated to match the state
  // of the SafeBrowsing preference for that profile.
  void SetEnabledAndRefreshState(bool enabled);

  // Returns whether the service is currently enabled.
  bool enabled() const {
    return enabled_;
  }

  // From the net::URLFetcherDelegate interface.
  virtual void OnURLFetchComplete(const net::URLFetcher* source) OVERRIDE;

  // content::NotificationObserver overrides:
  virtual void Observe(int type,
                       const content::NotificationSource& source,
                       const content::NotificationDetails& details) OVERRIDE;

  // Sends a request to the SafeBrowsing servers with the ClientPhishingRequest.
  // The URL scheme of the |url()| in the request should be HTTP.  This method
  // takes ownership of |verdict| and calls |callback| once the result has come
  // back from the server or if an error occurs during the fetch.  If the
  // service is disabled or an error occurs the phishing verdict will always be
  // false.  The callback is always called after
  // SendClientReportPhishingRequest() returns and on the same thread as
  // SendClientReportPhishingRequest() was called.  You may pass a null
  // |callback| if you don't care about the server verdict.
  //
  // NOTE(review): earlier wording said this method also takes ownership of
  // |callback|, but it is passed by const reference; presumably a copy is
  // kept until the request completes -- confirm against the .cc.
  virtual void SendClientReportPhishingRequest(
      ClientPhishingRequest* verdict,
      const ClientReportPhishingRequestCallback& callback);

  // Similar to SendClientReportPhishingRequest(), but sends a
  // ClientMalwareRequest instead and reports the verdict through a
  // ClientReportMalwareRequestCallback.
  virtual void SendClientReportMalwareRequest(
      ClientMalwareRequest* verdict,
      const ClientReportMalwareRequestCallback& callback);

  // Returns true if the given IP address string falls within a private
  // (unroutable) network block.  Pages which are hosted on these IP addresses
  // are exempt from client-side phishing detection.  This is called by the
  // ClientSideDetectionHost prior to sending the renderer a
  // SafeBrowsingMsg_StartPhishingDetection IPC.
  //
  // |ip_address| should be a dotted IPv4 address, or an unbracketed IPv6
  // address.
  virtual bool IsPrivateIPAddress(const std::string& ip_address) const;

  // Returns true and sets |*is_phishing| if |url| is in the cache and the
  // cached entry is still valid.
  virtual bool GetValidCachedResult(const GURL& url, bool* is_phishing);

  // Returns true if |url| is in the cache.
  virtual bool IsInCache(const GURL& url);

  // Returns true if we have sent more than kMaxReportsPerInterval phishing
  // reports in the last reporting interval (see kReportsIntervalDays).
  virtual bool OverPhishingReportLimit();

  // Returns true if we have sent more than kMaxReportsPerInterval malware
  // reports in the last reporting interval (see kReportsIntervalDays).
  virtual bool OverMalwareReportLimit();

 protected:
  // Use Create() method to create an instance of this object.
  explicit ClientSideDetectionService(
      net::URLRequestContextGetter* request_context_getter);

  // Enum used to keep stats about why we fail to get the client model.
  // MODEL_STATUS_MAX is a sentinel, not a real status.
  enum ClientModelStatus {
    MODEL_SUCCESS,
    MODEL_NOT_CHANGED,
    MODEL_FETCH_FAILED,
    MODEL_EMPTY,
    MODEL_TOO_LARGE,
    MODEL_PARSE_ERROR,
    MODEL_MISSING_FIELDS,
    MODEL_INVALID_VERSION_NUMBER,
    MODEL_BAD_HASH_IDS,
    MODEL_STATUS_MAX  // Always add new values before this one.
  };

  // Starts fetching the model from the network or the cache.  This method
  // is called periodically to check whether a new client model is available
  // for download.
  void StartFetchModel();

  // Schedules the next fetch of the model, |delay_ms| milliseconds from now.
  virtual void ScheduleFetchModel(int64 delay_ms);  // Virtual for testing.

  // This method is called when we're done fetching the model, either because
  // we hit an error somewhere or because we have actually finished fetching
  // and validating the model.  |status| records which of those happened.
  virtual void EndFetchModel(ClientModelStatus status);  // Virtual for testing.

 private:
  // Test fixture and individual tests that need access to private members.
  friend class ClientSideDetectionServiceTest;
  FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest, FetchModelTest);
  FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest, SetBadSubnets);
  FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest,
                           SetEnabledAndRefreshState);
  FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest, IsBadIpAddress);
  FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest,
                           ModelHasValidHashIds);

  // CacheState holds all information necessary to respond to a caller without
  // actually making a HTTP request.
  struct CacheState {
    // The cached verdict.
    bool is_phishing;
    // Time associated with the entry.
    // NOTE(review): presumably the time the verdict was received -- confirm.
    base::Time timestamp;

    CacheState(bool phish, base::Time time);
  };
  // Cached verdicts, keyed by URL.
  typedef std::map<GURL, linked_ptr<CacheState> > PhishingCache;

  // A tuple of (IP address block, prefix size) representing a private
  // IP address range.
  typedef std::pair<net::IPAddressNumber, size_t> AddressRange;

  // Maps an IPv6 subnet mask to a set of hashed IPv6 subnets.  The IPv6
  // subnets are in network order and hashed with sha256.
  typedef std::map<std::string /* subnet mask */,
                   std::set<std::string /* hashed subnet */> > BadSubnetMap;

  // Configuration constants.  They are only declared here; their values are
  // defined out of line.
  static const char kClientReportMalwareUrl[];
  static const char kClientReportPhishingUrl[];
  static const char kClientModelUrl[];
  static const size_t kMaxModelSizeBytes;
  static const int kMaxReportsPerInterval;
  static const int kClientModelFetchIntervalMs;
  static const int kInitialClientModelFetchDelayMs;
  static const int kReportsIntervalDays;
  static const int kNegativeCacheIntervalDays;
  static const int kPositiveCacheIntervalMinutes;

  // Starts sending the phishing request to the client-side detection
  // frontends.  This method takes ownership of |verdict|.
  // NOTE(review): earlier wording said "takes ownership of both pointers",
  // but |callback| is a const reference -- confirm how it is retained.
  void StartClientReportPhishingRequest(
      ClientPhishingRequest* verdict,
      const ClientReportPhishingRequestCallback& callback);

  // Malware counterpart of StartClientReportPhishingRequest().
  // NOTE(review): presumably this also takes ownership of |verdict| --
  // confirm against the .cc.
  void StartClientReportMalwareRequest(
      ClientMalwareRequest* verdict,
      const ClientReportMalwareRequestCallback& callback);

  // Called by OnURLFetchComplete to handle the response from fetching the
  // model.
  void HandleModelResponse(const net::URLFetcher* source,
                           const GURL& url,
                           const net::URLRequestStatus& status,
                           int response_code,
                           const net::ResponseCookies& cookies,
                           const std::string& data);

  // Called by OnURLFetchComplete to handle the server response from
  // sending the client-side phishing request.
  void HandlePhishingVerdict(const net::URLFetcher* source,
                             const GURL& url,
                             const net::URLRequestStatus& status,
                             int response_code,
                             const net::ResponseCookies& cookies,
                             const std::string& data);

  // Called by OnURLFetchComplete to handle the server response from
  // sending the client-side malware request.
  void HandleMalwareVerdict(const net::URLFetcher* source,
                            const GURL& url,
                            const net::URLRequestStatus& status,
                            int response_code,
                            const net::ResponseCookies& cookies,
                            const std::string& data);

  // Invalidate cache results which are no longer useful.
  void UpdateCache();

  // Get the number of malware reports that we have sent over the reporting
  // interval (see kReportsIntervalDays).
  int GetMalwareNumReports();

  // Get the number of phishing reports that we have sent over the reporting
  // interval (see kReportsIntervalDays).
  int GetPhishingNumReports();

  // Get the number of reports in |report_times| that were sent over the
  // reporting interval (see kReportsIntervalDays), and trim the older
  // elements off the queue.  |report_times| is modified in place.
  int GetNumReports(std::queue<base::Time>* report_times);

  // Initializes the |private_networks_| vector with the network blocks
  // that we consider non-public IP addresses.  Returns true on success.
  bool InitializePrivateNetworks();

  // Send the model to the given renderer.
  void SendModelToProcess(content::RenderProcessHost* process);

  // Same as above but sends the model to all renderers.
  void SendModelToRenderers();

  // Reads the bad subnets from the client model and inserts them into
  // |bad_subnets| for faster lookups.  This method is static to simplify
  // testing.
  static void SetBadSubnets(const ClientSideModel& model,
                            BadSubnetMap* bad_subnets);


  // Returns true iff all the hash ids in the client-side model point to
  // valid hashes in the model.
  static bool ModelHasValidHashIds(const ClientSideModel& model);

  // Returns the URL that will be used for report requests, given
  // |report_url|.
  // NOTE(review): the earlier comment mentioned phishing requests only; the
  // |report_url| parameter suggests this is shared with malware reports
  // (kClientReportPhishingUrl / kClientReportMalwareUrl) -- confirm.
  static GURL GetClientReportUrl(const std::string& report_url);

  // Whether the service is running or not.  When the service is not running,
  // it won't download the model nor report detected phishing URLs.
  bool enabled_;

  // State of the client-side model.
  // NOTE(review): presumably |model_str_| is the serialized model sent to
  // renderers, |model_| its parsed form, |model_max_age_| how long the
  // fetched model stays fresh, and |model_fetcher_| the in-flight model
  // download -- confirm against the .cc.
  std::string model_str_;
  scoped_ptr<ClientSideModel> model_;
  scoped_ptr<base::TimeDelta> model_max_age_;
  scoped_ptr<net::URLFetcher> model_fetcher_;

  // Map of client report phishing request to the corresponding callback that
  // has to be invoked when the request is done.  The values are raw pointers;
  // NOTE(review): presumably owned by this map and deleted by the service --
  // confirm.
  struct ClientReportInfo;
  std::map<const net::URLFetcher*, ClientReportInfo*>
      client_phishing_reports_;
  // Map of client malware ip request to the corresponding callback that
  // has to be invoked when the request is done.  Same raw-pointer caveat as
  // |client_phishing_reports_|.
  struct ClientMalwareReportInfo;
  std::map<const net::URLFetcher*, ClientMalwareReportInfo*>
      client_malware_reports_;

  // Cache of completed requests. Used to satisfy requests for the same urls
  // as long as the next request falls within our caching window (which is
  // determined by kNegativeCacheIntervalDays and
  // kPositiveCacheIntervalMinutes). The size of this cache is limited by the
  // report rate limit multiplied by the longer of the two cache intervals.
  // NOTE(review): the earlier text named kMaxReportsPerDay,
  // kNegativeCacheInterval and kPositiveCacheInterval, none of which are
  // declared in this class; presumably kMaxReportsPerInterval and the two
  // constants above are meant -- confirm.
  // TODO(gcasto): Serialize this so that it doesn't reset on browser restart.
  PhishingCache cache_;

  // Timestamp of when we sent a phishing request. Used to limit the number
  // of phishing requests that we send in a day.
  // TODO(gcasto): Serialize this so that it doesn't reset on browser restart.
  std::queue<base::Time> phishing_report_times_;

  // Timestamp of when we sent a malware request. Used to limit the number
  // of malware requests that we send in a day.
  std::queue<base::Time> malware_report_times_;

  // Used to asynchronously call the callbacks for
  // SendClientReportPhishingRequest.
  // NOTE(review): the members declared after this one are destroyed before
  // it (reverse declaration order) -- confirm that no task bound through
  // this factory can touch them during destruction.
  base::WeakPtrFactory<ClientSideDetectionService> weak_factory_;

  // The context we use to issue network requests.
  scoped_refptr<net::URLRequestContextGetter> request_context_getter_;

  // The network blocks that we consider private IP address ranges.
  std::vector<AddressRange> private_networks_;

  // Map of bad subnets which are copied from the client model and put into
  // this map to speed up lookups.
  BadSubnetMap bad_subnets_;

  // NOTE(review): presumably holds the registrations for the notifications
  // delivered to Observe() -- confirm against the .cc.
  content::NotificationRegistrar registrar_;

  DISALLOW_COPY_AND_ASSIGN(ClientSideDetectionService);
};
}  // namespace safe_browsing

#endif  // CHROME_BROWSER_SAFE_BROWSING_CLIENT_SIDE_DETECTION_SERVICE_H_