// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CHROME_BROWSER_EXTENSIONS_ACTIVITY_LOG_COUNTING_POLICY_H_
#define CHROME_BROWSER_EXTENSIONS_ACTIVITY_LOG_COUNTING_POLICY_H_

#include <map>
#include <string>
#include <vector>

#include "base/containers/hash_tables.h"
#include "chrome/browser/extensions/activity_log/activity_database.h"
#include "chrome/browser/extensions/activity_log/activity_log_policy.h"
#include "chrome/browser/extensions/activity_log/database_string_table.h"

namespace extensions {

// An activity-log policy that stores the stream of actions in a compact form.
// Most arguments are stripped before storage (which improves privacy and
// keeps the database small), and rows that are identical are collapsed into a
// single row whose count column tracks how many times the row was repeated.
// Merging only applies to rows from the same day; actions that happen on
// different days remain distinct.  Stored data is kept for up to a few days
// and is then deleted.
class CountingPolicy : public ActivityLogDatabasePolicy {
 public:
  explicit CountingPolicy(Profile* profile);
  ~CountingPolicy() override;

  // Entry point for logging |action|.  Internally this relies on
  // QueueAction() to add the action to the set of pending writes.
  void ProcessAction(scoped_refptr<Action> action) override;

  // Filtered read of the logged actions.  The database access itself is
  // performed by DoReadFilteredData(), which takes the same filter arguments;
  // |callback| receives the resulting vector of actions.
  void ReadFilteredData(
      const std::string& extension_id,
      const Action::ActionType type,
      const std::string& api_name,
      const std::string& page_url,
      const std::string& arg_url,
      const int days_ago,
      const base::Callback<void(scoped_ptr<Action::ActionVector>)>& callback)
      override;

  void Close() override;

  // Accessor and mutator for the length of time that old records are retained
  // in the database.
  const base::TimeDelta& retention_time() const { return retention_time_; }
  void set_retention_time(const base::TimeDelta& delta) {
    retention_time_ = delta;
  }

  // Removes the actions (rows) whose IDs are listed in |action_ids|.
  void RemoveActions(const std::vector<int64>& action_ids) override;

  // Cleans the URL data stored for this policy.
  void RemoveURLs(const std::vector<GURL>&) override;

  // Cleans the data that this policy holds for the extension |extension_id|.
  void RemoveExtensionData(const std::string& extension_id) override;

  // Deletes everything in the database.
  void DeleteDatabase() override;

  // Name of the main database table, followed by the name of a read-only view
  // over it that decompresses string values so they are easier to parse.
  static const char kTableName[];
  static const char kReadViewName[];

 protected:
  // Implementation of the ActivityDatabase::Delegate interface.  All of these
  // are always invoked on the database thread.
  bool InitDatabase(sql::Connection* db) override;
  bool FlushDatabase(sql::Connection* db) override;
  void OnDatabaseFailure() override;
  void OnDatabaseClose() override;

 private:
  // Container type for writes that are still pending against the database.
  // Each key is an action to be written; the mapped value is how much the
  // count field for that action should be incremented by in the database.
  using ActionQueue = std::map<scoped_refptr<Action>,
                               int,
                               ActionComparatorExcludingTimeAndActionId>;

  // Internal helper for ProcessAction: adds |action| to the actions waiting
  // to be written out.  Called on the database thread.
  void QueueAction(scoped_refptr<Action> action);

  // Internal helper that reads data from the database.  Called on the
  // database thread.
  scoped_ptr<Action::ActionVector> DoReadFilteredData(
      const std::string& extension_id,
      const Action::ActionType type,
      const std::string& api_name,
      const std::string& page_url,
      const std::string& arg_url,
      const int days_ago);

  // Implementation of RemoveActions.  Must only run on the database thread.
  void DoRemoveActions(const std::vector<int64>& action_ids);

  // Implementation of RemoveURLs.  Must only run on the database thread.
  void DoRemoveURLs(const std::vector<GURL>& restrict_urls);

  // Implementation of RemoveExtensionData.  Must only run on the database
  // thread.
  void DoRemoveExtensionData(const std::string& extension_id);

  // Implementation of DeleteDatabase.  Called on the database thread.
  void DoDeleteDatabase();

  // Removes old records from the activity log database.
  bool CleanOlderThan(sql::Connection* db, const base::Time& cutoff);

  // Removes interned strings that are no longer used from the database.  Run
  // this after rows have been deleted from the main log table so that stale
  // values get cleaned out.
  bool CleanStringTables(sql::Connection* db);

  // The set of API calls whose complete arguments should be logged.
  Util::ApiSet api_arg_whitelist_;

  // String-to-integer mapping tables, used to shrink the storage the database
  // requires.  URLs live in their own table, apart from all other strings, so
  // that clearing history is simpler.
  DatabaseStringTable string_table_;
  DatabaseStringTable url_table_;

  // Updates waiting to be written to the database when write batching is
  // turned on.  Should only be touched from the database thread.
  ActionQueue queued_actions_;

  // The date (timestamp at local midnight) shared by every action currently
  // in queued_actions_.  Everything in the queue has to fall on one day so
  // that actions which should stay separate are not accidentally aggregated.
  base::Time queued_actions_date_;

  // How long old activity log records should stay in the database.  When
  // cleaning runs, this amount is subtracted from the current time, the
  // result is rounded down to midnight, and rows older than that are deleted.
  base::TimeDelta retention_time_;

  // When old activity log records were last cleaned out of the database; this
  // is tracked for the current browser session only.  Old records are deleted
  // on the first database flush and then every 12 hours after that.
  base::Time last_database_cleaning_time_;

  friend class CountingPolicyTest;
  FRIEND_TEST_ALL_PREFIXES(CountingPolicyTest, EarlyFlush);
  FRIEND_TEST_ALL_PREFIXES(CountingPolicyTest, MergingAndExpiring);
  FRIEND_TEST_ALL_PREFIXES(CountingPolicyTest, StringTableCleaning);
};

}  // namespace extensions

#endif  // CHROME_BROWSER_EXTENSIONS_ACTIVITY_LOG_COUNTING_POLICY_H_