summaryrefslogtreecommitdiffstats
path: root/components/feedback
diff options
context:
space:
mode:
Diffstat (limited to 'components/feedback')
-rw-r--r-- components/feedback/BUILD.gn 4
-rw-r--r-- components/feedback/DEPS 3
-rw-r--r-- components/feedback/OWNERS 2
-rw-r--r-- components/feedback/anonymizer_tool.cc 151
-rw-r--r-- components/feedback/anonymizer_tool.h 52
-rw-r--r-- components/feedback/anonymizer_tool_unittest.cc 109
-rw-r--r-- components/feedback/feedback_common_unittest.cc 53
7 files changed, 347 insertions, 27 deletions
diff --git a/components/feedback/BUILD.gn b/components/feedback/BUILD.gn
index f126e05..ec4e9f19 100644
--- a/components/feedback/BUILD.gn
+++ b/components/feedback/BUILD.gn
@@ -4,6 +4,8 @@
source_set("feedback") {
sources = [
+ "anonymizer_tool.cc",
+ "anonymizer_tool.h",
"feedback_common.cc",
"feedback_common.h",
"feedback_data.cc",
@@ -35,6 +37,7 @@ source_set("feedback") {
"//content/public/browser",
"//content/public/common",
"//net",
+ "//third_party/re2",
"//third_party/zlib:zip",
]
}
@@ -42,6 +45,7 @@ source_set("feedback") {
source_set("unit_tests") {
testonly = true
sources = [
+ "anonymizer_tool_unittest.cc",
"feedback_common_unittest.cc",
"feedback_data_unittest.cc",
"feedback_uploader_chrome_unittest.cc",
diff --git a/components/feedback/DEPS b/components/feedback/DEPS
index c88ff4e..3a9dc0b20 100644
--- a/components/feedback/DEPS
+++ b/components/feedback/DEPS
@@ -8,5 +8,6 @@ include_rules = [
"+content/public/test",
"+net/base",
"+net/url_request",
- "+third_party/zlib/google/zip.h",
+ "+third_party/re2",
+ "+third_party/zlib/google",
]
diff --git a/components/feedback/OWNERS b/components/feedback/OWNERS
index 5654d44..5b2bc16 100644
--- a/components/feedback/OWNERS
+++ b/components/feedback/OWNERS
@@ -2,3 +2,5 @@ achaulk@chromium.org
bsimonnet@chromium.org
rkc@chromium.org
zork@chromium.org
+
+per-file anonymizer_tool*=battre@chromium.org
diff --git a/components/feedback/anonymizer_tool.cc b/components/feedback/anonymizer_tool.cc
new file mode 100644
index 0000000..713ceb6
--- /dev/null
+++ b/components/feedback/anonymizer_tool.cc
@@ -0,0 +1,151 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/feedback/anonymizer_tool.h"
+
+#include <base/strings/string_number_conversions.h>
+#include <base/strings/string_util.h>
+#include <base/strings/stringprintf.h>
+
+#include "third_party/re2/src/re2/re2.h"
+
+using re2::RE2;
+
+namespace feedback {
+
+namespace {
+
+// The |kCustomPatterns| array defines patterns to match and anonymize. Each
+// pattern needs to define three capturing parentheses groups:
+//
+// - a group for the pattern before the identifier to be anonymized;
+// - a group for the identifier to be anonymized;
+// - a group for the pattern after the identifier to be anonymized.
+//
+// Every matched identifier (in the context of the whole pattern) is anonymized
+// by replacing it with an incremental instance identifier. Every different
+// pattern defines a separate instance identifier space. See the unit test for
+// AnonymizerTool::AnonymizeCustomPattern for pattern anonymization examples.
+//
+// Useful regular expression syntax:
+//
+// +? is a non-greedy (lazy) +.
+// \b matches a word boundary.
+// (?i) turns on case insensitivity for the remainder of the regex.
+// (?-s) turns off "dot matches newline" for the remainder of the regex.
+// (?:regex) denotes non-capturing parentheses group.
+//
+// Entry i of this table is matched using the identifier map stored in
+// AnonymizerTool::custom_patterns_[i]. The table is read-only, so both the
+// characters and the pointers themselves are declared const.
+const char* const kCustomPatterns[] = {
+    "(\\bCell ID: ')([0-9a-fA-F]+)(')",                  // ModemManager
+    "(\\bLocation area code: ')([0-9a-fA-F]+)(')",       // ModemManager
+    "(?i-s)(\\bssid[= ]')(.+)(')",                       // wpa_supplicant
+    "(?-s)(\\bSSID - hexdump\\(len=[0-9]+\\): )(.+)()",  // wpa_supplicant
+    "(?-s)(\\[SSID=)(.+?)(\\])",                         // shill
+};
+
+} // namespace
+
+// Sizes |custom_patterns_| so that every entry of kCustomPatterns starts with
+// its own empty identifier map.
+AnonymizerTool::AnonymizerTool()
+    : custom_patterns_(arraysize(kCustomPatterns)) {
+}
+
+AnonymizerTool::~AnonymizerTool() = default;
+
+// Runs both anonymization passes over |input|: MAC addresses first, then the
+// custom patterns applied to the result of the MAC pass.
+std::string AnonymizerTool::Anonymize(const std::string& input) {
+  return AnonymizeCustomPatterns(AnonymizeMACAddresses(input));
+}
+
+// Returns a copy of |input| in which every MAC address keeps its OUI half but
+// has its NIC half replaced by a counter. Replacements are remembered in
+// |mac_addresses_|, keyed by the lowercased address, so a MAC address that was
+// seen before is rewritten the same way again.
+std::string AnonymizerTool::AnonymizeMACAddresses(const std::string& input) {
+  // The expression locates the next MAC address and captures three pieces:
+  // the text in front of it, the OUI (Organizationally Unique Identifier) half
+  // and the NIC (Network Interface Controller) specific half.
+  RE2::Options re_options;
+  // set_multiline of pcre is not supported by RE2, yet.
+  re_options.set_dot_nl(true);  // Dot matches a new line.
+  RE2 mac_pattern(
+      "(.*?)("
+      "[0-9a-fA-F][0-9a-fA-F]:"
+      "[0-9a-fA-F][0-9a-fA-F]:"
+      "[0-9a-fA-F][0-9a-fA-F]):("
+      "[0-9a-fA-F][0-9a-fA-F]:"
+      "[0-9a-fA-F][0-9a-fA-F]:"
+      "[0-9a-fA-F][0-9a-fA-F])",
+      re_options);
+
+  std::string output;
+  output.reserve(input.size());
+
+  // Each successful Consume() strips one "prefix + MAC address" chunk from the
+  // front of |remaining|; the rewritten chunk is appended to |output|.
+  re2::StringPiece remaining(input);
+  std::string prefix;
+  std::string oui;
+  std::string nic;
+  while (RE2::Consume(&remaining, mac_pattern, RE2::Arg(&prefix),
+                      RE2::Arg(&oui), RE2::Arg(&nic))) {
+    // Lookups are done on the lowercased address.
+    oui = base::ToLowerASCII(oui);
+    nic = base::ToLowerASCII(nic);
+
+    // operator[] adds an empty slot for an address that has not been seen, so
+    // the map size read below already counts the new address.
+    std::string& replacement = mac_addresses_[oui + ":" + nic];
+    if (replacement.empty()) {
+      // Unknown address: keep the OUI and encode the counter as the NIC part.
+      const int mac_id = mac_addresses_.size();
+      replacement = base::StringPrintf(
+          "%s:%02x:%02x:%02x", oui.c_str(), (mac_id & 0x00ff0000) >> 16,
+          (mac_id & 0x0000ff00) >> 8, (mac_id & 0x000000ff));
+    }
+
+    output += prefix;
+    output += replacement;
+  }
+
+  // Whatever follows the last match is copied through unchanged.
+  remaining.AppendToString(&output);
+  return output;
+}
+
+// Feeds |input| through every entry of kCustomPatterns in table order; the
+// output of one pattern is the input of the next. Pattern number i records its
+// identifiers in custom_patterns_[i].
+std::string AnonymizerTool::AnonymizeCustomPatterns(std::string input) {
+  const size_t pattern_count = arraysize(kCustomPatterns);
+  for (size_t index = 0; index != pattern_count; ++index) {
+    input = AnonymizeCustomPattern(input, kCustomPatterns[index],
+                                   &custom_patterns_[index]);
+  }
+  return input;
+}
+
+// Replaces the identifier captured by the middle group of |pattern| with a
+// number taken from |identifier_space|, for every match found in |input|.
+// |pattern| must define exactly three capturing groups (before, identifier,
+// after); an identifier that is already in |identifier_space| keeps the number
+// it was given earlier.
+// static
+std::string AnonymizerTool::AnonymizeCustomPattern(
+    const std::string& input,
+    const std::string& pattern,
+    std::map<std::string, std::string>* identifier_space) {
+  RE2::Options re_options;
+  // set_multiline of pcre is not supported by RE2, yet.
+  re_options.set_dot_nl(true);  // Dot matches a new line.
+  // The lazy leading group collects the text in front of each match, which is
+  // why four groups are expected instead of three.
+  RE2 full_pattern("(.*?)" + pattern, re_options);
+  DCHECK_EQ(4, full_pattern.NumberOfCapturingGroups());
+
+  std::string output;
+  output.reserve(input.size());
+
+  // Each successful Consume() strips one "prefix + match" chunk from the front
+  // of |remaining|; the rewritten chunk is appended to |output|.
+  re2::StringPiece remaining(input);
+  std::string prefix;
+  std::string before_id;
+  std::string original_id;
+  std::string after_id;
+  while (RE2::Consume(&remaining, full_pattern, RE2::Arg(&prefix),
+                      RE2::Arg(&before_id), RE2::Arg(&original_id),
+                      RE2::Arg(&after_id))) {
+    // operator[] adds an empty slot for a new identifier, so the map size read
+    // below already counts it.
+    std::string& replacement_id = (*identifier_space)[original_id];
+    if (replacement_id.empty())
+      replacement_id = base::IntToString(identifier_space->size());
+
+    output += prefix;
+    output += before_id;
+    output += replacement_id;
+    output += after_id;
+  }
+
+  // Whatever follows the last match is copied through unchanged.
+  remaining.AppendToString(&output);
+  return output;
+}
+
+} // namespace feedback
diff --git a/components/feedback/anonymizer_tool.h b/components/feedback/anonymizer_tool.h
new file mode 100644
index 0000000..54a690f
--- /dev/null
+++ b/components/feedback/anonymizer_tool.h
@@ -0,0 +1,52 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_FEEDBACK_ANONYMIZER_TOOL_H_
+#define COMPONENTS_FEEDBACK_ANONYMIZER_TOOL_H_
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include <base/macros.h>
+
+namespace feedback {
+
+class AnonymizerTool {  // Rewrites identifying data in text; keeps per-instance maps.
+ public:
+ AnonymizerTool();
+ ~AnonymizerTool();
+
+ // Returns a copy of |input| in which MAC addresses and the identifiers of
+ // the custom patterns are replaced; equal originals get equal replacements.
+ std::string Anonymize(const std::string& input);
+
+ private:
+ friend class AnonymizerToolTest;  // The unit test fixture calls the private helpers.
+
+ std::string AnonymizeMACAddresses(const std::string& input);  // MAC address pass.
+ std::string AnonymizeCustomPatterns(std::string input);  // Applies every custom pattern in turn.
+ static std::string AnonymizeCustomPattern(  // Applies a single pattern.
+ const std::string& input,
+ const std::string& pattern,  // Needs three capturing groups: before, id, after.
+ std::map<std::string, std::string>* identifier_space);  // Original id -> replacement.
+
+ // Map of the (lowercased) MAC addresses discovered so far to their anonymized
+ // representations. 11:22:33:44:55:66 gets anonymized to 11:22:33:00:00:01,
+ // where the first three bytes represent the manufacturer. The last three
+ // bytes are used to distinguish different MAC addresses and are incremented
+ // for each newly discovered MAC address.
+ std::map<std::string, std::string> mac_addresses_;
+
+ // Like MAC addresses, identifiers in custom patterns are anonymized.
+ // custom_patterns_[i] contains a map of original identifier to anonymized
+ // identifier for custom pattern number i.
+ std::vector<std::map<std::string, std::string>> custom_patterns_;
+
+ DISALLOW_COPY_AND_ASSIGN(AnonymizerTool);
+};
+
+} // namespace feedback
+
+#endif // COMPONENTS_FEEDBACK_ANONYMIZER_TOOL_H_
diff --git a/components/feedback/anonymizer_tool_unittest.cc b/components/feedback/anonymizer_tool_unittest.cc
new file mode 100644
index 0000000..68f35a8
--- /dev/null
+++ b/components/feedback/anonymizer_tool_unittest.cc
@@ -0,0 +1,109 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/feedback/anonymizer_tool.h"
+
+#include <gtest/gtest.h>
+
+namespace feedback {
+
+// Test fixture that forwards to AnonymizerTool's private helpers; it can reach
+// them because AnonymizerTool declares AnonymizerToolTest as a friend.
+class AnonymizerToolTest : public testing::Test {
+ protected:
+  // Runs the single |pattern| over |input| with the caller-provided
+  // identifier map |space|; does not touch |anonymizer_|.
+  static std::string AnonymizeCustomPattern(
+      const std::string& input,
+      const std::string& pattern,
+      std::map<std::string, std::string>* space) {
+    return AnonymizerTool::AnonymizeCustomPattern(input, pattern, space);
+  }
+
+  // The two helpers below go through |anonymizer_|, so identifiers handed out
+  // by earlier calls within a test are remembered by later calls.
+  std::string AnonymizeCustomPatterns(const std::string& input) {
+    return anonymizer_.AnonymizeCustomPatterns(input);
+  }
+
+  std::string AnonymizeMACAddresses(const std::string& input) {
+    return anonymizer_.AnonymizeMACAddresses(input);
+  }
+
+  AnonymizerTool anonymizer_;
+};
+
+// Checks that the public Anonymize() entry point runs both the MAC address
+// pass and the custom pattern pass.
+TEST_F(AnonymizerToolTest, Anonymize) {
+  EXPECT_EQ("", anonymizer_.Anonymize(""));
+  EXPECT_EQ("foo\nbar\n", anonymizer_.Anonymize("foo\nbar\n"));
+
+  // Make sure MAC address anonymization is invoked.
+  EXPECT_EQ("02:46:8a:00:00:01", anonymizer_.Anonymize("02:46:8a:ce:13:57"));
+
+  // Make sure custom pattern anonymization is invoked. This has to go through
+  // Anonymize() itself: calling the AnonymizeCustomPatterns() helper directly
+  // would pass even if Anonymize() never ran the custom patterns.
+  EXPECT_EQ("Cell ID: '1'", anonymizer_.Anonymize("Cell ID: 'A1B2'"));
+}
+
+TEST_F(AnonymizerToolTest, AnonymizeMACAddresses) {  // All calls share one AnonymizerTool, so IDs keep counting up.
+ EXPECT_EQ("", AnonymizeMACAddresses(""));
+ EXPECT_EQ("foo\nbar\n", AnonymizeMACAddresses("foo\nbar\n"));  // No MAC address: unchanged.
+ EXPECT_EQ("11:22:33:44:55", AnonymizeMACAddresses("11:22:33:44:55"));  // Only five bytes: not a MAC address.
+ EXPECT_EQ("aa:bb:cc:00:00:01", AnonymizeMACAddresses("aa:bb:cc:dd:ee:ff"));  // First MAC: OUI kept, NIC becomes 1.
+ EXPECT_EQ(  // Known addresses reuse their replacement; new ones get the next number.
+ "BSSID: aa:bb:cc:00:00:01 in the middle\n"
+ "bb:cc:dd:00:00:02 start of line\n"
+ "end of line aa:bb:cc:00:00:01\n"
+ "no match across lines aa:bb:cc:\n"
+ "dd:ee:ff two on the same line:\n"
+ "x bb:cc:dd:00:00:02 cc:dd:ee:00:00:03 x\n",
+ AnonymizeMACAddresses("BSSID: aa:bb:cc:dd:ee:ff in the middle\n"
+ "bb:cc:dd:ee:ff:00 start of line\n"
+ "end of line aa:bb:cc:dd:ee:ff\n"
+ "no match across lines aa:bb:cc:\n"
+ "dd:ee:ff two on the same line:\n"
+ "x bb:cc:dd:ee:ff:00 cc:dd:ee:ff:00:11 x\n"));
+ EXPECT_EQ("Remember bb:cc:dd:00:00:02?",  // Lookup is case-insensitive; output is lowercased.
+ AnonymizeMACAddresses("Remember bB:Cc:DD:ee:ff:00?"));
+}
+
+TEST_F(AnonymizerToolTest, AnonymizeCustomPatterns) {  // Each pattern numbers its identifiers independently.
+ EXPECT_EQ("", AnonymizeCustomPatterns(""));
+
+ EXPECT_EQ("Cell ID: '1'", AnonymizeCustomPatterns("Cell ID: 'A1B2'"));  // First Cell ID seen.
+ EXPECT_EQ("Cell ID: '2'", AnonymizeCustomPatterns("Cell ID: 'C1D2'"));  // Second distinct Cell ID.
+ EXPECT_EQ("foo Cell ID: '1' bar",  // A1B2 was seen before, so it keeps 1.
+ AnonymizeCustomPatterns("foo Cell ID: 'A1B2' bar"));
+
+ EXPECT_EQ("foo Location area code: '1' bar",  // Separate identifier space: A1B2 is 1 here as well.
+ AnonymizeCustomPatterns("foo Location area code: 'A1B2' bar"));
+
+ EXPECT_EQ("foo\na SSID='1' b\n'",  // Greedy match runs to the last quote on the line, not past the newline.
+ AnonymizeCustomPatterns("foo\na SSID='Joe's' b\n'"));
+ EXPECT_EQ("ssid '2'", AnonymizeCustomPatterns("ssid 'My AP'"));  // Same pattern as above, next number.
+ EXPECT_EQ("bssid 'aa:bb'", AnonymizeCustomPatterns("bssid 'aa:bb'"));  // No word boundary before "ssid": unchanged.
+
+ EXPECT_EQ("Scan SSID - hexdump(len=6): 1\nfoo",  // The hexdump is replaced up to the end of the line.
+ AnonymizeCustomPatterns(
+ "Scan SSID - hexdump(len=6): 47 6f 6f 67 6c 65\nfoo"));
+
+ EXPECT_EQ(  // An SSID containing a newline is not matched by the shill pattern.
+ "a\nb [SSID=1] [SSID=2] [SSID=foo\nbar] b",
+ AnonymizeCustomPatterns("a\nb [SSID=foo] [SSID=bar] [SSID=foo\nbar] b"));
+}
+
+TEST_F(AnonymizerToolTest, AnonymizeCustomPattern) {  // Exercises the static helper with a test-local pattern.
+ const char kPattern[] = "(\\b(?i)id:? ')(\\d+)(')";  // Case-insensitive "id", optional colon, quoted digits.
+ std::map<std::string, std::string> space;  // Identifier map shared by the calls below until cleared.
+ EXPECT_EQ("", AnonymizeCustomPattern("", kPattern, &space));
+ EXPECT_EQ("foo\nbar\n",
+ AnonymizeCustomPattern("foo\nbar\n", kPattern, &space));
+ EXPECT_EQ("id '1'", AnonymizeCustomPattern("id '2345'", kPattern, &space));  // First identifier seen.
+ EXPECT_EQ("id '2'", AnonymizeCustomPattern("id '1234'", kPattern, &space));  // Second identifier seen.
+ EXPECT_EQ("id: '2'", AnonymizeCustomPattern("id: '1234'", kPattern, &space));  // Known identifier, with colon.
+ EXPECT_EQ("ID: '1'", AnonymizeCustomPattern("ID: '2345'", kPattern, &space));  // Known identifier, upper case.
+ EXPECT_EQ("x1 id '1' 1x id '2'\nid '1'\n",  // Several matches in one input, across lines.
+ AnonymizeCustomPattern("x1 id '2345' 1x id '1234'\nid '2345'\n",
+ kPattern, &space));
+ space.clear();  // Emptying the map restarts the numbering.
+ EXPECT_EQ("id '1'", AnonymizeCustomPattern("id '1234'", kPattern, &space));
+
+ space.clear();
+ EXPECT_EQ("x1z", AnonymizeCustomPattern("xyz", "()(y+)()", &space));  // Empty before/after groups are allowed.
+}
+
+} // namespace feedback
diff --git a/components/feedback/feedback_common_unittest.cc b/components/feedback/feedback_common_unittest.cc
index d3fb950..61895ab 100644
--- a/components/feedback/feedback_common_unittest.cc
+++ b/components/feedback/feedback_common_unittest.cc
@@ -25,55 +25,56 @@ const char kLogsAttachmentName[] = "system_logs.zip";
class FeedbackCommonTest : public testing::Test {
protected:
FeedbackCommonTest() {
- feedback = scoped_refptr<FeedbackCommon>(new FeedbackCommon());
+ feedback_ = scoped_refptr<FeedbackCommon>(new FeedbackCommon());
}
~FeedbackCommonTest() override {}
- scoped_refptr<FeedbackCommon> feedback;
- userfeedback::ExtensionSubmit report;
+ scoped_refptr<FeedbackCommon> feedback_;
+ userfeedback::ExtensionSubmit report_;
};
TEST_F(FeedbackCommonTest, TestBasicData) {
// Test that basic data can be set and propagates to the request.
- feedback->set_category_tag(kOne);
- feedback->set_description(kTwo);
- feedback->set_page_url(kThree);
- feedback->set_user_email(kFour);
- feedback->PrepareReport(&report);
+ feedback_->set_category_tag(kOne);
+ feedback_->set_description(kTwo);
+ feedback_->set_page_url(kThree);
+ feedback_->set_user_email(kFour);
+ feedback_->PrepareReport(&report_);
- EXPECT_EQ(kOne, report.bucket());
- EXPECT_EQ(kTwo, report.common_data().description());
- EXPECT_EQ(kThree, report.web_data().url());
- EXPECT_EQ(kFour, report.common_data().user_email());
+ EXPECT_EQ(kOne, report_.bucket());
+ EXPECT_EQ(kTwo, report_.common_data().description());
+ EXPECT_EQ(kThree, report_.web_data().url());
+ EXPECT_EQ(kFour, report_.common_data().user_email());
}
TEST_F(FeedbackCommonTest, TestAddLogs) {
- feedback->AddLog(kOne, kTwo);
- feedback->AddLog(kThree, kFour);
+ feedback_->AddLog(kOne, kTwo);
+ feedback_->AddLog(kThree, kFour);
- EXPECT_EQ(2U, feedback->sys_info()->size());
+ EXPECT_EQ(2U, feedback_->sys_info()->size());
}
TEST_F(FeedbackCommonTest, TestCompressionThreshold) {
// Add a large and small log, verify that only the small log gets
// included in the report.
- feedback->AddLog(kOne, kTwo);
- feedback->AddLog(kThree, kLongLog);
- feedback->PrepareReport(&report);
+ feedback_->AddLog(kOne, kTwo);
+ feedback_->AddLog(kThree, kLongLog);
+ feedback_->PrepareReport(&report_);
- EXPECT_EQ(1, report.web_data().product_specific_data_size());
- EXPECT_EQ(kOne, report.web_data().product_specific_data(0).key());
+ EXPECT_EQ(1, report_.web_data().product_specific_data_size());
+ EXPECT_EQ(kOne, report_.web_data().product_specific_data(0).key());
}
TEST_F(FeedbackCommonTest, TestCompression) {
// Add a large and small log, verify that an attachment has been
// added with the right name.
- feedback->AddLog(kOne, kTwo);
- feedback->AddLog(kThree, kLongLog);
- feedback->CompressLogs();
- feedback->PrepareReport(&report);
+ feedback_->AddLog(kOne, kTwo);
+ feedback_->AddLog(kThree, kLongLog);
+ feedback_->CompressLogs();
+ feedback_->PrepareReport(&report_);
- EXPECT_EQ(1, report.product_specific_binary_data_size());
- EXPECT_EQ(kLogsAttachmentName, report.product_specific_binary_data(0).name());
+ EXPECT_EQ(1, report_.product_specific_binary_data_size());
+ EXPECT_EQ(kLogsAttachmentName,
+ report_.product_specific_binary_data(0).name());
}