// Copyright 2015 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "components/feedback/anonymizer_tool.h" #include #include #include #include "third_party/re2/src/re2/re2.h" using re2::RE2; namespace feedback { namespace { // The |kCustomPatterns| array defines patterns to match and anonymize. Each // pattern needs to define three capturing parentheses groups: // // - a group for the pattern before the identifier to be anonymized; // - a group for the identifier to be anonymized; // - a group for the pattern after the identifier to be anonymized. // // Every matched identifier (in the context of the whole pattern) is anonymized // by replacing it with an incremental instance identifier. Every different // pattern defines a separate instance identifier space. See the unit test for // AnonymizerTool::AnonymizeCustomPattern for pattern anonymization examples. // // Useful regular expression syntax: // // +? is a non-greedy (lazy) +. // \b matches a word boundary. // (?i) turns on case insensitivity for the remainder of the regex. // (?-s) turns off "dot matches newline" for the remainder of the regex. // (?:regex) denotes non-capturing parentheses group. 
const char* kCustomPatterns[] = { "(\\bCell ID: ')([0-9a-fA-F]+)(')", // ModemManager "(\\bLocation area code: ')([0-9a-fA-F]+)(')", // ModemManager "(?i-s)(\\bssid[= ]')(.+)(')", // wpa_supplicant "(?-s)(\\bSSID - hexdump\\(len=[0-9]+\\): )(.+)()", // wpa_supplicant "(?-s)(\\[SSID=)(.+?)(\\])", // shill }; } // namespace AnonymizerTool::AnonymizerTool() : custom_patterns_(arraysize(kCustomPatterns)) {} AnonymizerTool::~AnonymizerTool() {} std::string AnonymizerTool::Anonymize(const std::string& input) { std::string anonymized = AnonymizeMACAddresses(input); anonymized = AnonymizeCustomPatterns(std::move(anonymized)); return anonymized; } std::string AnonymizerTool::AnonymizeMACAddresses(const std::string& input) { // This regular expression finds the next MAC address. It splits the data into // a section preceding the MAC address, an OUI (Organizationally Unique // Identifier) part and a NIC (Network Interface Controller) specific part. RE2::Options options; // set_multiline of pcre is not supported by RE2, yet. options.set_dot_nl(true); // Dot matches a new line. RE2 mac_re( "(.*?)(" "[0-9a-fA-F][0-9a-fA-F]:" "[0-9a-fA-F][0-9a-fA-F]:" "[0-9a-fA-F][0-9a-fA-F]):(" "[0-9a-fA-F][0-9a-fA-F]:" "[0-9a-fA-F][0-9a-fA-F]:" "[0-9a-fA-F][0-9a-fA-F])", options); std::string result; result.reserve(input.size()); // Keep consuming, building up a result string as we go. re2::StringPiece text(input); std::string pre_mac, oui, nic; while (re2::RE2::Consume(&text, mac_re, RE2::Arg(&pre_mac), RE2::Arg(&oui), RE2::Arg(&nic))) { // Look up the MAC address in the hash. oui = base::ToLowerASCII(oui); nic = base::ToLowerASCII(nic); std::string mac = oui + ":" + nic; std::string replacement_mac = mac_addresses_[mac]; if (replacement_mac.empty()) { // If not found, build up a replacement MAC address by generating a new // NIC part. 
int mac_id = mac_addresses_.size(); replacement_mac = base::StringPrintf( "%s:%02x:%02x:%02x", oui.c_str(), (mac_id & 0x00ff0000) >> 16, (mac_id & 0x0000ff00) >> 8, (mac_id & 0x000000ff)); mac_addresses_[mac] = replacement_mac; } result += pre_mac; result += replacement_mac; } text.AppendToString(&result); return result; } std::string AnonymizerTool::AnonymizeCustomPatterns(std::string input) { for (size_t i = 0; i < arraysize(kCustomPatterns); i++) { input = AnonymizeCustomPattern(input, kCustomPatterns[i], &custom_patterns_[i]); } return input; } // static std::string AnonymizerTool::AnonymizeCustomPattern( const std::string& input, const std::string& pattern, std::map* identifier_space) { RE2::Options options; // set_multiline of pcre is not supported by RE2, yet. options.set_dot_nl(true); // Dot matches a new line. RE2 re("(.*?)" + pattern, options); DCHECK_EQ(4, re.NumberOfCapturingGroups()); std::string result; result.reserve(input.size()); // Keep consuming, building up a result string as we go. re2::StringPiece text(input); std::string pre_match, pre_matched_id, matched_id, post_matched_id; while (RE2::Consume(&text, re, RE2::Arg(&pre_match), RE2::Arg(&pre_matched_id), RE2::Arg(&matched_id), RE2::Arg(&post_matched_id))) { std::string replacement_id = (*identifier_space)[matched_id]; if (replacement_id.empty()) { replacement_id = base::IntToString(identifier_space->size()); (*identifier_space)[matched_id] = replacement_id; } result += pre_match; result += pre_matched_id; result += replacement_id; result += post_matched_id; } text.AppendToString(&result); return result; } } // namespace feedback