blob: a4e2d3a2b7604d742b9f02f984b15182c4a6945d [file] [log] [blame]
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#ifndef COMPONENTS_FEEDBACK_ANONYMIZER_TOOL_H_
6#define COMPONENTS_FEEDBACK_ANONYMIZER_TOOL_H_
7
8#include <map>
dcheng84c358e2016-04-26 07:05:539#include <memory>
battre4cdaa7c2016-01-07 11:30:2710#include <string>
11#include <vector>
12
battre03910b42016-01-11 13:42:3413#include "base/macros.h"
Dominic Battref091addfc2017-12-07 15:45:3414#include "base/memory/ref_counted.h"
15#include "base/sequence_checker.h"
16#include "base/sequenced_task_runner.h"
battre03910b42016-01-11 13:42:3417
// Forward declaration of the RE2 regular expression class. Only pointers to
// RE2 are used in this header, so the full definition is not needed here.
namespace re2 {
class RE2;
}
battre4cdaa7c2016-01-07 11:30:2721
22namespace feedback {
23
// A custom pattern described by an alias and a regular expression.
struct CustomPatternWithoutContext {
  // A string literal used in anonymized tests. Matches to the |pattern| are
  // replaced with <|alias|: 1>, <|alias|: 2>, ...
  const char* alias;
  // A RE2 regexp with exactly one capture group. Matches will be replaced by
  // the alias reference described above.
  const char* pattern;
};
32
battre4cdaa7c2016-01-07 11:30:2733class AnonymizerTool {
34 public:
35 AnonymizerTool();
36 ~AnonymizerTool();
37
38 // Returns an anonymized version of |input|. PII-sensitive data (such as MAC
39 // addresses) in |input| is replaced with unique identifiers.
Dominic Battref091addfc2017-12-07 15:45:3440 // This is an expensive operation. Make sure not to execute this on the UI
41 // thread.
battre4cdaa7c2016-01-07 11:30:2742 std::string Anonymize(const std::string& input);
43
44 private:
45 friend class AnonymizerToolTest;
46
battre03910b42016-01-11 13:42:3447 re2::RE2* GetRegExp(const std::string& pattern);
48
battre4cdaa7c2016-01-07 11:30:2749 std::string AnonymizeMACAddresses(const std::string& input);
50 std::string AnonymizeCustomPatterns(std::string input);
battre03910b42016-01-11 13:42:3451 std::string AnonymizeCustomPatternWithContext(
battre4cdaa7c2016-01-07 11:30:2752 const std::string& input,
53 const std::string& pattern,
54 std::map<std::string, std::string>* identifier_space);
battre03910b42016-01-11 13:42:3455 std::string AnonymizeCustomPatternWithoutContext(
56 const std::string& input,
57 const CustomPatternWithoutContext& pattern,
58 std::map<std::string, std::string>* identifier_space);
battre4cdaa7c2016-01-07 11:30:2759
60 // Map of MAC addresses discovered in anonymized strings to anonymized
61 // representations. 11:22:33:44:55:66 gets anonymized to 11:22:33:00:00:01,
62 // where the first three bytes represent the manufacturer. The last three
63 // bytes are used to distinguish different MAC addresses and are incremented
64 // for each newly discovered MAC address.
65 std::map<std::string, std::string> mac_addresses_;
66
67 // Like mac addresses, identifiers in custom patterns are anonymized.
battre03910b42016-01-11 13:42:3468 // custom_patterns_with_context_[i] contains a map of original identifier to
69 // anonymized identifier for custom pattern number i.
70 std::vector<std::map<std::string, std::string>> custom_patterns_with_context_;
71 std::vector<std::map<std::string, std::string>>
72 custom_patterns_without_context_;
73
74 // Cache to prevent the repeated compilation of the same regular expression
75 // pattern. Key is the string representation of the RegEx.
dcheng84c358e2016-04-26 07:05:5376 std::map<std::string, std::unique_ptr<re2::RE2>> regexp_cache_;
battre4cdaa7c2016-01-07 11:30:2777
Dominic Battref091addfc2017-12-07 15:45:3478 SEQUENCE_CHECKER(sequence_checker_);
79
battre4cdaa7c2016-01-07 11:30:2780 DISALLOW_COPY_AND_ASSIGN(AnonymizerTool);
81};
82
Dominic Battref091addfc2017-12-07 15:45:3483// A container for a AnonymizerTool that is thread-safely ref-countable.
84// This is useful for a class that wants to post an async anonymization task
85// to a background sequence runner and not deal with its own life-cycle ending
86// while the AnonymizerTool is busy on another sequence.
87class AnonymizerToolContainer
88 : public base::RefCountedThreadSafe<AnonymizerToolContainer> {
89 public:
90 explicit AnonymizerToolContainer(
91 scoped_refptr<base::SequencedTaskRunner> task_runner);
92
93 // Returns a pointer to the instance of this anonymier. May only be called
94 // on |task_runner_|.
95 AnonymizerTool* Get();
96
97 private:
98 friend class base::RefCountedThreadSafe<AnonymizerToolContainer>;
99 ~AnonymizerToolContainer();
100
101 std::unique_ptr<AnonymizerTool> anonymizer_;
102 scoped_refptr<base::SequencedTaskRunner> task_runner_;
103};
104
battre4cdaa7c2016-01-07 11:30:27105} // namespace feedback
106
107#endif // COMPONENTS_FEEDBACK_ANONYMIZER_TOOL_H_