// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include "components/browsing_topics/browsing_topics_service_impl.h"
6
Yao Xiaocc379392022-03-25 21:39:067#include <random>
Abigail Katcoff02cceaa2023-03-27 16:22:328#include <vector>
Yao Xiaocc379392022-03-25 21:39:069
Yao Xiaod15c5532023-02-07 18:50:1210#include "base/metrics/histogram_functions.h"
Josh Karlind147c672023-03-06 20:45:1311#include "base/notreached.h"
Yao Xiaocc379392022-03-25 21:39:0612#include "base/rand_util.h"
Yao Xiao57892a22022-06-28 19:21:4113#include "base/ranges/algorithm.h"
Tommy C. Li088b42f2022-11-15 00:51:2814#include "base/strings/strcat.h"
Gabriel Charetted87f10f2022-03-31 00:44:2215#include "base/time/time.h"
Yao Xiaocc379392022-03-25 21:39:0616#include "components/browsing_topics/browsing_topics_calculator.h"
17#include "components/browsing_topics/browsing_topics_page_load_data_tracker.h"
Yao Xiao84826f42022-10-24 16:13:5818#include "components/browsing_topics/common/common_types.h"
Yao Xiao21f1faa2022-04-29 06:20:3219#include "components/browsing_topics/mojom/browsing_topics_internals.mojom.h"
Yao Xiaocc379392022-03-25 21:39:0620#include "components/browsing_topics/util.h"
Abigail Katcoff02cceaa2023-03-27 16:22:3221#include "components/privacy_sandbox/canonical_topic.h"
Yao Xiaocc379392022-03-25 21:39:0622#include "content/public/browser/browsing_topics_site_data_manager.h"
23#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
Yao Xiao716e4812022-04-20 22:57:1924#include "services/metrics/public/cpp/ukm_builders.h"
25#include "services/metrics/public/cpp/ukm_recorder.h"
Yao Xiaocc379392022-03-25 21:39:0626#include "third_party/blink/public/common/features.h"
27#include "third_party/blink/public/mojom/browsing_topics/browsing_topics.mojom.h"
28
Yao Xiao7a1995b2022-03-09 08:18:5529namespace browsing_topics {
30
Yao Xiaocc379392022-03-25 21:39:0631namespace {
32
Yao Xiaobf39e34d2022-03-28 21:48:2833// Returns whether the topics should all be cleared given
34// `browsing_topics_data_accessible_since` and `is_topic_allowed_by_settings`.
35// Returns true if `browsing_topics_data_accessible_since` is greater than the
Abigail Katcoff02cceaa2023-03-27 16:22:3236// last calculation time.
Yao Xiaobf39e34d2022-03-28 21:48:2837bool ShouldClearTopicsOnStartup(
Yao Xiaocc379392022-03-25 21:39:0638 const BrowsingTopicsState& browsing_topics_state,
Abigail Katcoff02cceaa2023-03-27 16:22:3239 base::Time browsing_topics_data_accessible_since) {
Yao Xiaobf39e34d2022-03-28 21:48:2840 if (browsing_topics_state.epochs().empty())
41 return false;
42
Yao Xiaocc379392022-03-25 21:39:0643 // Here we rely on the fact that `browsing_topics_data_accessible_since` can
44 // only be updated to base::Time::Now() due to data deletion. So we'll either
45 // need to clear all topics data, or no-op. If this assumption no longer
46 // holds, we'd need to iterate over all epochs, check their calculation time,
47 // and selectively delete the epochs.
Yao Xiaobf39e34d2022-03-28 21:48:2848 if (browsing_topics_data_accessible_since >
49 browsing_topics_state.epochs().back().calculation_time()) {
50 return true;
51 }
52
Abigail Katcoff02cceaa2023-03-27 16:22:3253 return false;
54}
55
56// Returns a vector of top topics which are disallowed and thus should be
57// cleared. This could happen if the topic became disallowed when
58// `browsing_topics_state` was still loading (and we didn't get a chance to
59// clear it).
60std::vector<privacy_sandbox::CanonicalTopic> TopTopicsToClearOnStartup(
61 const BrowsingTopicsState& browsing_topics_state,
62 base::RepeatingCallback<bool(const privacy_sandbox::CanonicalTopic&)>
63 is_topic_allowed_by_settings) {
64 DCHECK(!is_topic_allowed_by_settings.is_null());
65 std::vector<privacy_sandbox::CanonicalTopic> top_topics_to_clear;
Yao Xiaobf39e34d2022-03-28 21:48:2866 for (const EpochTopics& epoch : browsing_topics_state.epochs()) {
67 for (const TopicAndDomains& topic_and_domains :
68 epoch.top_topics_and_observing_domains()) {
69 if (!topic_and_domains.IsValid())
70 continue;
Abigail Katcoff02cceaa2023-03-27 16:22:3271 privacy_sandbox::CanonicalTopic canonical_topic =
72 privacy_sandbox::CanonicalTopic(topic_and_domains.topic(),
73 epoch.taxonomy_version());
74 if (!is_topic_allowed_by_settings.Run(canonical_topic)) {
75 top_topics_to_clear.emplace_back(canonical_topic);
Yao Xiaobf39e34d2022-03-28 21:48:2876 }
77 }
78 }
Abigail Katcoff02cceaa2023-03-27 16:22:3279 return top_topics_to_clear;
Yao Xiaocc379392022-03-25 21:39:0680}
81
82struct StartupCalculateDecision {
Abigail Katcoff02cceaa2023-03-27 16:22:3283 bool clear_all_topics_data = true;
Yao Xiaocc379392022-03-25 21:39:0684 base::TimeDelta next_calculation_delay;
Abigail Katcoff02cceaa2023-03-27 16:22:3285 std::vector<privacy_sandbox::CanonicalTopic> topics_to_clear;
Yao Xiaocc379392022-03-25 21:39:0686};
87
88StartupCalculateDecision GetStartupCalculationDecision(
89 const BrowsingTopicsState& browsing_topics_state,
Yao Xiaobf39e34d2022-03-28 21:48:2890 base::Time browsing_topics_data_accessible_since,
91 base::RepeatingCallback<bool(const privacy_sandbox::CanonicalTopic&)>
92 is_topic_allowed_by_settings) {
Yao Xiaocc379392022-03-25 21:39:0693 // The topics have never been calculated. This could happen with a fresh
94 // profile or the if the config has updated. In case of a config update, the
95 // topics should have already been cleared when initializing the
96 // `BrowsingTopicsState`.
97 if (browsing_topics_state.next_scheduled_calculation_time().is_null()) {
Abigail Katcoff02cceaa2023-03-27 16:22:3298 return StartupCalculateDecision{.clear_all_topics_data = false,
99 .next_calculation_delay = base::TimeDelta(),
100 .topics_to_clear = {}};
Yao Xiaocc379392022-03-25 21:39:06101 }
102
103 // This could happen when clear-on-exit is turned on and has caused the
Abigail Katcoff02cceaa2023-03-27 16:22:32104 // cookies to be deleted on startup
105 bool should_clear_all_topics_data = ShouldClearTopicsOnStartup(
106 browsing_topics_state, browsing_topics_data_accessible_since);
107
108 std::vector<privacy_sandbox::CanonicalTopic> topics_to_clear;
109 if (!should_clear_all_topics_data) {
110 topics_to_clear = TopTopicsToClearOnStartup(browsing_topics_state,
111 is_topic_allowed_by_settings);
112 }
Yao Xiaocc379392022-03-25 21:39:06113
114 base::TimeDelta presumed_next_calculation_delay =
115 browsing_topics_state.next_scheduled_calculation_time() -
116 base::Time::Now();
117
118 // The scheduled calculation time was reached before the startup.
119 if (presumed_next_calculation_delay <= base::TimeDelta()) {
120 return StartupCalculateDecision{
Abigail Katcoff02cceaa2023-03-27 16:22:32121 .clear_all_topics_data = should_clear_all_topics_data,
122 .next_calculation_delay = base::TimeDelta(),
123 .topics_to_clear = topics_to_clear};
Yao Xiaocc379392022-03-25 21:39:06124 }
125
126 // This could happen if the machine time has changed since the last
127 // calculation. Recalculate immediately to align with the expected schedule
128 // rather than potentially stop computing for a very long time.
129 if (presumed_next_calculation_delay >=
130 2 * blink::features::kBrowsingTopicsTimePeriodPerEpoch.Get()) {
131 return StartupCalculateDecision{
Abigail Katcoff02cceaa2023-03-27 16:22:32132 .clear_all_topics_data = should_clear_all_topics_data,
133 .next_calculation_delay = base::TimeDelta(),
134 .topics_to_clear = topics_to_clear};
Yao Xiaocc379392022-03-25 21:39:06135 }
136
137 return StartupCalculateDecision{
Abigail Katcoff02cceaa2023-03-27 16:22:32138 .clear_all_topics_data = should_clear_all_topics_data,
139 .next_calculation_delay = presumed_next_calculation_delay,
140 .topics_to_clear = topics_to_clear};
Yao Xiaocc379392022-03-25 21:39:06141}
142
Josh Karlind147c672023-03-06 20:45:13143void RecordBrowsingTopicsApiResultMetrics(ApiAccessResult result,
144 content::RenderFrameHost* main_frame,
145 bool is_get_topics_request) {
Yao Xiao9c789ea2022-10-26 14:46:55146 // The `BrowsingTopics_DocumentBrowsingTopicsApiResult2` event is only
147 // recorded for request that gets the topics.
Josh Karlind147c672023-03-06 20:45:13148 if (!is_get_topics_request) {
Yao Xiao9c789ea2022-10-26 14:46:55149 return;
Josh Karlind147c672023-03-06 20:45:13150 }
151
152 base::UmaHistogramEnumeration("BrowsingTopics.Result.Status", result);
153
154 if (result == browsing_topics::ApiAccessResult::kSuccess) {
155 return;
156 }
Yao Xiao9c789ea2022-10-26 14:46:55157
Yao Xiao716e4812022-04-20 22:57:19158 ukm::UkmRecorder* ukm_recorder = ukm::UkmRecorder::Get();
Yao Xiao84826f42022-10-24 16:13:58159 ukm::builders::BrowsingTopics_DocumentBrowsingTopicsApiResult2 builder(
Yao Xiao716e4812022-04-20 22:57:19160 main_frame->GetPageUkmSourceId());
Josh Karlind147c672023-03-06 20:45:13161 builder.SetFailureReason(static_cast<int64_t>(result));
162
Yao Xiao716e4812022-04-20 22:57:19163 builder.Record(ukm_recorder->Get());
164}
165
Josh Karlind147c672023-03-06 20:45:13166void RecordBrowsingTopicsApiResultMetrics(
Yao Xiao84826f42022-10-24 16:13:58167 const std::vector<CandidateTopic>& valid_candidate_topics,
Yao Xiao716e4812022-04-20 22:57:19168 content::RenderFrameHost* main_frame) {
Yao Xiao716e4812022-04-20 22:57:19169 ukm::UkmRecorder* ukm_recorder = ukm::UkmRecorder::Get();
Yao Xiao84826f42022-10-24 16:13:58170 ukm::builders::BrowsingTopics_DocumentBrowsingTopicsApiResult2 builder(
Yao Xiao716e4812022-04-20 22:57:19171 main_frame->GetPageUkmSourceId());
172
Josh Karlind147c672023-03-06 20:45:13173 int real_count = 0;
174 int fake_count = 0;
175 int filtered_count = 0;
176
Yao Xiao84826f42022-10-24 16:13:58177 for (size_t i = 0; i < 3u && valid_candidate_topics.size() > i; ++i) {
178 const CandidateTopic& candidate_topic = valid_candidate_topics[i];
Yao Xiao716e4812022-04-20 22:57:19179
Yao Xiao84826f42022-10-24 16:13:58180 DCHECK(candidate_topic.IsValid());
Yao Xiao716e4812022-04-20 22:57:19181
Josh Karlind147c672023-03-06 20:45:13182 if (candidate_topic.should_be_filtered()) {
183 filtered_count += 1;
184 } else {
185 candidate_topic.is_true_topic() ? real_count += 1 : fake_count += 1;
186 }
187
Yao Xiao716e4812022-04-20 22:57:19188 if (i == 0) {
Yao Xiao84826f42022-10-24 16:13:58189 builder.SetCandidateTopic0(candidate_topic.topic().value())
190 .SetCandidateTopic0IsTrueTopTopic(candidate_topic.is_true_topic())
191 .SetCandidateTopic0ShouldBeFiltered(
192 candidate_topic.should_be_filtered())
193 .SetCandidateTopic0TaxonomyVersion(candidate_topic.taxonomy_version())
194 .SetCandidateTopic0ModelVersion(candidate_topic.model_version());
Yao Xiao716e4812022-04-20 22:57:19195 } else if (i == 1) {
Yao Xiao84826f42022-10-24 16:13:58196 builder.SetCandidateTopic1(candidate_topic.topic().value())
197 .SetCandidateTopic1IsTrueTopTopic(candidate_topic.is_true_topic())
198 .SetCandidateTopic1ShouldBeFiltered(
199 candidate_topic.should_be_filtered())
200 .SetCandidateTopic1TaxonomyVersion(candidate_topic.taxonomy_version())
201 .SetCandidateTopic1ModelVersion(candidate_topic.model_version());
Yao Xiao716e4812022-04-20 22:57:19202 } else {
203 DCHECK_EQ(i, 2u);
Yao Xiao84826f42022-10-24 16:13:58204 builder.SetCandidateTopic2(candidate_topic.topic().value())
205 .SetCandidateTopic2IsTrueTopTopic(candidate_topic.is_true_topic())
206 .SetCandidateTopic2ShouldBeFiltered(
207 candidate_topic.should_be_filtered())
208 .SetCandidateTopic2TaxonomyVersion(candidate_topic.taxonomy_version())
209 .SetCandidateTopic2ModelVersion(candidate_topic.model_version());
Yao Xiao716e4812022-04-20 22:57:19210 }
211 }
212
Josh Karlind147c672023-03-06 20:45:13213 const int kBuckets = 10;
214 DCHECK_GE(kBuckets,
215 blink::features::kBrowsingTopicsNumberOfEpochsToExpose.Get());
216
217 base::UmaHistogramExactLinear("BrowsingTopics.Result.RealTopicCount",
218 real_count, kBuckets);
219 base::UmaHistogramExactLinear("BrowsingTopics.Result.FakeTopicCount",
220 fake_count, kBuckets);
221 base::UmaHistogramExactLinear("BrowsingTopics.Result.FilteredTopicCount",
222 filtered_count, kBuckets);
223
Yao Xiao716e4812022-04-20 22:57:19224 builder.Record(ukm_recorder->Get());
225}
226
// The kind of action a topics request performs, as reported to the
// "BrowsingTopics.ApiActionType" histogram.
//
// These values are persisted to logs. Entries should not be renumbered and
// numeric values should never be reused.
enum class BrowsingTopicsApiActionType {
  // document.browsingTopics({skipObservation: true}): get only.
  kGetViaDocumentApi = 0,

  // document.browsingTopics(): get and observe.
  kGetAndObserveViaDocumentApi = 1,

  // fetch(<url>, {browsingTopics: true}) or the analogous XHR request: get.
  kGetViaFetchLikeApi = 2,

  // "Sec-Browsing-Topics: ?1" response header for the
  // fetch(<url>, {browsingTopics: true}) request, or for the analogous XHR
  // request: observe.
  kObserveViaFetchLikeApi = 3,

  // <iframe src=[url] browsingtopics>: get.
  kGetViaIframeAttributeApi = 4,

  // "Sec-Browsing-Topics: ?1" response header for the
  // <iframe src=[url] browsingtopics> request: observe.
  kObserveViaIframeAttributeApi = 5,

  kMaxValue = kObserveViaIframeAttributeApi,
};
256
257void RecordBrowsingTopicsApiActionTypeMetrics(ApiCallerSource caller_source,
258 bool get_topics,
259 bool observe) {
260 static constexpr char kBrowsingTopicsApiActionTypeHistogramId[] =
261 "BrowsingTopics.ApiActionType";
262
263 if (caller_source == ApiCallerSource::kJavaScript) {
264 DCHECK(get_topics);
265
266 if (!observe) {
267 base::UmaHistogramEnumeration(
268 kBrowsingTopicsApiActionTypeHistogramId,
269 BrowsingTopicsApiActionType::kGetViaDocumentApi);
270 return;
271 }
272
273 base::UmaHistogramEnumeration(
274 kBrowsingTopicsApiActionTypeHistogramId,
275 BrowsingTopicsApiActionType::kGetAndObserveViaDocumentApi);
276
277 return;
278 }
279
Yao Xiao9f73e882023-03-27 19:01:41280 if (caller_source == ApiCallerSource::kIframeAttribute) {
281 if (get_topics) {
282 DCHECK(!observe);
283
284 base::UmaHistogramEnumeration(
285 kBrowsingTopicsApiActionTypeHistogramId,
286 BrowsingTopicsApiActionType::kGetViaIframeAttributeApi);
287 return;
288 }
289
290 DCHECK(observe);
291 base::UmaHistogramEnumeration(
292 kBrowsingTopicsApiActionTypeHistogramId,
293 BrowsingTopicsApiActionType::kObserveViaIframeAttributeApi);
294
295 return;
296 }
297
Yao Xiaod15c5532023-02-07 18:50:12298 DCHECK_EQ(caller_source, ApiCallerSource::kFetch);
299
300 if (get_topics) {
301 DCHECK(!observe);
302
303 base::UmaHistogramEnumeration(
304 kBrowsingTopicsApiActionTypeHistogramId,
305 BrowsingTopicsApiActionType::kGetViaFetchLikeApi);
306 return;
307 }
308
309 DCHECK(observe);
310 base::UmaHistogramEnumeration(
311 kBrowsingTopicsApiActionTypeHistogramId,
312 BrowsingTopicsApiActionType::kObserveViaFetchLikeApi);
313}
314
Yao Xiaocc379392022-03-25 21:39:06315} // namespace
316
Yao Xiao7a1995b2022-03-09 08:18:55317BrowsingTopicsServiceImpl::~BrowsingTopicsServiceImpl() = default;
318
Yao Xiaocc379392022-03-25 21:39:06319BrowsingTopicsServiceImpl::BrowsingTopicsServiceImpl(
320 const base::FilePath& profile_path,
321 privacy_sandbox::PrivacySandboxSettings* privacy_sandbox_settings,
322 history::HistoryService* history_service,
323 content::BrowsingTopicsSiteDataManager* site_data_manager,
Robert Ogdenad99d6f62023-05-01 21:40:09324 std::unique_ptr<Annotator> annotator,
Christian Dullwebera4a58022023-01-27 01:53:42325 TopicAccessedCallback topic_accessed_callback)
Yao Xiaocc379392022-03-25 21:39:06326 : privacy_sandbox_settings_(privacy_sandbox_settings),
327 history_service_(history_service),
328 site_data_manager_(site_data_manager),
Yao Xiaocc379392022-03-25 21:39:06329 browsing_topics_state_(
330 profile_path,
331 base::BindOnce(
332 &BrowsingTopicsServiceImpl::OnBrowsingTopicsStateLoaded,
Christian Dullwebera4a58022023-01-27 01:53:42333 base::Unretained(this))),
Robert Ogdenad99d6f62023-05-01 21:40:09334 annotator_(std::move(annotator)),
Christian Dullwebera4a58022023-01-27 01:53:42335 topic_accessed_callback_(std::move(topic_accessed_callback)) {
336 DCHECK(topic_accessed_callback_);
Yao Xiaocc379392022-03-25 21:39:06337 privacy_sandbox_settings_observation_.Observe(privacy_sandbox_settings);
338 history_service_observation_.Observe(history_service);
Yao Xiaocc379392022-03-25 21:39:06339}
340
Yao Xiao9c789ea2022-10-26 14:46:55341bool BrowsingTopicsServiceImpl::HandleTopicsWebApi(
Yao Xiaocc379392022-03-25 21:39:06342 const url::Origin& context_origin,
Yao Xiao1d60ed32022-09-27 16:33:24343 content::RenderFrameHost* main_frame,
Yao Xiao9c789ea2022-10-26 14:46:55344 ApiCallerSource caller_source,
345 bool get_topics,
346 bool observe,
347 std::vector<blink::mojom::EpochTopicPtr>& topics) {
348 DCHECK(topics.empty());
349 DCHECK(get_topics || observe);
350
Yao Xiaod15c5532023-02-07 18:50:12351 RecordBrowsingTopicsApiActionTypeMetrics(caller_source, get_topics, observe);
352
Yao Xiao716e4812022-04-20 22:57:19353 if (!browsing_topics_state_loaded_) {
Josh Karlind147c672023-03-06 20:45:13354 RecordBrowsingTopicsApiResultMetrics(ApiAccessResult::kStateNotReady,
355 main_frame, get_topics);
Yao Xiao9c789ea2022-10-26 14:46:55356 return false;
Yao Xiao716e4812022-04-20 22:57:19357 }
Yao Xiaocc379392022-03-25 21:39:06358
Yao Xiao716e4812022-04-20 22:57:19359 if (!privacy_sandbox_settings_->IsTopicsAllowed()) {
Josh Karlind147c672023-03-06 20:45:13360 RecordBrowsingTopicsApiResultMetrics(
361 ApiAccessResult::kAccessDisallowedBySettings, main_frame, get_topics);
Yao Xiao9c789ea2022-10-26 14:46:55362 return false;
Yao Xiao716e4812022-04-20 22:57:19363 }
Yao Xiaocc379392022-03-25 21:39:06364
365 if (!privacy_sandbox_settings_->IsTopicsAllowedForContext(
Rohit Agarwalaac12df2022-12-20 18:15:23366 /*top_frame_origin=*/main_frame->GetLastCommittedOrigin(),
367 context_origin.GetURL())) {
Josh Karlind147c672023-03-06 20:45:13368 RecordBrowsingTopicsApiResultMetrics(
369 ApiAccessResult::kAccessDisallowedBySettings, main_frame, get_topics);
Yao Xiao9c789ea2022-10-26 14:46:55370 return false;
Yao Xiaocc379392022-03-25 21:39:06371 }
372
Josh Karlind147c672023-03-06 20:45:13373 RecordBrowsingTopicsApiResultMetrics(ApiAccessResult::kSuccess, main_frame,
374 get_topics);
375
Yao Xiaocc379392022-03-25 21:39:06376 std::string context_domain =
377 net::registry_controlled_domains::GetDomainAndRegistry(
378 context_origin.GetURL(),
379 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
380
381 HashedDomain hashed_context_domain = HashContextDomainForStorage(
382 browsing_topics_state_.hmac_key(), context_domain);
383
Yao Xiao1d60ed32022-09-27 16:33:24384 if (observe) {
385 // Track the API usage context after the permissions check.
386 BrowsingTopicsPageLoadDataTracker::GetOrCreateForPage(main_frame->GetPage())
387 ->OnBrowsingTopicsApiUsed(hashed_context_domain, history_service_);
388 }
Yao Xiaocc379392022-03-25 21:39:06389
Yao Xiao9c789ea2022-10-26 14:46:55390 if (!get_topics)
391 return true;
392
Yao Xiaocc379392022-03-25 21:39:06393 std::string top_domain =
394 net::registry_controlled_domains::GetDomainAndRegistry(
395 main_frame->GetLastCommittedOrigin().GetURL(),
396 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
397
Yao Xiao84826f42022-10-24 16:13:58398 std::vector<CandidateTopic> valid_candidate_topics;
Yao Xiao716e4812022-04-20 22:57:19399
Yao Xiaocc379392022-03-25 21:39:06400 for (const EpochTopics* epoch :
401 browsing_topics_state_.EpochsForSite(top_domain)) {
Yao Xiao414312992022-10-18 20:25:11402 CandidateTopic candidate_topic = epoch->CandidateTopicForSite(
403 top_domain, hashed_context_domain, browsing_topics_state_.hmac_key());
Yao Xiao716e4812022-04-20 22:57:19404
Yao Xiao414312992022-10-18 20:25:11405 if (!candidate_topic.IsValid())
Yao Xiaocc379392022-03-25 21:39:06406 continue;
407
Yao Xiaobf39e34d2022-03-28 21:48:28408 // Although a top topic can never be in the disallowed state, the returned
Yao Xiao414312992022-10-18 20:25:11409 // `candidate_topic` may be the random one. Thus we still need this check.
Yao Xiaocc379392022-03-25 21:39:06410 if (!privacy_sandbox_settings_->IsTopicAllowed(
Yao Xiao414312992022-10-18 20:25:11411 privacy_sandbox::CanonicalTopic(
412 candidate_topic.topic(), candidate_topic.taxonomy_version()))) {
413 DCHECK(!candidate_topic.is_true_topic());
Yao Xiaobf39e34d2022-03-28 21:48:28414 continue;
Yao Xiaocc379392022-03-25 21:39:06415 }
416
Yao Xiao84826f42022-10-24 16:13:58417 valid_candidate_topics.push_back(std::move(candidate_topic));
418 }
419
Josh Karlind147c672023-03-06 20:45:13420 RecordBrowsingTopicsApiResultMetrics(valid_candidate_topics, main_frame);
Yao Xiao84826f42022-10-24 16:13:58421
Yao Xiao84826f42022-10-24 16:13:58422 for (const CandidateTopic& candidate_topic : valid_candidate_topics) {
423 if (candidate_topic.should_be_filtered())
424 continue;
425
Yao Xiao3a03e602022-10-18 18:17:56426 // `PageSpecificContentSettings` should only observe true top topics
427 // accessed on the page. It's okay to notify the same topic multiple
428 // times even though duplicate topics will be removed in the end.
Yao Xiao414312992022-10-18 20:25:11429 if (candidate_topic.is_true_topic()) {
Yao Xiao3a03e602022-10-18 18:17:56430 privacy_sandbox::CanonicalTopic canonical_topic(
Yao Xiao414312992022-10-18 20:25:11431 candidate_topic.topic(), candidate_topic.taxonomy_version());
Christian Dullwebera4a58022023-01-27 01:53:42432 topic_accessed_callback_.Run(main_frame, context_origin,
433 /*blocked_by_policy=*/false,
434 canonical_topic);
Yao Xiao3a03e602022-10-18 18:17:56435 }
436
Yao Xiao21f1faa2022-04-29 06:20:32437 auto result_topic = blink::mojom::EpochTopic::New();
Yao Xiao414312992022-10-18 20:25:11438 result_topic->topic = candidate_topic.topic().value();
Yao Xiaocc379392022-03-25 21:39:06439 result_topic->config_version = base::StrCat(
440 {"chrome.", base::NumberToString(
441 blink::features::kBrowsingTopicsConfigVersion.Get())});
Yao Xiao414312992022-10-18 20:25:11442 result_topic->model_version =
443 base::NumberToString(candidate_topic.model_version());
Yao Xiaocc379392022-03-25 21:39:06444 result_topic->taxonomy_version =
Yao Xiao414312992022-10-18 20:25:11445 base::NumberToString(candidate_topic.taxonomy_version());
Yao Xiaocc379392022-03-25 21:39:06446 result_topic->version = base::StrCat({result_topic->config_version, ":",
447 result_topic->taxonomy_version, ":",
448 result_topic->model_version});
Yao Xiao9c789ea2022-10-26 14:46:55449 topics.emplace_back(std::move(result_topic));
Yao Xiaocc379392022-03-25 21:39:06450 }
451
Yao Xiao9c789ea2022-10-26 14:46:55452 std::sort(topics.begin(), topics.end());
Yao Xiao716e4812022-04-20 22:57:19453
Yao Xiao84826f42022-10-24 16:13:58454 // Remove duplicate entries.
Yao Xiao9c789ea2022-10-26 14:46:55455 topics.erase(std::unique(topics.begin(), topics.end()), topics.end());
Yao Xiaocc379392022-03-25 21:39:06456
Yao Xiao9c789ea2022-10-26 14:46:55457 return true;
Yao Xiaocc379392022-03-25 21:39:06458}
Yao Xiao7a1995b2022-03-09 08:18:55459
Yao Xiao57892a22022-06-28 19:21:41460void BrowsingTopicsServiceImpl::GetBrowsingTopicsStateForWebUi(
461 bool calculate_now,
462 mojom::PageHandler::GetBrowsingTopicsStateCallback callback) {
Yao Xiao21f1faa2022-04-29 06:20:32463 if (!browsing_topics_state_loaded_) {
Yao Xiao57892a22022-06-28 19:21:41464 std::move(callback).Run(
465 mojom::WebUIGetBrowsingTopicsStateResult::NewOverrideStatusMessage(
466 "State loading hasn't finished. Please retry shortly."));
467 return;
Yao Xiao21f1faa2022-04-29 06:20:32468 }
469
Yao Xiao57892a22022-06-28 19:21:41470 // If a calculation is already in progress, get the webui topics state after
471 // the calculation is done. Do this regardless of whether `calculate_now` is
472 // true, i.e. if `calculate_now` is true, this request is effectively merged
473 // with the in progress calculation.
474 if (topics_calculator_) {
475 get_state_for_webui_callbacks_.push_back(std::move(callback));
476 return;
Yao Xiao21f1faa2022-04-29 06:20:32477 }
478
Yao Xiao57892a22022-06-28 19:21:41479 DCHECK(schedule_calculate_timer_.IsRunning());
Yao Xiao21f1faa2022-04-29 06:20:32480
Yao Xiao57892a22022-06-28 19:21:41481 if (calculate_now) {
482 get_state_for_webui_callbacks_.push_back(std::move(callback));
Yao Xiao21f1faa2022-04-29 06:20:32483
Yao Xiao57892a22022-06-28 19:21:41484 schedule_calculate_timer_.AbandonAndStop();
485 CalculateBrowsingTopics();
486 return;
Yao Xiao21f1faa2022-04-29 06:20:32487 }
488
Yao Xiao57892a22022-06-28 19:21:41489 std::move(callback).Run(GetBrowsingTopicsStateForWebUiHelper());
Yao Xiao21f1faa2022-04-29 06:20:32490}
491
Yao Xiao7a1995b2022-03-09 08:18:55492std::vector<privacy_sandbox::CanonicalTopic>
Yao Xiao7a1995b2022-03-09 08:18:55493BrowsingTopicsServiceImpl::GetTopTopicsForDisplay() const {
Yao Xiaocc379392022-03-25 21:39:06494 if (!browsing_topics_state_loaded_)
495 return {};
496
497 std::vector<privacy_sandbox::CanonicalTopic> result;
498
499 for (const EpochTopics& epoch : browsing_topics_state_.epochs()) {
Yao Xiaobf39e34d2022-03-28 21:48:28500 DCHECK_LE(epoch.padded_top_topics_start_index(),
501 epoch.top_topics_and_observing_domains().size());
502
503 for (size_t i = 0; i < epoch.padded_top_topics_start_index(); ++i) {
504 const TopicAndDomains& topic_and_domains =
505 epoch.top_topics_and_observing_domains()[i];
506
507 if (!topic_and_domains.IsValid())
Yao Xiaocc379392022-03-25 21:39:06508 continue;
509
Yao Xiaobf39e34d2022-03-28 21:48:28510 // A top topic can never be in the disallowed state (i.e. it will be
511 // cleared when it becomes diallowed).
512 DCHECK(privacy_sandbox_settings_->IsTopicAllowed(
513 privacy_sandbox::CanonicalTopic(topic_and_domains.topic(),
514 epoch.taxonomy_version())));
515
516 result.emplace_back(topic_and_domains.topic(), epoch.taxonomy_version());
Yao Xiaocc379392022-03-25 21:39:06517 }
518 }
519
520 return result;
521}
522
Robert Ogdenad99d6f62023-05-01 21:40:09523Annotator* BrowsingTopicsServiceImpl::GetAnnotator() {
524 return annotator_.get();
525}
526
Yao Xiaobf39e34d2022-03-28 21:48:28527void BrowsingTopicsServiceImpl::ClearTopic(
528 const privacy_sandbox::CanonicalTopic& canonical_topic) {
529 if (!browsing_topics_state_loaded_)
530 return;
531
532 browsing_topics_state_.ClearTopic(canonical_topic.topic_id(),
533 canonical_topic.taxonomy_version());
534}
535
536void BrowsingTopicsServiceImpl::ClearTopicsDataForOrigin(
537 const url::Origin& origin) {
538 if (!browsing_topics_state_loaded_)
539 return;
540
541 std::string context_domain =
542 net::registry_controlled_domains::GetDomainAndRegistry(
543 origin.GetURL(),
544 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
545
546 HashedDomain hashed_context_domain = HashContextDomainForStorage(
547 browsing_topics_state_.hmac_key(), context_domain);
548
549 browsing_topics_state_.ClearContextDomain(hashed_context_domain);
550 site_data_manager_->ClearContextDomain(hashed_context_domain);
551}
552
553void BrowsingTopicsServiceImpl::ClearAllTopicsData() {
554 if (!browsing_topics_state_loaded_)
555 return;
556
557 browsing_topics_state_.ClearAllTopics();
558 site_data_manager_->ExpireDataBefore(base::Time::Now());
559}
560
Yao Xiaocc379392022-03-25 21:39:06561std::unique_ptr<BrowsingTopicsCalculator>
562BrowsingTopicsServiceImpl::CreateCalculator(
563 privacy_sandbox::PrivacySandboxSettings* privacy_sandbox_settings,
564 history::HistoryService* history_service,
565 content::BrowsingTopicsSiteDataManager* site_data_manager,
Robert Ogdenad99d6f62023-05-01 21:40:09566 Annotator* annotator,
Yao Xiao57892a22022-06-28 19:21:41567 const base::circular_deque<EpochTopics>& epochs,
Yao Xiaocc379392022-03-25 21:39:06568 BrowsingTopicsCalculator::CalculateCompletedCallback callback) {
569 return std::make_unique<BrowsingTopicsCalculator>(
Robert Ogdenad99d6f62023-05-01 21:40:09570 privacy_sandbox_settings, history_service, site_data_manager, annotator,
571 epochs, std::move(callback));
Yao Xiaocc379392022-03-25 21:39:06572}
573
574const BrowsingTopicsState& BrowsingTopicsServiceImpl::browsing_topics_state() {
575 return browsing_topics_state_;
576}
577
578void BrowsingTopicsServiceImpl::ScheduleBrowsingTopicsCalculation(
579 base::TimeDelta delay) {
580 DCHECK(browsing_topics_state_loaded_);
581
582 // `this` owns the timer, which is automatically cancelled on destruction, so
583 // base::Unretained(this) is safe.
584 schedule_calculate_timer_.Start(
585 FROM_HERE, delay,
586 base::BindOnce(&BrowsingTopicsServiceImpl::CalculateBrowsingTopics,
587 base::Unretained(this)));
588}
589
590void BrowsingTopicsServiceImpl::CalculateBrowsingTopics() {
591 DCHECK(browsing_topics_state_loaded_);
592
593 DCHECK(!topics_calculator_);
594
595 // `this` owns `topics_calculator_` so `topics_calculator_` should not invoke
596 // the callback once it's destroyed.
597 topics_calculator_ = CreateCalculator(
598 privacy_sandbox_settings_, history_service_, site_data_manager_,
Robert Ogdenad99d6f62023-05-01 21:40:09599 annotator_.get(), browsing_topics_state_.epochs(),
Yao Xiaocc379392022-03-25 21:39:06600 base::BindOnce(
601 &BrowsingTopicsServiceImpl::OnCalculateBrowsingTopicsCompleted,
602 base::Unretained(this)));
603}
604
605void BrowsingTopicsServiceImpl::OnCalculateBrowsingTopicsCompleted(
606 EpochTopics epoch_topics) {
607 DCHECK(browsing_topics_state_loaded_);
608
609 DCHECK(topics_calculator_);
610 topics_calculator_.reset();
611
612 browsing_topics_state_.AddEpoch(std::move(epoch_topics));
613 browsing_topics_state_.UpdateNextScheduledCalculationTime();
614
615 ScheduleBrowsingTopicsCalculation(
616 blink::features::kBrowsingTopicsTimePeriodPerEpoch.Get());
Yao Xiao57892a22022-06-28 19:21:41617
618 if (!get_state_for_webui_callbacks_.empty()) {
619 mojom::WebUIGetBrowsingTopicsStateResultPtr webui_state =
620 GetBrowsingTopicsStateForWebUiHelper();
621
622 for (auto& callback : get_state_for_webui_callbacks_) {
623 std::move(callback).Run(webui_state->Clone());
624 }
625
626 get_state_for_webui_callbacks_.clear();
627 }
Yao Xiaocc379392022-03-25 21:39:06628}
629
630void BrowsingTopicsServiceImpl::OnBrowsingTopicsStateLoaded() {
631 DCHECK(!browsing_topics_state_loaded_);
632 browsing_topics_state_loaded_ = true;
633
634 base::Time browsing_topics_data_sccessible_since =
635 privacy_sandbox_settings_->TopicsDataAccessibleSince();
636
637 StartupCalculateDecision decision = GetStartupCalculationDecision(
Yao Xiaobf39e34d2022-03-28 21:48:28638 browsing_topics_state_, browsing_topics_data_sccessible_since,
639 base::BindRepeating(
640 &privacy_sandbox::PrivacySandboxSettings::IsTopicAllowed,
641 base::Unretained(privacy_sandbox_settings_)));
Yao Xiaocc379392022-03-25 21:39:06642
Abigail Katcoff02cceaa2023-03-27 16:22:32643 if (decision.clear_all_topics_data) {
Yao Xiaocc379392022-03-25 21:39:06644 browsing_topics_state_.ClearAllTopics();
Abigail Katcoff02cceaa2023-03-27 16:22:32645 } else if (!decision.topics_to_clear.empty()) {
646 for (const privacy_sandbox::CanonicalTopic& canonical_topic :
647 decision.topics_to_clear) {
648 browsing_topics_state_.ClearTopic(canonical_topic.topic_id(),
649 canonical_topic.taxonomy_version());
650 }
651 }
Yao Xiaocc379392022-03-25 21:39:06652
653 site_data_manager_->ExpireDataBefore(browsing_topics_data_sccessible_since);
654
655 ScheduleBrowsingTopicsCalculation(decision.next_calculation_delay);
656}
657
658void BrowsingTopicsServiceImpl::Shutdown() {
659 privacy_sandbox_settings_observation_.Reset();
660 history_service_observation_.Reset();
661}
662
663void BrowsingTopicsServiceImpl::OnTopicsDataAccessibleSinceUpdated() {
664 if (!browsing_topics_state_loaded_)
665 return;
666
Yao Xiaobf39e34d2022-03-28 21:48:28667 // Here we rely on the fact that `browsing_topics_data_accessible_since` can
668 // only be updated to base::Time::Now() due to data deletion. In this case, we
669 // should just clear all topics.
670 browsing_topics_state_.ClearAllTopics();
671 site_data_manager_->ExpireDataBefore(
672 privacy_sandbox_settings_->TopicsDataAccessibleSince());
Yao Xiaocc379392022-03-25 21:39:06673
674 // Abort the outstanding topics calculation and restart immediately.
675 if (topics_calculator_) {
676 DCHECK(!schedule_calculate_timer_.IsRunning());
677
678 topics_calculator_.reset();
679 CalculateBrowsingTopics();
680 }
681}
682
683void BrowsingTopicsServiceImpl::OnURLsDeleted(
684 history::HistoryService* history_service,
685 const history::DeletionInfo& deletion_info) {
686 if (!browsing_topics_state_loaded_)
687 return;
688
689 // Ignore invalid time_range.
690 if (!deletion_info.IsAllHistory() && !deletion_info.time_range().IsValid())
691 return;
692
693 for (size_t i = 0; i < browsing_topics_state_.epochs().size(); ++i) {
694 const EpochTopics& epoch_topics = browsing_topics_state_.epochs()[i];
695
696 if (epoch_topics.empty())
697 continue;
698
Yao Xiao57892a22022-06-28 19:21:41699 // The typical case is assumed here. We cannot always derive the original
700 // history start time, as the necessary data (e.g. its previous epoch's
701 // calculation time) may have been gone.
702 base::Time history_data_start_time =
703 epoch_topics.calculation_time() -
704 blink::features::kBrowsingTopicsTimePeriodPerEpoch.Get();
705
Yao Xiaocc379392022-03-25 21:39:06706 bool time_range_overlap =
707 epoch_topics.calculation_time() >= deletion_info.time_range().begin() &&
Yao Xiao57892a22022-06-28 19:21:41708 history_data_start_time <= deletion_info.time_range().end();
Yao Xiaocc379392022-03-25 21:39:06709
710 if (time_range_overlap)
711 browsing_topics_state_.ClearOneEpoch(i);
712 }
713
714 // If there's an outstanding topics calculation, abort and restart it.
715 if (topics_calculator_) {
716 DCHECK(!schedule_calculate_timer_.IsRunning());
717
718 topics_calculator_.reset();
719 CalculateBrowsingTopics();
720 }
Yao Xiao7a1995b2022-03-09 08:18:55721}
722
Yao Xiao57892a22022-06-28 19:21:41723mojom::WebUIGetBrowsingTopicsStateResultPtr
724BrowsingTopicsServiceImpl::GetBrowsingTopicsStateForWebUiHelper() {
725 DCHECK(browsing_topics_state_loaded_);
726 DCHECK(!topics_calculator_);
727
728 auto webui_state = mojom::WebUIBrowsingTopicsState::New();
729
730 webui_state->next_scheduled_calculation_time =
731 browsing_topics_state_.next_scheduled_calculation_time();
732
733 for (const EpochTopics& epoch : browsing_topics_state_.epochs()) {
734 DCHECK_LE(epoch.padded_top_topics_start_index(),
735 epoch.top_topics_and_observing_domains().size());
736
737 // Note: for a failed epoch calculation, the default zero-initialized values
738 // will be displayed in the Web UI.
739 auto webui_epoch = mojom::WebUIEpoch::New();
740 webui_epoch->calculation_time = epoch.calculation_time();
741 webui_epoch->model_version = base::NumberToString(epoch.model_version());
742 webui_epoch->taxonomy_version =
743 base::NumberToString(epoch.taxonomy_version());
744
745 for (size_t i = 0; i < epoch.top_topics_and_observing_domains().size();
746 ++i) {
747 const TopicAndDomains& topic_and_domains =
748 epoch.top_topics_and_observing_domains()[i];
749
750 privacy_sandbox::CanonicalTopic canonical_topic =
751 privacy_sandbox::CanonicalTopic(topic_and_domains.topic(),
752 epoch.taxonomy_version());
753
754 std::vector<std::string> webui_observed_by_domains;
755 webui_observed_by_domains.reserve(
756 topic_and_domains.hashed_domains().size());
757 for (const auto& domain : topic_and_domains.hashed_domains()) {
758 webui_observed_by_domains.push_back(
759 base::NumberToString(domain.value()));
760 }
761
762 // Note: if the topic is invalid (i.e. cleared), the output `topic_id`
763 // will be 0; if the topic is invalid, or if the taxonomy version isn't
764 // recognized by this Chrome binary, the output `topic_name` will be
765 // "Unknown".
766 auto webui_topic = mojom::WebUITopic::New();
767 webui_topic->topic_id = topic_and_domains.topic().value();
768 webui_topic->topic_name = canonical_topic.GetLocalizedRepresentation();
769 webui_topic->is_real_topic = (i < epoch.padded_top_topics_start_index());
770 webui_topic->observed_by_domains = std::move(webui_observed_by_domains);
771
772 webui_epoch->topics.push_back(std::move(webui_topic));
773 }
774
775 webui_state->epochs.push_back(std::move(webui_epoch));
776 }
777
778 // Reorder the epochs from latest to oldest.
779 base::ranges::reverse(webui_state->epochs);
780
781 return mojom::WebUIGetBrowsingTopicsStateResult::NewBrowsingTopicsState(
782 std::move(webui_state));
783}
784
Yao Xiao7a1995b2022-03-09 08:18:55785} // namespace browsing_topics