blob: e3d5b666448d32bf9c520abfee01d7bc8c6bd6cb [file] [log] [blame]
// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include "components/browsing_topics/browsing_topics_service_impl.h"
6
Yao Xiaocc379392022-03-25 21:39:067#include <random>
8
9#include "base/rand_util.h"
Yao Xiao57892a22022-06-28 19:21:4110#include "base/ranges/algorithm.h"
Tommy C. Li088b42f2022-11-15 00:51:2811#include "base/strings/strcat.h"
Gabriel Charetted87f10f2022-03-31 00:44:2212#include "base/time/time.h"
Yao Xiaocc379392022-03-25 21:39:0613#include "components/browsing_topics/browsing_topics_calculator.h"
14#include "components/browsing_topics/browsing_topics_page_load_data_tracker.h"
Yao Xiao84826f42022-10-24 16:13:5815#include "components/browsing_topics/common/common_types.h"
Yao Xiao21f1faa2022-04-29 06:20:3216#include "components/browsing_topics/mojom/browsing_topics_internals.mojom.h"
Yao Xiaocc379392022-03-25 21:39:0617#include "components/browsing_topics/util.h"
Yao Xiao3a03e602022-10-18 18:17:5618#include "components/content_settings/browser/page_specific_content_settings.h"
Findit3a850b6e2022-09-08 08:39:4319#include "components/optimization_guide/content/browser/page_content_annotations_service.h"
Yao Xiaocc379392022-03-25 21:39:0620#include "content/public/browser/browsing_topics_site_data_manager.h"
21#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
Yao Xiao716e4812022-04-20 22:57:1922#include "services/metrics/public/cpp/ukm_builders.h"
23#include "services/metrics/public/cpp/ukm_recorder.h"
Yao Xiaocc379392022-03-25 21:39:0624#include "third_party/blink/public/common/features.h"
25#include "third_party/blink/public/mojom/browsing_topics/browsing_topics.mojom.h"
26
Yao Xiao7a1995b2022-03-09 08:18:5527namespace browsing_topics {
28
Yao Xiaocc379392022-03-25 21:39:0629namespace {
30
Yao Xiaobf39e34d2022-03-28 21:48:2831// Returns whether the topics should all be cleared given
32// `browsing_topics_data_accessible_since` and `is_topic_allowed_by_settings`.
33// Returns true if `browsing_topics_data_accessible_since` is greater than the
34// last calculation time, or if any top topic is disallowed from the settings.
35// The latter could happen if the topic became disallowed when
36// `browsing_topics_state` was still loading (and we didn't get a chance to
37// clear it). This is an unlikely edge case, so it's fine to over-delete.
38bool ShouldClearTopicsOnStartup(
Yao Xiaocc379392022-03-25 21:39:0639 const BrowsingTopicsState& browsing_topics_state,
Yao Xiaobf39e34d2022-03-28 21:48:2840 base::Time browsing_topics_data_accessible_since,
41 base::RepeatingCallback<bool(const privacy_sandbox::CanonicalTopic&)>
42 is_topic_allowed_by_settings) {
43 DCHECK(!is_topic_allowed_by_settings.is_null());
44
45 if (browsing_topics_state.epochs().empty())
46 return false;
47
Yao Xiaocc379392022-03-25 21:39:0648 // Here we rely on the fact that `browsing_topics_data_accessible_since` can
49 // only be updated to base::Time::Now() due to data deletion. So we'll either
50 // need to clear all topics data, or no-op. If this assumption no longer
51 // holds, we'd need to iterate over all epochs, check their calculation time,
52 // and selectively delete the epochs.
Yao Xiaobf39e34d2022-03-28 21:48:2853 if (browsing_topics_data_accessible_since >
54 browsing_topics_state.epochs().back().calculation_time()) {
55 return true;
56 }
57
58 for (const EpochTopics& epoch : browsing_topics_state.epochs()) {
59 for (const TopicAndDomains& topic_and_domains :
60 epoch.top_topics_and_observing_domains()) {
61 if (!topic_and_domains.IsValid())
62 continue;
63
64 if (!is_topic_allowed_by_settings.Run(privacy_sandbox::CanonicalTopic(
65 topic_and_domains.topic(), epoch.taxonomy_version()))) {
66 return true;
67 }
68 }
69 }
70
71 return false;
Yao Xiaocc379392022-03-25 21:39:0672}
73
74struct StartupCalculateDecision {
75 bool clear_topics_data = true;
76 base::TimeDelta next_calculation_delay;
77};
78
79StartupCalculateDecision GetStartupCalculationDecision(
80 const BrowsingTopicsState& browsing_topics_state,
Yao Xiaobf39e34d2022-03-28 21:48:2881 base::Time browsing_topics_data_accessible_since,
82 base::RepeatingCallback<bool(const privacy_sandbox::CanonicalTopic&)>
83 is_topic_allowed_by_settings) {
Yao Xiaocc379392022-03-25 21:39:0684 // The topics have never been calculated. This could happen with a fresh
85 // profile or the if the config has updated. In case of a config update, the
86 // topics should have already been cleared when initializing the
87 // `BrowsingTopicsState`.
88 if (browsing_topics_state.next_scheduled_calculation_time().is_null()) {
89 return StartupCalculateDecision{
90 .clear_topics_data = false,
91 .next_calculation_delay = base::TimeDelta()};
92 }
93
94 // This could happen when clear-on-exit is turned on and has caused the
Yao Xiaobf39e34d2022-03-28 21:48:2895 // cookies to be deleted on startup, of if a topic became disallowed when
96 // `browsing_topics_state` was still loading.
97 bool should_clear_topics_data = ShouldClearTopicsOnStartup(
98 browsing_topics_state, browsing_topics_data_accessible_since,
99 is_topic_allowed_by_settings);
Yao Xiaocc379392022-03-25 21:39:06100
101 base::TimeDelta presumed_next_calculation_delay =
102 browsing_topics_state.next_scheduled_calculation_time() -
103 base::Time::Now();
104
105 // The scheduled calculation time was reached before the startup.
106 if (presumed_next_calculation_delay <= base::TimeDelta()) {
107 return StartupCalculateDecision{
108 .clear_topics_data = should_clear_topics_data,
109 .next_calculation_delay = base::TimeDelta()};
110 }
111
112 // This could happen if the machine time has changed since the last
113 // calculation. Recalculate immediately to align with the expected schedule
114 // rather than potentially stop computing for a very long time.
115 if (presumed_next_calculation_delay >=
116 2 * blink::features::kBrowsingTopicsTimePeriodPerEpoch.Get()) {
117 return StartupCalculateDecision{
118 .clear_topics_data = should_clear_topics_data,
119 .next_calculation_delay = base::TimeDelta()};
120 }
121
122 return StartupCalculateDecision{
123 .clear_topics_data = should_clear_topics_data,
124 .next_calculation_delay = presumed_next_calculation_delay};
125}
126
Yao Xiao716e4812022-04-20 22:57:19127void RecordBrowsingTopicsApiResultUkmMetrics(
Yao Xiao84826f42022-10-24 16:13:58128 ApiAccessFailureReason failure_reason,
Yao Xiao9c789ea2022-10-26 14:46:55129 content::RenderFrameHost* main_frame,
130 bool is_get_topics_request) {
131 // The `BrowsingTopics_DocumentBrowsingTopicsApiResult2` event is only
132 // recorded for request that gets the topics.
133 if (!is_get_topics_request)
134 return;
135
Yao Xiao716e4812022-04-20 22:57:19136 ukm::UkmRecorder* ukm_recorder = ukm::UkmRecorder::Get();
Yao Xiao84826f42022-10-24 16:13:58137 ukm::builders::BrowsingTopics_DocumentBrowsingTopicsApiResult2 builder(
Yao Xiao716e4812022-04-20 22:57:19138 main_frame->GetPageUkmSourceId());
Yao Xiao84826f42022-10-24 16:13:58139 builder.SetFailureReason(static_cast<int64_t>(failure_reason));
Yao Xiao716e4812022-04-20 22:57:19140 builder.Record(ukm_recorder->Get());
141}
142
143void RecordBrowsingTopicsApiResultUkmMetrics(
Yao Xiao84826f42022-10-24 16:13:58144 const std::vector<CandidateTopic>& valid_candidate_topics,
Yao Xiao716e4812022-04-20 22:57:19145 content::RenderFrameHost* main_frame) {
Yao Xiao716e4812022-04-20 22:57:19146 ukm::UkmRecorder* ukm_recorder = ukm::UkmRecorder::Get();
Yao Xiao84826f42022-10-24 16:13:58147 ukm::builders::BrowsingTopics_DocumentBrowsingTopicsApiResult2 builder(
Yao Xiao716e4812022-04-20 22:57:19148 main_frame->GetPageUkmSourceId());
149
Yao Xiao84826f42022-10-24 16:13:58150 for (size_t i = 0; i < 3u && valid_candidate_topics.size() > i; ++i) {
151 const CandidateTopic& candidate_topic = valid_candidate_topics[i];
Yao Xiao716e4812022-04-20 22:57:19152
Yao Xiao84826f42022-10-24 16:13:58153 DCHECK(candidate_topic.IsValid());
Yao Xiao716e4812022-04-20 22:57:19154
155 if (i == 0) {
Yao Xiao84826f42022-10-24 16:13:58156 builder.SetCandidateTopic0(candidate_topic.topic().value())
157 .SetCandidateTopic0IsTrueTopTopic(candidate_topic.is_true_topic())
158 .SetCandidateTopic0ShouldBeFiltered(
159 candidate_topic.should_be_filtered())
160 .SetCandidateTopic0TaxonomyVersion(candidate_topic.taxonomy_version())
161 .SetCandidateTopic0ModelVersion(candidate_topic.model_version());
Yao Xiao716e4812022-04-20 22:57:19162 } else if (i == 1) {
Yao Xiao84826f42022-10-24 16:13:58163 builder.SetCandidateTopic1(candidate_topic.topic().value())
164 .SetCandidateTopic1IsTrueTopTopic(candidate_topic.is_true_topic())
165 .SetCandidateTopic1ShouldBeFiltered(
166 candidate_topic.should_be_filtered())
167 .SetCandidateTopic1TaxonomyVersion(candidate_topic.taxonomy_version())
168 .SetCandidateTopic1ModelVersion(candidate_topic.model_version());
Yao Xiao716e4812022-04-20 22:57:19169 } else {
170 DCHECK_EQ(i, 2u);
Yao Xiao84826f42022-10-24 16:13:58171 builder.SetCandidateTopic2(candidate_topic.topic().value())
172 .SetCandidateTopic2IsTrueTopTopic(candidate_topic.is_true_topic())
173 .SetCandidateTopic2ShouldBeFiltered(
174 candidate_topic.should_be_filtered())
175 .SetCandidateTopic2TaxonomyVersion(candidate_topic.taxonomy_version())
176 .SetCandidateTopic2ModelVersion(candidate_topic.model_version());
Yao Xiao716e4812022-04-20 22:57:19177 }
178 }
179
180 builder.Record(ukm_recorder->Get());
181}
182
Yao Xiaocc379392022-03-25 21:39:06183} // namespace
184
Yao Xiao7a1995b2022-03-09 08:18:55185BrowsingTopicsServiceImpl::~BrowsingTopicsServiceImpl() = default;
186
Yao Xiaocc379392022-03-25 21:39:06187BrowsingTopicsServiceImpl::BrowsingTopicsServiceImpl(
188 const base::FilePath& profile_path,
189 privacy_sandbox::PrivacySandboxSettings* privacy_sandbox_settings,
190 history::HistoryService* history_service,
191 content::BrowsingTopicsSiteDataManager* site_data_manager,
192 optimization_guide::PageContentAnnotationsService* annotations_service)
193 : privacy_sandbox_settings_(privacy_sandbox_settings),
194 history_service_(history_service),
195 site_data_manager_(site_data_manager),
196 annotations_service_(annotations_service),
197 browsing_topics_state_(
198 profile_path,
199 base::BindOnce(
200 &BrowsingTopicsServiceImpl::OnBrowsingTopicsStateLoaded,
201 base::Unretained(this))) {
202 privacy_sandbox_settings_observation_.Observe(privacy_sandbox_settings);
203 history_service_observation_.Observe(history_service);
204
Yao Xiaobc1241a2022-03-29 05:23:37205 // Greedily request the model to be available to reduce the latency in later
206 // topics calculation.
Yao Xiaocc379392022-03-25 21:39:06207 annotations_service_->RequestAndNotifyWhenModelAvailable(
208 optimization_guide::AnnotationType::kPageTopics, base::DoNothing());
209}
210
Yao Xiao9c789ea2022-10-26 14:46:55211bool BrowsingTopicsServiceImpl::HandleTopicsWebApi(
Yao Xiaocc379392022-03-25 21:39:06212 const url::Origin& context_origin,
Yao Xiao1d60ed32022-09-27 16:33:24213 content::RenderFrameHost* main_frame,
Yao Xiao9c789ea2022-10-26 14:46:55214 ApiCallerSource caller_source,
215 bool get_topics,
216 bool observe,
217 std::vector<blink::mojom::EpochTopicPtr>& topics) {
218 DCHECK(topics.empty());
219 DCHECK(get_topics || observe);
220
Yao Xiao716e4812022-04-20 22:57:19221 if (!browsing_topics_state_loaded_) {
222 RecordBrowsingTopicsApiResultUkmMetrics(
Yao Xiao9c789ea2022-10-26 14:46:55223 ApiAccessFailureReason::kStateNotReady, main_frame, get_topics);
224 return false;
Yao Xiao716e4812022-04-20 22:57:19225 }
Yao Xiaocc379392022-03-25 21:39:06226
Yao Xiao716e4812022-04-20 22:57:19227 if (!privacy_sandbox_settings_->IsTopicsAllowed()) {
228 RecordBrowsingTopicsApiResultUkmMetrics(
Yao Xiao9c789ea2022-10-26 14:46:55229 ApiAccessFailureReason::kAccessDisallowedBySettings, main_frame,
230 get_topics);
231 return false;
Yao Xiao716e4812022-04-20 22:57:19232 }
Yao Xiaocc379392022-03-25 21:39:06233
234 if (!privacy_sandbox_settings_->IsTopicsAllowedForContext(
Rohit Agarwalaac12df2022-12-20 18:15:23235 /*top_frame_origin=*/main_frame->GetLastCommittedOrigin(),
236 context_origin.GetURL())) {
Yao Xiao716e4812022-04-20 22:57:19237 RecordBrowsingTopicsApiResultUkmMetrics(
Yao Xiao9c789ea2022-10-26 14:46:55238 ApiAccessFailureReason::kAccessDisallowedBySettings, main_frame,
239 get_topics);
240 return false;
Yao Xiaocc379392022-03-25 21:39:06241 }
242
243 std::string context_domain =
244 net::registry_controlled_domains::GetDomainAndRegistry(
245 context_origin.GetURL(),
246 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
247
248 HashedDomain hashed_context_domain = HashContextDomainForStorage(
249 browsing_topics_state_.hmac_key(), context_domain);
250
Yao Xiao1d60ed32022-09-27 16:33:24251 if (observe) {
252 // Track the API usage context after the permissions check.
253 BrowsingTopicsPageLoadDataTracker::GetOrCreateForPage(main_frame->GetPage())
254 ->OnBrowsingTopicsApiUsed(hashed_context_domain, history_service_);
255 }
Yao Xiaocc379392022-03-25 21:39:06256
Yao Xiao9c789ea2022-10-26 14:46:55257 if (!get_topics)
258 return true;
259
Yao Xiaocc379392022-03-25 21:39:06260 std::string top_domain =
261 net::registry_controlled_domains::GetDomainAndRegistry(
262 main_frame->GetLastCommittedOrigin().GetURL(),
263 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
264
Yao Xiao84826f42022-10-24 16:13:58265 std::vector<CandidateTopic> valid_candidate_topics;
Yao Xiao716e4812022-04-20 22:57:19266
Yao Xiaocc379392022-03-25 21:39:06267 for (const EpochTopics* epoch :
268 browsing_topics_state_.EpochsForSite(top_domain)) {
Yao Xiao414312992022-10-18 20:25:11269 CandidateTopic candidate_topic = epoch->CandidateTopicForSite(
270 top_domain, hashed_context_domain, browsing_topics_state_.hmac_key());
Yao Xiao716e4812022-04-20 22:57:19271
Yao Xiao414312992022-10-18 20:25:11272 if (!candidate_topic.IsValid())
Yao Xiaocc379392022-03-25 21:39:06273 continue;
274
Yao Xiaobf39e34d2022-03-28 21:48:28275 // Although a top topic can never be in the disallowed state, the returned
Yao Xiao414312992022-10-18 20:25:11276 // `candidate_topic` may be the random one. Thus we still need this check.
Yao Xiaocc379392022-03-25 21:39:06277 if (!privacy_sandbox_settings_->IsTopicAllowed(
Yao Xiao414312992022-10-18 20:25:11278 privacy_sandbox::CanonicalTopic(
279 candidate_topic.topic(), candidate_topic.taxonomy_version()))) {
280 DCHECK(!candidate_topic.is_true_topic());
Yao Xiaobf39e34d2022-03-28 21:48:28281 continue;
Yao Xiaocc379392022-03-25 21:39:06282 }
283
Yao Xiao84826f42022-10-24 16:13:58284 valid_candidate_topics.push_back(std::move(candidate_topic));
285 }
286
287 RecordBrowsingTopicsApiResultUkmMetrics(valid_candidate_topics, main_frame);
288
Yao Xiao84826f42022-10-24 16:13:58289 for (const CandidateTopic& candidate_topic : valid_candidate_topics) {
290 if (candidate_topic.should_be_filtered())
291 continue;
292
Yao Xiao3a03e602022-10-18 18:17:56293 // `PageSpecificContentSettings` should only observe true top topics
294 // accessed on the page. It's okay to notify the same topic multiple
295 // times even though duplicate topics will be removed in the end.
Yao Xiao414312992022-10-18 20:25:11296 if (candidate_topic.is_true_topic()) {
Yao Xiao3a03e602022-10-18 18:17:56297 privacy_sandbox::CanonicalTopic canonical_topic(
Yao Xiao414312992022-10-18 20:25:11298 candidate_topic.topic(), candidate_topic.taxonomy_version());
Yao Xiao3a03e602022-10-18 18:17:56299 content_settings::PageSpecificContentSettings::TopicAccessed(
300 main_frame, context_origin, /*blocked_by_policy=*/false,
301 canonical_topic);
302 }
303
Yao Xiao21f1faa2022-04-29 06:20:32304 auto result_topic = blink::mojom::EpochTopic::New();
Yao Xiao414312992022-10-18 20:25:11305 result_topic->topic = candidate_topic.topic().value();
Yao Xiaocc379392022-03-25 21:39:06306 result_topic->config_version = base::StrCat(
307 {"chrome.", base::NumberToString(
308 blink::features::kBrowsingTopicsConfigVersion.Get())});
Yao Xiao414312992022-10-18 20:25:11309 result_topic->model_version =
310 base::NumberToString(candidate_topic.model_version());
Yao Xiaocc379392022-03-25 21:39:06311 result_topic->taxonomy_version =
Yao Xiao414312992022-10-18 20:25:11312 base::NumberToString(candidate_topic.taxonomy_version());
Yao Xiaocc379392022-03-25 21:39:06313 result_topic->version = base::StrCat({result_topic->config_version, ":",
314 result_topic->taxonomy_version, ":",
315 result_topic->model_version});
Yao Xiao9c789ea2022-10-26 14:46:55316 topics.emplace_back(std::move(result_topic));
Yao Xiaocc379392022-03-25 21:39:06317 }
318
Yao Xiao9c789ea2022-10-26 14:46:55319 std::sort(topics.begin(), topics.end());
Yao Xiao716e4812022-04-20 22:57:19320
Yao Xiao84826f42022-10-24 16:13:58321 // Remove duplicate entries.
Yao Xiao9c789ea2022-10-26 14:46:55322 topics.erase(std::unique(topics.begin(), topics.end()), topics.end());
Yao Xiaocc379392022-03-25 21:39:06323
Yao Xiao9c789ea2022-10-26 14:46:55324 return true;
Yao Xiaocc379392022-03-25 21:39:06325}
Yao Xiao7a1995b2022-03-09 08:18:55326
Yao Xiao57892a22022-06-28 19:21:41327void BrowsingTopicsServiceImpl::GetBrowsingTopicsStateForWebUi(
328 bool calculate_now,
329 mojom::PageHandler::GetBrowsingTopicsStateCallback callback) {
Yao Xiao21f1faa2022-04-29 06:20:32330 if (!browsing_topics_state_loaded_) {
Yao Xiao57892a22022-06-28 19:21:41331 std::move(callback).Run(
332 mojom::WebUIGetBrowsingTopicsStateResult::NewOverrideStatusMessage(
333 "State loading hasn't finished. Please retry shortly."));
334 return;
Yao Xiao21f1faa2022-04-29 06:20:32335 }
336
Yao Xiao57892a22022-06-28 19:21:41337 // If a calculation is already in progress, get the webui topics state after
338 // the calculation is done. Do this regardless of whether `calculate_now` is
339 // true, i.e. if `calculate_now` is true, this request is effectively merged
340 // with the in progress calculation.
341 if (topics_calculator_) {
342 get_state_for_webui_callbacks_.push_back(std::move(callback));
343 return;
Yao Xiao21f1faa2022-04-29 06:20:32344 }
345
Yao Xiao57892a22022-06-28 19:21:41346 DCHECK(schedule_calculate_timer_.IsRunning());
Yao Xiao21f1faa2022-04-29 06:20:32347
Yao Xiao57892a22022-06-28 19:21:41348 if (calculate_now) {
349 get_state_for_webui_callbacks_.push_back(std::move(callback));
Yao Xiao21f1faa2022-04-29 06:20:32350
Yao Xiao57892a22022-06-28 19:21:41351 schedule_calculate_timer_.AbandonAndStop();
352 CalculateBrowsingTopics();
353 return;
Yao Xiao21f1faa2022-04-29 06:20:32354 }
355
Yao Xiao57892a22022-06-28 19:21:41356 std::move(callback).Run(GetBrowsingTopicsStateForWebUiHelper());
Yao Xiao21f1faa2022-04-29 06:20:32357}
358
Yao Xiao7a1995b2022-03-09 08:18:55359std::vector<privacy_sandbox::CanonicalTopic>
Yao Xiao7a1995b2022-03-09 08:18:55360BrowsingTopicsServiceImpl::GetTopTopicsForDisplay() const {
Yao Xiaocc379392022-03-25 21:39:06361 if (!browsing_topics_state_loaded_)
362 return {};
363
364 std::vector<privacy_sandbox::CanonicalTopic> result;
365
366 for (const EpochTopics& epoch : browsing_topics_state_.epochs()) {
Yao Xiaobf39e34d2022-03-28 21:48:28367 DCHECK_LE(epoch.padded_top_topics_start_index(),
368 epoch.top_topics_and_observing_domains().size());
369
370 for (size_t i = 0; i < epoch.padded_top_topics_start_index(); ++i) {
371 const TopicAndDomains& topic_and_domains =
372 epoch.top_topics_and_observing_domains()[i];
373
374 if (!topic_and_domains.IsValid())
Yao Xiaocc379392022-03-25 21:39:06375 continue;
376
Yao Xiaobf39e34d2022-03-28 21:48:28377 // A top topic can never be in the disallowed state (i.e. it will be
378 // cleared when it becomes diallowed).
379 DCHECK(privacy_sandbox_settings_->IsTopicAllowed(
380 privacy_sandbox::CanonicalTopic(topic_and_domains.topic(),
381 epoch.taxonomy_version())));
382
383 result.emplace_back(topic_and_domains.topic(), epoch.taxonomy_version());
Yao Xiaocc379392022-03-25 21:39:06384 }
385 }
386
387 return result;
388}
389
Yao Xiaobf39e34d2022-03-28 21:48:28390void BrowsingTopicsServiceImpl::ClearTopic(
391 const privacy_sandbox::CanonicalTopic& canonical_topic) {
392 if (!browsing_topics_state_loaded_)
393 return;
394
395 browsing_topics_state_.ClearTopic(canonical_topic.topic_id(),
396 canonical_topic.taxonomy_version());
397}
398
399void BrowsingTopicsServiceImpl::ClearTopicsDataForOrigin(
400 const url::Origin& origin) {
401 if (!browsing_topics_state_loaded_)
402 return;
403
404 std::string context_domain =
405 net::registry_controlled_domains::GetDomainAndRegistry(
406 origin.GetURL(),
407 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
408
409 HashedDomain hashed_context_domain = HashContextDomainForStorage(
410 browsing_topics_state_.hmac_key(), context_domain);
411
412 browsing_topics_state_.ClearContextDomain(hashed_context_domain);
413 site_data_manager_->ClearContextDomain(hashed_context_domain);
414}
415
416void BrowsingTopicsServiceImpl::ClearAllTopicsData() {
417 if (!browsing_topics_state_loaded_)
418 return;
419
420 browsing_topics_state_.ClearAllTopics();
421 site_data_manager_->ExpireDataBefore(base::Time::Now());
422}
423
Yao Xiaocc379392022-03-25 21:39:06424std::unique_ptr<BrowsingTopicsCalculator>
425BrowsingTopicsServiceImpl::CreateCalculator(
426 privacy_sandbox::PrivacySandboxSettings* privacy_sandbox_settings,
427 history::HistoryService* history_service,
428 content::BrowsingTopicsSiteDataManager* site_data_manager,
429 optimization_guide::PageContentAnnotationsService* annotations_service,
Yao Xiao57892a22022-06-28 19:21:41430 const base::circular_deque<EpochTopics>& epochs,
Yao Xiaocc379392022-03-25 21:39:06431 BrowsingTopicsCalculator::CalculateCompletedCallback callback) {
432 return std::make_unique<BrowsingTopicsCalculator>(
433 privacy_sandbox_settings, history_service, site_data_manager,
Yao Xiao57892a22022-06-28 19:21:41434 annotations_service, epochs, std::move(callback));
Yao Xiaocc379392022-03-25 21:39:06435}
436
437const BrowsingTopicsState& BrowsingTopicsServiceImpl::browsing_topics_state() {
438 return browsing_topics_state_;
439}
440
441void BrowsingTopicsServiceImpl::ScheduleBrowsingTopicsCalculation(
442 base::TimeDelta delay) {
443 DCHECK(browsing_topics_state_loaded_);
444
445 // `this` owns the timer, which is automatically cancelled on destruction, so
446 // base::Unretained(this) is safe.
447 schedule_calculate_timer_.Start(
448 FROM_HERE, delay,
449 base::BindOnce(&BrowsingTopicsServiceImpl::CalculateBrowsingTopics,
450 base::Unretained(this)));
451}
452
453void BrowsingTopicsServiceImpl::CalculateBrowsingTopics() {
454 DCHECK(browsing_topics_state_loaded_);
455
456 DCHECK(!topics_calculator_);
457
458 // `this` owns `topics_calculator_` so `topics_calculator_` should not invoke
459 // the callback once it's destroyed.
460 topics_calculator_ = CreateCalculator(
461 privacy_sandbox_settings_, history_service_, site_data_manager_,
Yao Xiao57892a22022-06-28 19:21:41462 annotations_service_, browsing_topics_state_.epochs(),
Yao Xiaocc379392022-03-25 21:39:06463 base::BindOnce(
464 &BrowsingTopicsServiceImpl::OnCalculateBrowsingTopicsCompleted,
465 base::Unretained(this)));
466}
467
468void BrowsingTopicsServiceImpl::OnCalculateBrowsingTopicsCompleted(
469 EpochTopics epoch_topics) {
470 DCHECK(browsing_topics_state_loaded_);
471
472 DCHECK(topics_calculator_);
473 topics_calculator_.reset();
474
475 browsing_topics_state_.AddEpoch(std::move(epoch_topics));
476 browsing_topics_state_.UpdateNextScheduledCalculationTime();
477
478 ScheduleBrowsingTopicsCalculation(
479 blink::features::kBrowsingTopicsTimePeriodPerEpoch.Get());
Yao Xiao57892a22022-06-28 19:21:41480
481 if (!get_state_for_webui_callbacks_.empty()) {
482 mojom::WebUIGetBrowsingTopicsStateResultPtr webui_state =
483 GetBrowsingTopicsStateForWebUiHelper();
484
485 for (auto& callback : get_state_for_webui_callbacks_) {
486 std::move(callback).Run(webui_state->Clone());
487 }
488
489 get_state_for_webui_callbacks_.clear();
490 }
Yao Xiaocc379392022-03-25 21:39:06491}
492
493void BrowsingTopicsServiceImpl::OnBrowsingTopicsStateLoaded() {
494 DCHECK(!browsing_topics_state_loaded_);
495 browsing_topics_state_loaded_ = true;
496
497 base::Time browsing_topics_data_sccessible_since =
498 privacy_sandbox_settings_->TopicsDataAccessibleSince();
499
500 StartupCalculateDecision decision = GetStartupCalculationDecision(
Yao Xiaobf39e34d2022-03-28 21:48:28501 browsing_topics_state_, browsing_topics_data_sccessible_since,
502 base::BindRepeating(
503 &privacy_sandbox::PrivacySandboxSettings::IsTopicAllowed,
504 base::Unretained(privacy_sandbox_settings_)));
Yao Xiaocc379392022-03-25 21:39:06505
506 if (decision.clear_topics_data)
507 browsing_topics_state_.ClearAllTopics();
508
509 site_data_manager_->ExpireDataBefore(browsing_topics_data_sccessible_since);
510
511 ScheduleBrowsingTopicsCalculation(decision.next_calculation_delay);
512}
513
514void BrowsingTopicsServiceImpl::Shutdown() {
515 privacy_sandbox_settings_observation_.Reset();
516 history_service_observation_.Reset();
517}
518
519void BrowsingTopicsServiceImpl::OnTopicsDataAccessibleSinceUpdated() {
520 if (!browsing_topics_state_loaded_)
521 return;
522
Yao Xiaobf39e34d2022-03-28 21:48:28523 // Here we rely on the fact that `browsing_topics_data_accessible_since` can
524 // only be updated to base::Time::Now() due to data deletion. In this case, we
525 // should just clear all topics.
526 browsing_topics_state_.ClearAllTopics();
527 site_data_manager_->ExpireDataBefore(
528 privacy_sandbox_settings_->TopicsDataAccessibleSince());
Yao Xiaocc379392022-03-25 21:39:06529
530 // Abort the outstanding topics calculation and restart immediately.
531 if (topics_calculator_) {
532 DCHECK(!schedule_calculate_timer_.IsRunning());
533
534 topics_calculator_.reset();
535 CalculateBrowsingTopics();
536 }
537}
538
539void BrowsingTopicsServiceImpl::OnURLsDeleted(
540 history::HistoryService* history_service,
541 const history::DeletionInfo& deletion_info) {
542 if (!browsing_topics_state_loaded_)
543 return;
544
545 // Ignore invalid time_range.
546 if (!deletion_info.IsAllHistory() && !deletion_info.time_range().IsValid())
547 return;
548
549 for (size_t i = 0; i < browsing_topics_state_.epochs().size(); ++i) {
550 const EpochTopics& epoch_topics = browsing_topics_state_.epochs()[i];
551
552 if (epoch_topics.empty())
553 continue;
554
Yao Xiao57892a22022-06-28 19:21:41555 // The typical case is assumed here. We cannot always derive the original
556 // history start time, as the necessary data (e.g. its previous epoch's
557 // calculation time) may have been gone.
558 base::Time history_data_start_time =
559 epoch_topics.calculation_time() -
560 blink::features::kBrowsingTopicsTimePeriodPerEpoch.Get();
561
Yao Xiaocc379392022-03-25 21:39:06562 bool time_range_overlap =
563 epoch_topics.calculation_time() >= deletion_info.time_range().begin() &&
Yao Xiao57892a22022-06-28 19:21:41564 history_data_start_time <= deletion_info.time_range().end();
Yao Xiaocc379392022-03-25 21:39:06565
566 if (time_range_overlap)
567 browsing_topics_state_.ClearOneEpoch(i);
568 }
569
570 // If there's an outstanding topics calculation, abort and restart it.
571 if (topics_calculator_) {
572 DCHECK(!schedule_calculate_timer_.IsRunning());
573
574 topics_calculator_.reset();
575 CalculateBrowsingTopics();
576 }
Yao Xiao7a1995b2022-03-09 08:18:55577}
578
Yao Xiao57892a22022-06-28 19:21:41579mojom::WebUIGetBrowsingTopicsStateResultPtr
580BrowsingTopicsServiceImpl::GetBrowsingTopicsStateForWebUiHelper() {
581 DCHECK(browsing_topics_state_loaded_);
582 DCHECK(!topics_calculator_);
583
584 auto webui_state = mojom::WebUIBrowsingTopicsState::New();
585
586 webui_state->next_scheduled_calculation_time =
587 browsing_topics_state_.next_scheduled_calculation_time();
588
589 for (const EpochTopics& epoch : browsing_topics_state_.epochs()) {
590 DCHECK_LE(epoch.padded_top_topics_start_index(),
591 epoch.top_topics_and_observing_domains().size());
592
593 // Note: for a failed epoch calculation, the default zero-initialized values
594 // will be displayed in the Web UI.
595 auto webui_epoch = mojom::WebUIEpoch::New();
596 webui_epoch->calculation_time = epoch.calculation_time();
597 webui_epoch->model_version = base::NumberToString(epoch.model_version());
598 webui_epoch->taxonomy_version =
599 base::NumberToString(epoch.taxonomy_version());
600
601 for (size_t i = 0; i < epoch.top_topics_and_observing_domains().size();
602 ++i) {
603 const TopicAndDomains& topic_and_domains =
604 epoch.top_topics_and_observing_domains()[i];
605
606 privacy_sandbox::CanonicalTopic canonical_topic =
607 privacy_sandbox::CanonicalTopic(topic_and_domains.topic(),
608 epoch.taxonomy_version());
609
610 std::vector<std::string> webui_observed_by_domains;
611 webui_observed_by_domains.reserve(
612 topic_and_domains.hashed_domains().size());
613 for (const auto& domain : topic_and_domains.hashed_domains()) {
614 webui_observed_by_domains.push_back(
615 base::NumberToString(domain.value()));
616 }
617
618 // Note: if the topic is invalid (i.e. cleared), the output `topic_id`
619 // will be 0; if the topic is invalid, or if the taxonomy version isn't
620 // recognized by this Chrome binary, the output `topic_name` will be
621 // "Unknown".
622 auto webui_topic = mojom::WebUITopic::New();
623 webui_topic->topic_id = topic_and_domains.topic().value();
624 webui_topic->topic_name = canonical_topic.GetLocalizedRepresentation();
625 webui_topic->is_real_topic = (i < epoch.padded_top_topics_start_index());
626 webui_topic->observed_by_domains = std::move(webui_observed_by_domains);
627
628 webui_epoch->topics.push_back(std::move(webui_topic));
629 }
630
631 webui_state->epochs.push_back(std::move(webui_epoch));
632 }
633
634 // Reorder the epochs from latest to oldest.
635 base::ranges::reverse(webui_state->epochs);
636
637 return mojom::WebUIGetBrowsingTopicsStateResult::NewBrowsingTopicsState(
638 std::move(webui_state));
639}
640
Yao Xiao7a1995b2022-03-09 08:18:55641} // namespace browsing_topics