// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
#include "components/browsing_topics/browsing_topics_service_impl.h"

#include <random>
#include <utility>

#include "base/rand_util.h"
#include "base/ranges/algorithm.h"
#include "base/strings/strcat.h"
#include "base/time/time.h"
#include "components/browsing_topics/browsing_topics_calculator.h"
#include "components/browsing_topics/browsing_topics_page_load_data_tracker.h"
#include "components/browsing_topics/common/common_types.h"
#include "components/browsing_topics/mojom/browsing_topics_internals.mojom.h"
#include "components/browsing_topics/util.h"
#include "components/content_settings/browser/page_specific_content_settings.h"
#include "components/optimization_guide/content/browser/page_content_annotations_service.h"
#include "content/public/browser/browsing_topics_site_data_manager.h"
#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
#include "services/metrics/public/cpp/ukm_builders.h"
#include "services/metrics/public/cpp/ukm_recorder.h"
#include "third_party/blink/public/common/features.h"
#include "third_party/blink/public/mojom/browsing_topics/browsing_topics.mojom.h"
26
Yao Xiao7a1995b2022-03-09 08:18:5527namespace browsing_topics {
28
Yao Xiaocc379392022-03-25 21:39:0629namespace {
30
Yao Xiaobf39e34d2022-03-28 21:48:2831// Returns whether the topics should all be cleared given
32// `browsing_topics_data_accessible_since` and `is_topic_allowed_by_settings`.
33// Returns true if `browsing_topics_data_accessible_since` is greater than the
34// last calculation time, or if any top topic is disallowed from the settings.
35// The latter could happen if the topic became disallowed when
36// `browsing_topics_state` was still loading (and we didn't get a chance to
37// clear it). This is an unlikely edge case, so it's fine to over-delete.
38bool ShouldClearTopicsOnStartup(
Yao Xiaocc379392022-03-25 21:39:0639 const BrowsingTopicsState& browsing_topics_state,
Yao Xiaobf39e34d2022-03-28 21:48:2840 base::Time browsing_topics_data_accessible_since,
41 base::RepeatingCallback<bool(const privacy_sandbox::CanonicalTopic&)>
42 is_topic_allowed_by_settings) {
43 DCHECK(!is_topic_allowed_by_settings.is_null());
44
45 if (browsing_topics_state.epochs().empty())
46 return false;
47
Yao Xiaocc379392022-03-25 21:39:0648 // Here we rely on the fact that `browsing_topics_data_accessible_since` can
49 // only be updated to base::Time::Now() due to data deletion. So we'll either
50 // need to clear all topics data, or no-op. If this assumption no longer
51 // holds, we'd need to iterate over all epochs, check their calculation time,
52 // and selectively delete the epochs.
Yao Xiaobf39e34d2022-03-28 21:48:2853 if (browsing_topics_data_accessible_since >
54 browsing_topics_state.epochs().back().calculation_time()) {
55 return true;
56 }
57
58 for (const EpochTopics& epoch : browsing_topics_state.epochs()) {
59 for (const TopicAndDomains& topic_and_domains :
60 epoch.top_topics_and_observing_domains()) {
61 if (!topic_and_domains.IsValid())
62 continue;
63
64 if (!is_topic_allowed_by_settings.Run(privacy_sandbox::CanonicalTopic(
65 topic_and_domains.topic(), epoch.taxonomy_version()))) {
66 return true;
67 }
68 }
69 }
70
71 return false;
Yao Xiaocc379392022-03-25 21:39:0672}
73
74struct StartupCalculateDecision {
75 bool clear_topics_data = true;
76 base::TimeDelta next_calculation_delay;
77};
78
79StartupCalculateDecision GetStartupCalculationDecision(
80 const BrowsingTopicsState& browsing_topics_state,
Yao Xiaobf39e34d2022-03-28 21:48:2881 base::Time browsing_topics_data_accessible_since,
82 base::RepeatingCallback<bool(const privacy_sandbox::CanonicalTopic&)>
83 is_topic_allowed_by_settings) {
Yao Xiaocc379392022-03-25 21:39:0684 // The topics have never been calculated. This could happen with a fresh
85 // profile or the if the config has updated. In case of a config update, the
86 // topics should have already been cleared when initializing the
87 // `BrowsingTopicsState`.
88 if (browsing_topics_state.next_scheduled_calculation_time().is_null()) {
89 return StartupCalculateDecision{
90 .clear_topics_data = false,
91 .next_calculation_delay = base::TimeDelta()};
92 }
93
94 // This could happen when clear-on-exit is turned on and has caused the
Yao Xiaobf39e34d2022-03-28 21:48:2895 // cookies to be deleted on startup, of if a topic became disallowed when
96 // `browsing_topics_state` was still loading.
97 bool should_clear_topics_data = ShouldClearTopicsOnStartup(
98 browsing_topics_state, browsing_topics_data_accessible_since,
99 is_topic_allowed_by_settings);
Yao Xiaocc379392022-03-25 21:39:06100
101 base::TimeDelta presumed_next_calculation_delay =
102 browsing_topics_state.next_scheduled_calculation_time() -
103 base::Time::Now();
104
105 // The scheduled calculation time was reached before the startup.
106 if (presumed_next_calculation_delay <= base::TimeDelta()) {
107 return StartupCalculateDecision{
108 .clear_topics_data = should_clear_topics_data,
109 .next_calculation_delay = base::TimeDelta()};
110 }
111
112 // This could happen if the machine time has changed since the last
113 // calculation. Recalculate immediately to align with the expected schedule
114 // rather than potentially stop computing for a very long time.
115 if (presumed_next_calculation_delay >=
116 2 * blink::features::kBrowsingTopicsTimePeriodPerEpoch.Get()) {
117 return StartupCalculateDecision{
118 .clear_topics_data = should_clear_topics_data,
119 .next_calculation_delay = base::TimeDelta()};
120 }
121
122 return StartupCalculateDecision{
123 .clear_topics_data = should_clear_topics_data,
124 .next_calculation_delay = presumed_next_calculation_delay};
125}
126
Yao Xiao716e4812022-04-20 22:57:19127void RecordBrowsingTopicsApiResultUkmMetrics(
Yao Xiao84826f42022-10-24 16:13:58128 ApiAccessFailureReason failure_reason,
Yao Xiao9c789ea2022-10-26 14:46:55129 content::RenderFrameHost* main_frame,
130 bool is_get_topics_request) {
131 // The `BrowsingTopics_DocumentBrowsingTopicsApiResult2` event is only
132 // recorded for request that gets the topics.
133 if (!is_get_topics_request)
134 return;
135
Yao Xiao716e4812022-04-20 22:57:19136 ukm::UkmRecorder* ukm_recorder = ukm::UkmRecorder::Get();
Yao Xiao84826f42022-10-24 16:13:58137 ukm::builders::BrowsingTopics_DocumentBrowsingTopicsApiResult2 builder(
Yao Xiao716e4812022-04-20 22:57:19138 main_frame->GetPageUkmSourceId());
Yao Xiao84826f42022-10-24 16:13:58139 builder.SetFailureReason(static_cast<int64_t>(failure_reason));
Yao Xiao716e4812022-04-20 22:57:19140 builder.Record(ukm_recorder->Get());
141}
142
143void RecordBrowsingTopicsApiResultUkmMetrics(
Yao Xiao84826f42022-10-24 16:13:58144 const std::vector<CandidateTopic>& valid_candidate_topics,
Yao Xiao716e4812022-04-20 22:57:19145 content::RenderFrameHost* main_frame) {
Yao Xiao716e4812022-04-20 22:57:19146 ukm::UkmRecorder* ukm_recorder = ukm::UkmRecorder::Get();
Yao Xiao84826f42022-10-24 16:13:58147 ukm::builders::BrowsingTopics_DocumentBrowsingTopicsApiResult2 builder(
Yao Xiao716e4812022-04-20 22:57:19148 main_frame->GetPageUkmSourceId());
149
Yao Xiao84826f42022-10-24 16:13:58150 for (size_t i = 0; i < 3u && valid_candidate_topics.size() > i; ++i) {
151 const CandidateTopic& candidate_topic = valid_candidate_topics[i];
Yao Xiao716e4812022-04-20 22:57:19152
Yao Xiao84826f42022-10-24 16:13:58153 DCHECK(candidate_topic.IsValid());
Yao Xiao716e4812022-04-20 22:57:19154
155 if (i == 0) {
Yao Xiao84826f42022-10-24 16:13:58156 builder.SetCandidateTopic0(candidate_topic.topic().value())
157 .SetCandidateTopic0IsTrueTopTopic(candidate_topic.is_true_topic())
158 .SetCandidateTopic0ShouldBeFiltered(
159 candidate_topic.should_be_filtered())
160 .SetCandidateTopic0TaxonomyVersion(candidate_topic.taxonomy_version())
161 .SetCandidateTopic0ModelVersion(candidate_topic.model_version());
Yao Xiao716e4812022-04-20 22:57:19162 } else if (i == 1) {
Yao Xiao84826f42022-10-24 16:13:58163 builder.SetCandidateTopic1(candidate_topic.topic().value())
164 .SetCandidateTopic1IsTrueTopTopic(candidate_topic.is_true_topic())
165 .SetCandidateTopic1ShouldBeFiltered(
166 candidate_topic.should_be_filtered())
167 .SetCandidateTopic1TaxonomyVersion(candidate_topic.taxonomy_version())
168 .SetCandidateTopic1ModelVersion(candidate_topic.model_version());
Yao Xiao716e4812022-04-20 22:57:19169 } else {
170 DCHECK_EQ(i, 2u);
Yao Xiao84826f42022-10-24 16:13:58171 builder.SetCandidateTopic2(candidate_topic.topic().value())
172 .SetCandidateTopic2IsTrueTopTopic(candidate_topic.is_true_topic())
173 .SetCandidateTopic2ShouldBeFiltered(
174 candidate_topic.should_be_filtered())
175 .SetCandidateTopic2TaxonomyVersion(candidate_topic.taxonomy_version())
176 .SetCandidateTopic2ModelVersion(candidate_topic.model_version());
Yao Xiao716e4812022-04-20 22:57:19177 }
178 }
179
180 builder.Record(ukm_recorder->Get());
181}
182
Yao Xiaocc379392022-03-25 21:39:06183} // namespace
184
Yao Xiao7a1995b2022-03-09 08:18:55185BrowsingTopicsServiceImpl::~BrowsingTopicsServiceImpl() = default;
186
Yao Xiaocc379392022-03-25 21:39:06187BrowsingTopicsServiceImpl::BrowsingTopicsServiceImpl(
188 const base::FilePath& profile_path,
189 privacy_sandbox::PrivacySandboxSettings* privacy_sandbox_settings,
190 history::HistoryService* history_service,
191 content::BrowsingTopicsSiteDataManager* site_data_manager,
192 optimization_guide::PageContentAnnotationsService* annotations_service)
193 : privacy_sandbox_settings_(privacy_sandbox_settings),
194 history_service_(history_service),
195 site_data_manager_(site_data_manager),
196 annotations_service_(annotations_service),
197 browsing_topics_state_(
198 profile_path,
199 base::BindOnce(
200 &BrowsingTopicsServiceImpl::OnBrowsingTopicsStateLoaded,
201 base::Unretained(this))) {
202 privacy_sandbox_settings_observation_.Observe(privacy_sandbox_settings);
203 history_service_observation_.Observe(history_service);
204
Yao Xiaobc1241a2022-03-29 05:23:37205 // Greedily request the model to be available to reduce the latency in later
206 // topics calculation.
Yao Xiaocc379392022-03-25 21:39:06207 annotations_service_->RequestAndNotifyWhenModelAvailable(
208 optimization_guide::AnnotationType::kPageTopics, base::DoNothing());
209}
210
Yao Xiao9c789ea2022-10-26 14:46:55211bool BrowsingTopicsServiceImpl::HandleTopicsWebApi(
Yao Xiaocc379392022-03-25 21:39:06212 const url::Origin& context_origin,
Yao Xiao1d60ed32022-09-27 16:33:24213 content::RenderFrameHost* main_frame,
Yao Xiao9c789ea2022-10-26 14:46:55214 ApiCallerSource caller_source,
215 bool get_topics,
216 bool observe,
217 std::vector<blink::mojom::EpochTopicPtr>& topics) {
218 DCHECK(topics.empty());
219 DCHECK(get_topics || observe);
220
Yao Xiao716e4812022-04-20 22:57:19221 if (!browsing_topics_state_loaded_) {
222 RecordBrowsingTopicsApiResultUkmMetrics(
Yao Xiao9c789ea2022-10-26 14:46:55223 ApiAccessFailureReason::kStateNotReady, main_frame, get_topics);
224 return false;
Yao Xiao716e4812022-04-20 22:57:19225 }
Yao Xiaocc379392022-03-25 21:39:06226
Yao Xiao716e4812022-04-20 22:57:19227 if (!privacy_sandbox_settings_->IsTopicsAllowed()) {
228 RecordBrowsingTopicsApiResultUkmMetrics(
Yao Xiao9c789ea2022-10-26 14:46:55229 ApiAccessFailureReason::kAccessDisallowedBySettings, main_frame,
230 get_topics);
231 return false;
Yao Xiao716e4812022-04-20 22:57:19232 }
Yao Xiaocc379392022-03-25 21:39:06233
234 if (!privacy_sandbox_settings_->IsTopicsAllowedForContext(
235 context_origin.GetURL(), main_frame->GetLastCommittedOrigin())) {
Yao Xiao716e4812022-04-20 22:57:19236 RecordBrowsingTopicsApiResultUkmMetrics(
Yao Xiao9c789ea2022-10-26 14:46:55237 ApiAccessFailureReason::kAccessDisallowedBySettings, main_frame,
238 get_topics);
239 return false;
Yao Xiaocc379392022-03-25 21:39:06240 }
241
242 std::string context_domain =
243 net::registry_controlled_domains::GetDomainAndRegistry(
244 context_origin.GetURL(),
245 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
246
247 HashedDomain hashed_context_domain = HashContextDomainForStorage(
248 browsing_topics_state_.hmac_key(), context_domain);
249
Yao Xiao1d60ed32022-09-27 16:33:24250 if (observe) {
251 // Track the API usage context after the permissions check.
252 BrowsingTopicsPageLoadDataTracker::GetOrCreateForPage(main_frame->GetPage())
253 ->OnBrowsingTopicsApiUsed(hashed_context_domain, history_service_);
254 }
Yao Xiaocc379392022-03-25 21:39:06255
Yao Xiao9c789ea2022-10-26 14:46:55256 if (!get_topics)
257 return true;
258
Yao Xiaocc379392022-03-25 21:39:06259 std::string top_domain =
260 net::registry_controlled_domains::GetDomainAndRegistry(
261 main_frame->GetLastCommittedOrigin().GetURL(),
262 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
263
Yao Xiao84826f42022-10-24 16:13:58264 std::vector<CandidateTopic> valid_candidate_topics;
Yao Xiao716e4812022-04-20 22:57:19265
Yao Xiaocc379392022-03-25 21:39:06266 for (const EpochTopics* epoch :
267 browsing_topics_state_.EpochsForSite(top_domain)) {
Yao Xiao414312992022-10-18 20:25:11268 CandidateTopic candidate_topic = epoch->CandidateTopicForSite(
269 top_domain, hashed_context_domain, browsing_topics_state_.hmac_key());
Yao Xiao716e4812022-04-20 22:57:19270
Yao Xiao414312992022-10-18 20:25:11271 if (!candidate_topic.IsValid())
Yao Xiaocc379392022-03-25 21:39:06272 continue;
273
Yao Xiaobf39e34d2022-03-28 21:48:28274 // Although a top topic can never be in the disallowed state, the returned
Yao Xiao414312992022-10-18 20:25:11275 // `candidate_topic` may be the random one. Thus we still need this check.
Yao Xiaocc379392022-03-25 21:39:06276 if (!privacy_sandbox_settings_->IsTopicAllowed(
Yao Xiao414312992022-10-18 20:25:11277 privacy_sandbox::CanonicalTopic(
278 candidate_topic.topic(), candidate_topic.taxonomy_version()))) {
279 DCHECK(!candidate_topic.is_true_topic());
Yao Xiaobf39e34d2022-03-28 21:48:28280 continue;
Yao Xiaocc379392022-03-25 21:39:06281 }
282
Yao Xiao84826f42022-10-24 16:13:58283 valid_candidate_topics.push_back(std::move(candidate_topic));
284 }
285
286 RecordBrowsingTopicsApiResultUkmMetrics(valid_candidate_topics, main_frame);
287
Yao Xiao84826f42022-10-24 16:13:58288 for (const CandidateTopic& candidate_topic : valid_candidate_topics) {
289 if (candidate_topic.should_be_filtered())
290 continue;
291
Yao Xiao3a03e602022-10-18 18:17:56292 // `PageSpecificContentSettings` should only observe true top topics
293 // accessed on the page. It's okay to notify the same topic multiple
294 // times even though duplicate topics will be removed in the end.
Yao Xiao414312992022-10-18 20:25:11295 if (candidate_topic.is_true_topic()) {
Yao Xiao3a03e602022-10-18 18:17:56296 privacy_sandbox::CanonicalTopic canonical_topic(
Yao Xiao414312992022-10-18 20:25:11297 candidate_topic.topic(), candidate_topic.taxonomy_version());
Yao Xiao3a03e602022-10-18 18:17:56298 content_settings::PageSpecificContentSettings::TopicAccessed(
299 main_frame, context_origin, /*blocked_by_policy=*/false,
300 canonical_topic);
301 }
302
Yao Xiao21f1faa2022-04-29 06:20:32303 auto result_topic = blink::mojom::EpochTopic::New();
Yao Xiao414312992022-10-18 20:25:11304 result_topic->topic = candidate_topic.topic().value();
Yao Xiaocc379392022-03-25 21:39:06305 result_topic->config_version = base::StrCat(
306 {"chrome.", base::NumberToString(
307 blink::features::kBrowsingTopicsConfigVersion.Get())});
Yao Xiao414312992022-10-18 20:25:11308 result_topic->model_version =
309 base::NumberToString(candidate_topic.model_version());
Yao Xiaocc379392022-03-25 21:39:06310 result_topic->taxonomy_version =
Yao Xiao414312992022-10-18 20:25:11311 base::NumberToString(candidate_topic.taxonomy_version());
Yao Xiaocc379392022-03-25 21:39:06312 result_topic->version = base::StrCat({result_topic->config_version, ":",
313 result_topic->taxonomy_version, ":",
314 result_topic->model_version});
Yao Xiao9c789ea2022-10-26 14:46:55315 topics.emplace_back(std::move(result_topic));
Yao Xiaocc379392022-03-25 21:39:06316 }
317
Yao Xiao9c789ea2022-10-26 14:46:55318 std::sort(topics.begin(), topics.end());
Yao Xiao716e4812022-04-20 22:57:19319
Yao Xiao84826f42022-10-24 16:13:58320 // Remove duplicate entries.
Yao Xiao9c789ea2022-10-26 14:46:55321 topics.erase(std::unique(topics.begin(), topics.end()), topics.end());
Yao Xiaocc379392022-03-25 21:39:06322
Yao Xiao9c789ea2022-10-26 14:46:55323 return true;
Yao Xiaocc379392022-03-25 21:39:06324}
Yao Xiao7a1995b2022-03-09 08:18:55325
Yao Xiao57892a22022-06-28 19:21:41326void BrowsingTopicsServiceImpl::GetBrowsingTopicsStateForWebUi(
327 bool calculate_now,
328 mojom::PageHandler::GetBrowsingTopicsStateCallback callback) {
Yao Xiao21f1faa2022-04-29 06:20:32329 if (!browsing_topics_state_loaded_) {
Yao Xiao57892a22022-06-28 19:21:41330 std::move(callback).Run(
331 mojom::WebUIGetBrowsingTopicsStateResult::NewOverrideStatusMessage(
332 "State loading hasn't finished. Please retry shortly."));
333 return;
Yao Xiao21f1faa2022-04-29 06:20:32334 }
335
Yao Xiao57892a22022-06-28 19:21:41336 // If a calculation is already in progress, get the webui topics state after
337 // the calculation is done. Do this regardless of whether `calculate_now` is
338 // true, i.e. if `calculate_now` is true, this request is effectively merged
339 // with the in progress calculation.
340 if (topics_calculator_) {
341 get_state_for_webui_callbacks_.push_back(std::move(callback));
342 return;
Yao Xiao21f1faa2022-04-29 06:20:32343 }
344
Yao Xiao57892a22022-06-28 19:21:41345 DCHECK(schedule_calculate_timer_.IsRunning());
Yao Xiao21f1faa2022-04-29 06:20:32346
Yao Xiao57892a22022-06-28 19:21:41347 if (calculate_now) {
348 get_state_for_webui_callbacks_.push_back(std::move(callback));
Yao Xiao21f1faa2022-04-29 06:20:32349
Yao Xiao57892a22022-06-28 19:21:41350 schedule_calculate_timer_.AbandonAndStop();
351 CalculateBrowsingTopics();
352 return;
Yao Xiao21f1faa2022-04-29 06:20:32353 }
354
Yao Xiao57892a22022-06-28 19:21:41355 std::move(callback).Run(GetBrowsingTopicsStateForWebUiHelper());
Yao Xiao21f1faa2022-04-29 06:20:32356}
357
Yao Xiao7a1995b2022-03-09 08:18:55358std::vector<privacy_sandbox::CanonicalTopic>
Yao Xiao7a1995b2022-03-09 08:18:55359BrowsingTopicsServiceImpl::GetTopTopicsForDisplay() const {
Yao Xiaocc379392022-03-25 21:39:06360 if (!browsing_topics_state_loaded_)
361 return {};
362
363 std::vector<privacy_sandbox::CanonicalTopic> result;
364
365 for (const EpochTopics& epoch : browsing_topics_state_.epochs()) {
Yao Xiaobf39e34d2022-03-28 21:48:28366 DCHECK_LE(epoch.padded_top_topics_start_index(),
367 epoch.top_topics_and_observing_domains().size());
368
369 for (size_t i = 0; i < epoch.padded_top_topics_start_index(); ++i) {
370 const TopicAndDomains& topic_and_domains =
371 epoch.top_topics_and_observing_domains()[i];
372
373 if (!topic_and_domains.IsValid())
Yao Xiaocc379392022-03-25 21:39:06374 continue;
375
Yao Xiaobf39e34d2022-03-28 21:48:28376 // A top topic can never be in the disallowed state (i.e. it will be
377 // cleared when it becomes diallowed).
378 DCHECK(privacy_sandbox_settings_->IsTopicAllowed(
379 privacy_sandbox::CanonicalTopic(topic_and_domains.topic(),
380 epoch.taxonomy_version())));
381
382 result.emplace_back(topic_and_domains.topic(), epoch.taxonomy_version());
Yao Xiaocc379392022-03-25 21:39:06383 }
384 }
385
386 return result;
387}
388
Yao Xiaobf39e34d2022-03-28 21:48:28389void BrowsingTopicsServiceImpl::ClearTopic(
390 const privacy_sandbox::CanonicalTopic& canonical_topic) {
391 if (!browsing_topics_state_loaded_)
392 return;
393
394 browsing_topics_state_.ClearTopic(canonical_topic.topic_id(),
395 canonical_topic.taxonomy_version());
396}
397
398void BrowsingTopicsServiceImpl::ClearTopicsDataForOrigin(
399 const url::Origin& origin) {
400 if (!browsing_topics_state_loaded_)
401 return;
402
403 std::string context_domain =
404 net::registry_controlled_domains::GetDomainAndRegistry(
405 origin.GetURL(),
406 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
407
408 HashedDomain hashed_context_domain = HashContextDomainForStorage(
409 browsing_topics_state_.hmac_key(), context_domain);
410
411 browsing_topics_state_.ClearContextDomain(hashed_context_domain);
412 site_data_manager_->ClearContextDomain(hashed_context_domain);
413}
414
415void BrowsingTopicsServiceImpl::ClearAllTopicsData() {
416 if (!browsing_topics_state_loaded_)
417 return;
418
419 browsing_topics_state_.ClearAllTopics();
420 site_data_manager_->ExpireDataBefore(base::Time::Now());
421}
422
Yao Xiaocc379392022-03-25 21:39:06423std::unique_ptr<BrowsingTopicsCalculator>
424BrowsingTopicsServiceImpl::CreateCalculator(
425 privacy_sandbox::PrivacySandboxSettings* privacy_sandbox_settings,
426 history::HistoryService* history_service,
427 content::BrowsingTopicsSiteDataManager* site_data_manager,
428 optimization_guide::PageContentAnnotationsService* annotations_service,
Yao Xiao57892a22022-06-28 19:21:41429 const base::circular_deque<EpochTopics>& epochs,
Yao Xiaocc379392022-03-25 21:39:06430 BrowsingTopicsCalculator::CalculateCompletedCallback callback) {
431 return std::make_unique<BrowsingTopicsCalculator>(
432 privacy_sandbox_settings, history_service, site_data_manager,
Yao Xiao57892a22022-06-28 19:21:41433 annotations_service, epochs, std::move(callback));
Yao Xiaocc379392022-03-25 21:39:06434}
435
436const BrowsingTopicsState& BrowsingTopicsServiceImpl::browsing_topics_state() {
437 return browsing_topics_state_;
438}
439
440void BrowsingTopicsServiceImpl::ScheduleBrowsingTopicsCalculation(
441 base::TimeDelta delay) {
442 DCHECK(browsing_topics_state_loaded_);
443
444 // `this` owns the timer, which is automatically cancelled on destruction, so
445 // base::Unretained(this) is safe.
446 schedule_calculate_timer_.Start(
447 FROM_HERE, delay,
448 base::BindOnce(&BrowsingTopicsServiceImpl::CalculateBrowsingTopics,
449 base::Unretained(this)));
450}
451
452void BrowsingTopicsServiceImpl::CalculateBrowsingTopics() {
453 DCHECK(browsing_topics_state_loaded_);
454
455 DCHECK(!topics_calculator_);
456
457 // `this` owns `topics_calculator_` so `topics_calculator_` should not invoke
458 // the callback once it's destroyed.
459 topics_calculator_ = CreateCalculator(
460 privacy_sandbox_settings_, history_service_, site_data_manager_,
Yao Xiao57892a22022-06-28 19:21:41461 annotations_service_, browsing_topics_state_.epochs(),
Yao Xiaocc379392022-03-25 21:39:06462 base::BindOnce(
463 &BrowsingTopicsServiceImpl::OnCalculateBrowsingTopicsCompleted,
464 base::Unretained(this)));
465}
466
467void BrowsingTopicsServiceImpl::OnCalculateBrowsingTopicsCompleted(
468 EpochTopics epoch_topics) {
469 DCHECK(browsing_topics_state_loaded_);
470
471 DCHECK(topics_calculator_);
472 topics_calculator_.reset();
473
474 browsing_topics_state_.AddEpoch(std::move(epoch_topics));
475 browsing_topics_state_.UpdateNextScheduledCalculationTime();
476
477 ScheduleBrowsingTopicsCalculation(
478 blink::features::kBrowsingTopicsTimePeriodPerEpoch.Get());
Yao Xiao57892a22022-06-28 19:21:41479
480 if (!get_state_for_webui_callbacks_.empty()) {
481 mojom::WebUIGetBrowsingTopicsStateResultPtr webui_state =
482 GetBrowsingTopicsStateForWebUiHelper();
483
484 for (auto& callback : get_state_for_webui_callbacks_) {
485 std::move(callback).Run(webui_state->Clone());
486 }
487
488 get_state_for_webui_callbacks_.clear();
489 }
Yao Xiaocc379392022-03-25 21:39:06490}
491
492void BrowsingTopicsServiceImpl::OnBrowsingTopicsStateLoaded() {
493 DCHECK(!browsing_topics_state_loaded_);
494 browsing_topics_state_loaded_ = true;
495
496 base::Time browsing_topics_data_sccessible_since =
497 privacy_sandbox_settings_->TopicsDataAccessibleSince();
498
499 StartupCalculateDecision decision = GetStartupCalculationDecision(
Yao Xiaobf39e34d2022-03-28 21:48:28500 browsing_topics_state_, browsing_topics_data_sccessible_since,
501 base::BindRepeating(
502 &privacy_sandbox::PrivacySandboxSettings::IsTopicAllowed,
503 base::Unretained(privacy_sandbox_settings_)));
Yao Xiaocc379392022-03-25 21:39:06504
505 if (decision.clear_topics_data)
506 browsing_topics_state_.ClearAllTopics();
507
508 site_data_manager_->ExpireDataBefore(browsing_topics_data_sccessible_since);
509
510 ScheduleBrowsingTopicsCalculation(decision.next_calculation_delay);
511}
512
513void BrowsingTopicsServiceImpl::Shutdown() {
514 privacy_sandbox_settings_observation_.Reset();
515 history_service_observation_.Reset();
516}
517
518void BrowsingTopicsServiceImpl::OnTopicsDataAccessibleSinceUpdated() {
519 if (!browsing_topics_state_loaded_)
520 return;
521
Yao Xiaobf39e34d2022-03-28 21:48:28522 // Here we rely on the fact that `browsing_topics_data_accessible_since` can
523 // only be updated to base::Time::Now() due to data deletion. In this case, we
524 // should just clear all topics.
525 browsing_topics_state_.ClearAllTopics();
526 site_data_manager_->ExpireDataBefore(
527 privacy_sandbox_settings_->TopicsDataAccessibleSince());
Yao Xiaocc379392022-03-25 21:39:06528
529 // Abort the outstanding topics calculation and restart immediately.
530 if (topics_calculator_) {
531 DCHECK(!schedule_calculate_timer_.IsRunning());
532
533 topics_calculator_.reset();
534 CalculateBrowsingTopics();
535 }
536}
537
538void BrowsingTopicsServiceImpl::OnURLsDeleted(
539 history::HistoryService* history_service,
540 const history::DeletionInfo& deletion_info) {
541 if (!browsing_topics_state_loaded_)
542 return;
543
544 // Ignore invalid time_range.
545 if (!deletion_info.IsAllHistory() && !deletion_info.time_range().IsValid())
546 return;
547
548 for (size_t i = 0; i < browsing_topics_state_.epochs().size(); ++i) {
549 const EpochTopics& epoch_topics = browsing_topics_state_.epochs()[i];
550
551 if (epoch_topics.empty())
552 continue;
553
Yao Xiao57892a22022-06-28 19:21:41554 // The typical case is assumed here. We cannot always derive the original
555 // history start time, as the necessary data (e.g. its previous epoch's
556 // calculation time) may have been gone.
557 base::Time history_data_start_time =
558 epoch_topics.calculation_time() -
559 blink::features::kBrowsingTopicsTimePeriodPerEpoch.Get();
560
Yao Xiaocc379392022-03-25 21:39:06561 bool time_range_overlap =
562 epoch_topics.calculation_time() >= deletion_info.time_range().begin() &&
Yao Xiao57892a22022-06-28 19:21:41563 history_data_start_time <= deletion_info.time_range().end();
Yao Xiaocc379392022-03-25 21:39:06564
565 if (time_range_overlap)
566 browsing_topics_state_.ClearOneEpoch(i);
567 }
568
569 // If there's an outstanding topics calculation, abort and restart it.
570 if (topics_calculator_) {
571 DCHECK(!schedule_calculate_timer_.IsRunning());
572
573 topics_calculator_.reset();
574 CalculateBrowsingTopics();
575 }
Yao Xiao7a1995b2022-03-09 08:18:55576}
577
Yao Xiao57892a22022-06-28 19:21:41578mojom::WebUIGetBrowsingTopicsStateResultPtr
579BrowsingTopicsServiceImpl::GetBrowsingTopicsStateForWebUiHelper() {
580 DCHECK(browsing_topics_state_loaded_);
581 DCHECK(!topics_calculator_);
582
583 auto webui_state = mojom::WebUIBrowsingTopicsState::New();
584
585 webui_state->next_scheduled_calculation_time =
586 browsing_topics_state_.next_scheduled_calculation_time();
587
588 for (const EpochTopics& epoch : browsing_topics_state_.epochs()) {
589 DCHECK_LE(epoch.padded_top_topics_start_index(),
590 epoch.top_topics_and_observing_domains().size());
591
592 // Note: for a failed epoch calculation, the default zero-initialized values
593 // will be displayed in the Web UI.
594 auto webui_epoch = mojom::WebUIEpoch::New();
595 webui_epoch->calculation_time = epoch.calculation_time();
596 webui_epoch->model_version = base::NumberToString(epoch.model_version());
597 webui_epoch->taxonomy_version =
598 base::NumberToString(epoch.taxonomy_version());
599
600 for (size_t i = 0; i < epoch.top_topics_and_observing_domains().size();
601 ++i) {
602 const TopicAndDomains& topic_and_domains =
603 epoch.top_topics_and_observing_domains()[i];
604
605 privacy_sandbox::CanonicalTopic canonical_topic =
606 privacy_sandbox::CanonicalTopic(topic_and_domains.topic(),
607 epoch.taxonomy_version());
608
609 std::vector<std::string> webui_observed_by_domains;
610 webui_observed_by_domains.reserve(
611 topic_and_domains.hashed_domains().size());
612 for (const auto& domain : topic_and_domains.hashed_domains()) {
613 webui_observed_by_domains.push_back(
614 base::NumberToString(domain.value()));
615 }
616
617 // Note: if the topic is invalid (i.e. cleared), the output `topic_id`
618 // will be 0; if the topic is invalid, or if the taxonomy version isn't
619 // recognized by this Chrome binary, the output `topic_name` will be
620 // "Unknown".
621 auto webui_topic = mojom::WebUITopic::New();
622 webui_topic->topic_id = topic_and_domains.topic().value();
623 webui_topic->topic_name = canonical_topic.GetLocalizedRepresentation();
624 webui_topic->is_real_topic = (i < epoch.padded_top_topics_start_index());
625 webui_topic->observed_by_domains = std::move(webui_observed_by_domains);
626
627 webui_epoch->topics.push_back(std::move(webui_topic));
628 }
629
630 webui_state->epochs.push_back(std::move(webui_epoch));
631 }
632
633 // Reorder the epochs from latest to oldest.
634 base::ranges::reverse(webui_state->epochs);
635
636 return mojom::WebUIGetBrowsingTopicsStateResult::NewBrowsingTopicsState(
637 std::move(webui_state));
638}
639
Yao Xiao7a1995b2022-03-09 08:18:55640} // namespace browsing_topics