blob: 9ee9edf437271659ddeab4cdc706e73d0ae986b4 [file] [log] [blame]
// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include "components/browsing_topics/browsing_topics_service_impl.h"
6
Yao Xiaocc379392022-03-25 21:39:067#include <random>
8
9#include "base/rand_util.h"
Yao Xiao57892a22022-06-28 19:21:4110#include "base/ranges/algorithm.h"
Gabriel Charetted87f10f2022-03-31 00:44:2211#include "base/time/time.h"
Yao Xiaocc379392022-03-25 21:39:0612#include "components/browsing_topics/browsing_topics_calculator.h"
13#include "components/browsing_topics/browsing_topics_page_load_data_tracker.h"
Yao Xiao21f1faa2022-04-29 06:20:3214#include "components/browsing_topics/mojom/browsing_topics_internals.mojom.h"
Yao Xiaocc379392022-03-25 21:39:0615#include "components/browsing_topics/util.h"
Yao Xiao3a03e602022-10-18 18:17:5616#include "components/content_settings/browser/page_specific_content_settings.h"
Findit3a850b6e2022-09-08 08:39:4317#include "components/optimization_guide/content/browser/page_content_annotations_service.h"
Yao Xiaocc379392022-03-25 21:39:0618#include "content/public/browser/browsing_topics_site_data_manager.h"
19#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
Yao Xiao716e4812022-04-20 22:57:1920#include "services/metrics/public/cpp/ukm_builders.h"
21#include "services/metrics/public/cpp/ukm_recorder.h"
Yao Xiaocc379392022-03-25 21:39:0622#include "third_party/blink/public/common/features.h"
23#include "third_party/blink/public/mojom/browsing_topics/browsing_topics.mojom.h"
24
Yao Xiao7a1995b2022-03-09 08:18:5525namespace browsing_topics {
26
Yao Xiaocc379392022-03-25 21:39:0627namespace {
28
Yao Xiaobf39e34d2022-03-28 21:48:2829// Returns whether the topics should all be cleared given
30// `browsing_topics_data_accessible_since` and `is_topic_allowed_by_settings`.
31// Returns true if `browsing_topics_data_accessible_since` is greater than the
32// last calculation time, or if any top topic is disallowed from the settings.
33// The latter could happen if the topic became disallowed when
34// `browsing_topics_state` was still loading (and we didn't get a chance to
35// clear it). This is an unlikely edge case, so it's fine to over-delete.
36bool ShouldClearTopicsOnStartup(
Yao Xiaocc379392022-03-25 21:39:0637 const BrowsingTopicsState& browsing_topics_state,
Yao Xiaobf39e34d2022-03-28 21:48:2838 base::Time browsing_topics_data_accessible_since,
39 base::RepeatingCallback<bool(const privacy_sandbox::CanonicalTopic&)>
40 is_topic_allowed_by_settings) {
41 DCHECK(!is_topic_allowed_by_settings.is_null());
42
43 if (browsing_topics_state.epochs().empty())
44 return false;
45
Yao Xiaocc379392022-03-25 21:39:0646 // Here we rely on the fact that `browsing_topics_data_accessible_since` can
47 // only be updated to base::Time::Now() due to data deletion. So we'll either
48 // need to clear all topics data, or no-op. If this assumption no longer
49 // holds, we'd need to iterate over all epochs, check their calculation time,
50 // and selectively delete the epochs.
Yao Xiaobf39e34d2022-03-28 21:48:2851 if (browsing_topics_data_accessible_since >
52 browsing_topics_state.epochs().back().calculation_time()) {
53 return true;
54 }
55
56 for (const EpochTopics& epoch : browsing_topics_state.epochs()) {
57 for (const TopicAndDomains& topic_and_domains :
58 epoch.top_topics_and_observing_domains()) {
59 if (!topic_and_domains.IsValid())
60 continue;
61
62 if (!is_topic_allowed_by_settings.Run(privacy_sandbox::CanonicalTopic(
63 topic_and_domains.topic(), epoch.taxonomy_version()))) {
64 return true;
65 }
66 }
67 }
68
69 return false;
Yao Xiaocc379392022-03-25 21:39:0670}
71
72struct StartupCalculateDecision {
73 bool clear_topics_data = true;
74 base::TimeDelta next_calculation_delay;
75};
76
77StartupCalculateDecision GetStartupCalculationDecision(
78 const BrowsingTopicsState& browsing_topics_state,
Yao Xiaobf39e34d2022-03-28 21:48:2879 base::Time browsing_topics_data_accessible_since,
80 base::RepeatingCallback<bool(const privacy_sandbox::CanonicalTopic&)>
81 is_topic_allowed_by_settings) {
Yao Xiaocc379392022-03-25 21:39:0682 // The topics have never been calculated. This could happen with a fresh
83 // profile or the if the config has updated. In case of a config update, the
84 // topics should have already been cleared when initializing the
85 // `BrowsingTopicsState`.
86 if (browsing_topics_state.next_scheduled_calculation_time().is_null()) {
87 return StartupCalculateDecision{
88 .clear_topics_data = false,
89 .next_calculation_delay = base::TimeDelta()};
90 }
91
92 // This could happen when clear-on-exit is turned on and has caused the
Yao Xiaobf39e34d2022-03-28 21:48:2893 // cookies to be deleted on startup, of if a topic became disallowed when
94 // `browsing_topics_state` was still loading.
95 bool should_clear_topics_data = ShouldClearTopicsOnStartup(
96 browsing_topics_state, browsing_topics_data_accessible_since,
97 is_topic_allowed_by_settings);
Yao Xiaocc379392022-03-25 21:39:0698
99 base::TimeDelta presumed_next_calculation_delay =
100 browsing_topics_state.next_scheduled_calculation_time() -
101 base::Time::Now();
102
103 // The scheduled calculation time was reached before the startup.
104 if (presumed_next_calculation_delay <= base::TimeDelta()) {
105 return StartupCalculateDecision{
106 .clear_topics_data = should_clear_topics_data,
107 .next_calculation_delay = base::TimeDelta()};
108 }
109
110 // This could happen if the machine time has changed since the last
111 // calculation. Recalculate immediately to align with the expected schedule
112 // rather than potentially stop computing for a very long time.
113 if (presumed_next_calculation_delay >=
114 2 * blink::features::kBrowsingTopicsTimePeriodPerEpoch.Get()) {
115 return StartupCalculateDecision{
116 .clear_topics_data = should_clear_topics_data,
117 .next_calculation_delay = base::TimeDelta()};
118 }
119
120 return StartupCalculateDecision{
121 .clear_topics_data = should_clear_topics_data,
122 .next_calculation_delay = presumed_next_calculation_delay};
123}
124
// Why the topics API handed back an empty result.
//
// These values are persisted to logs: never renumber an entry and never reuse
// a numeric value.
enum class EmptyApiResultReason {
  // Loading of the topics state has not completed yet.
  kStateNotReady = 0,

  // User settings forbid the access.
  kAccessDisallowedBySettings = 1,

  // No candidate topics exist, e.g. there are no candidate epochs, the epoch
  // calculation failed, or individual topics were cleared or blocked.
  kNoCandicateTopics = 2,

  // Candidate topics existed but were filtered out for the requesting context.
  kCandicateTopicsFiltered = 3,

  kMaxValue = kCandicateTopicsFiltered,
};
144
145void RecordBrowsingTopicsApiResultUkmMetrics(
146 EmptyApiResultReason empty_reason,
147 content::RenderFrameHost* main_frame) {
148 ukm::UkmRecorder* ukm_recorder = ukm::UkmRecorder::Get();
149 ukm::builders::BrowsingTopics_DocumentBrowsingTopicsApiResult builder(
150 main_frame->GetPageUkmSourceId());
151 builder.SetEmptyReason(static_cast<int64_t>(empty_reason));
152 builder.Record(ukm_recorder->Get());
153}
154
155void RecordBrowsingTopicsApiResultUkmMetrics(
156 const std::vector<std::pair<blink::mojom::EpochTopicPtr, bool>>&
157 topics_with_status,
158 content::RenderFrameHost* main_frame) {
159 DCHECK(!topics_with_status.empty());
160
161 ukm::UkmRecorder* ukm_recorder = ukm::UkmRecorder::Get();
162 ukm::builders::BrowsingTopics_DocumentBrowsingTopicsApiResult builder(
163 main_frame->GetPageUkmSourceId());
164
165 for (size_t i = 0; i < 3u && topics_with_status.size() > i; ++i) {
166 const blink::mojom::EpochTopicPtr& topic = topics_with_status[i].first;
167 bool is_true_topic = topics_with_status[i].second;
168
169 int taxonomy_version = 0;
170 base::StringToInt(topic->taxonomy_version, &taxonomy_version);
171 DCHECK(taxonomy_version);
172
173 int64_t model_version = 0;
174 base::StringToInt64(topic->model_version, &model_version);
175 DCHECK(model_version);
176
177 if (i == 0) {
178 builder.SetReturnedTopic0(topic->topic)
179 .SetReturnedTopic0IsTrueTopTopic(is_true_topic)
180 .SetReturnedTopic0TaxonomyVersion(taxonomy_version)
181 .SetReturnedTopic0ModelVersion(model_version);
182 } else if (i == 1) {
183 builder.SetReturnedTopic1(topic->topic)
184 .SetReturnedTopic1IsTrueTopTopic(is_true_topic)
185 .SetReturnedTopic1TaxonomyVersion(taxonomy_version)
186 .SetReturnedTopic1ModelVersion(model_version);
187 } else {
188 DCHECK_EQ(i, 2u);
189 builder.SetReturnedTopic2(topic->topic)
190 .SetReturnedTopic2IsTrueTopTopic(is_true_topic)
191 .SetReturnedTopic2TaxonomyVersion(taxonomy_version)
192 .SetReturnedTopic2ModelVersion(model_version);
193 }
194 }
195
196 builder.Record(ukm_recorder->Get());
197}
198
Yao Xiaocc379392022-03-25 21:39:06199} // namespace
200
Yao Xiao7a1995b2022-03-09 08:18:55201BrowsingTopicsServiceImpl::~BrowsingTopicsServiceImpl() = default;
202
Yao Xiaocc379392022-03-25 21:39:06203BrowsingTopicsServiceImpl::BrowsingTopicsServiceImpl(
204 const base::FilePath& profile_path,
205 privacy_sandbox::PrivacySandboxSettings* privacy_sandbox_settings,
206 history::HistoryService* history_service,
207 content::BrowsingTopicsSiteDataManager* site_data_manager,
208 optimization_guide::PageContentAnnotationsService* annotations_service)
209 : privacy_sandbox_settings_(privacy_sandbox_settings),
210 history_service_(history_service),
211 site_data_manager_(site_data_manager),
212 annotations_service_(annotations_service),
213 browsing_topics_state_(
214 profile_path,
215 base::BindOnce(
216 &BrowsingTopicsServiceImpl::OnBrowsingTopicsStateLoaded,
217 base::Unretained(this))) {
218 privacy_sandbox_settings_observation_.Observe(privacy_sandbox_settings);
219 history_service_observation_.Observe(history_service);
220
Yao Xiaobc1241a2022-03-29 05:23:37221 // Greedily request the model to be available to reduce the latency in later
222 // topics calculation.
Yao Xiaocc379392022-03-25 21:39:06223 annotations_service_->RequestAndNotifyWhenModelAvailable(
224 optimization_guide::AnnotationType::kPageTopics, base::DoNothing());
225}
226
227std::vector<blink::mojom::EpochTopicPtr>
228BrowsingTopicsServiceImpl::GetBrowsingTopicsForJsApi(
229 const url::Origin& context_origin,
Yao Xiao1d60ed32022-09-27 16:33:24230 content::RenderFrameHost* main_frame,
231 bool observe) {
Yao Xiao716e4812022-04-20 22:57:19232 if (!browsing_topics_state_loaded_) {
233 RecordBrowsingTopicsApiResultUkmMetrics(
234 EmptyApiResultReason::kStateNotReady, main_frame);
Yao Xiaocc379392022-03-25 21:39:06235 return {};
Yao Xiao716e4812022-04-20 22:57:19236 }
Yao Xiaocc379392022-03-25 21:39:06237
Yao Xiao716e4812022-04-20 22:57:19238 if (!privacy_sandbox_settings_->IsTopicsAllowed()) {
239 RecordBrowsingTopicsApiResultUkmMetrics(
240 EmptyApiResultReason::kAccessDisallowedBySettings, main_frame);
Yao Xiaocc379392022-03-25 21:39:06241 return {};
Yao Xiao716e4812022-04-20 22:57:19242 }
Yao Xiaocc379392022-03-25 21:39:06243
244 if (!privacy_sandbox_settings_->IsTopicsAllowedForContext(
245 context_origin.GetURL(), main_frame->GetLastCommittedOrigin())) {
Yao Xiao716e4812022-04-20 22:57:19246 RecordBrowsingTopicsApiResultUkmMetrics(
247 EmptyApiResultReason::kAccessDisallowedBySettings, main_frame);
Yao Xiaocc379392022-03-25 21:39:06248 return {};
249 }
250
251 std::string context_domain =
252 net::registry_controlled_domains::GetDomainAndRegistry(
253 context_origin.GetURL(),
254 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
255
256 HashedDomain hashed_context_domain = HashContextDomainForStorage(
257 browsing_topics_state_.hmac_key(), context_domain);
258
Yao Xiao1d60ed32022-09-27 16:33:24259 if (observe) {
260 // Track the API usage context after the permissions check.
261 BrowsingTopicsPageLoadDataTracker::GetOrCreateForPage(main_frame->GetPage())
262 ->OnBrowsingTopicsApiUsed(hashed_context_domain, history_service_);
263 }
Yao Xiaocc379392022-03-25 21:39:06264
265 std::string top_domain =
266 net::registry_controlled_domains::GetDomainAndRegistry(
267 main_frame->GetLastCommittedOrigin().GetURL(),
268 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
269
Yao Xiao716e4812022-04-20 22:57:19270 bool has_filtered_topics = false;
271
272 // The result topics along with flags denoting whether they are true topics.
273 std::vector<std::pair<blink::mojom::EpochTopicPtr, bool>> topics_with_status;
274
Yao Xiaocc379392022-03-25 21:39:06275 for (const EpochTopics* epoch :
276 browsing_topics_state_.EpochsForSite(top_domain)) {
Yao Xiao716e4812022-04-20 22:57:19277 bool output_is_true_topic = false;
278 bool candidate_topic_filtered = false;
Yao Xiaocc379392022-03-25 21:39:06279 absl::optional<Topic> topic = epoch->TopicForSite(
Yao Xiao716e4812022-04-20 22:57:19280 top_domain, hashed_context_domain, browsing_topics_state_.hmac_key(),
281 output_is_true_topic, candidate_topic_filtered);
282
283 if (candidate_topic_filtered)
284 has_filtered_topics = true;
Yao Xiaocc379392022-03-25 21:39:06285
286 // Only add a non-empty topic to the result.
287 if (!topic)
288 continue;
289
Yao Xiaobf39e34d2022-03-28 21:48:28290 // Although a top topic can never be in the disallowed state, the returned
291 // `topic` may be the random one. Thus we still need this check.
Yao Xiaocc379392022-03-25 21:39:06292 if (!privacy_sandbox_settings_->IsTopicAllowed(
293 privacy_sandbox::CanonicalTopic(*topic,
294 epoch->taxonomy_version()))) {
Yao Xiaobf39e34d2022-03-28 21:48:28295 continue;
Yao Xiaocc379392022-03-25 21:39:06296 }
297
Yao Xiao3a03e602022-10-18 18:17:56298 // `PageSpecificContentSettings` should only observe true top topics
299 // accessed on the page. It's okay to notify the same topic multiple
300 // times even though duplicate topics will be removed in the end.
301 if (output_is_true_topic) {
302 privacy_sandbox::CanonicalTopic canonical_topic(
303 browsing_topics::Topic(topic.value()), epoch->taxonomy_version());
304 content_settings::PageSpecificContentSettings::TopicAccessed(
305 main_frame, context_origin, /*blocked_by_policy=*/false,
306 canonical_topic);
307 }
308
Yao Xiao21f1faa2022-04-29 06:20:32309 auto result_topic = blink::mojom::EpochTopic::New();
Yao Xiaocc379392022-03-25 21:39:06310 result_topic->topic = topic.value().value();
311 result_topic->config_version = base::StrCat(
312 {"chrome.", base::NumberToString(
313 blink::features::kBrowsingTopicsConfigVersion.Get())});
314 result_topic->model_version = base::NumberToString(epoch->model_version());
315 result_topic->taxonomy_version =
316 base::NumberToString(epoch->taxonomy_version());
317 result_topic->version = base::StrCat({result_topic->config_version, ":",
318 result_topic->taxonomy_version, ":",
319 result_topic->model_version});
Yao Xiao716e4812022-04-20 22:57:19320 topics_with_status.emplace_back(std::move(result_topic),
321 output_is_true_topic);
Yao Xiaocc379392022-03-25 21:39:06322 }
323
Yao Xiao716e4812022-04-20 22:57:19324 // Sort `topics_with_status` based on `EpochTopicPtr` first, and if the
325 // `EpochTopicPtr` parts are equal, then a true topic will be ordered before a
326 // random topic. This ensures that when we later deduplicate based on the
327 // `EpochTopicPtr` field only, the associated is-true-topic status will be
328 // true as long as there is one true topic for that topic in
329 // `topics_with_status`.
330 std::sort(topics_with_status.begin(), topics_with_status.end(),
331 [](const auto& left, const auto& right) {
332 if (left.first < right.first)
333 return true;
334 if (left.first > right.first)
335 return false;
336 return right.second < left.second;
337 });
338
339 // Remove duplicate `EpochTopicPtr` entries.
340 topics_with_status.erase(
341 std::unique(topics_with_status.begin(), topics_with_status.end(),
342 [](const auto& left, const auto& right) {
343 return left.first == right.first;
344 }),
345 topics_with_status.end());
Yao Xiaocc379392022-03-25 21:39:06346
347 // Shuffle the entries.
Yao Xiao716e4812022-04-20 22:57:19348 base::RandomShuffle(topics_with_status.begin(), topics_with_status.end());
349
350 if (topics_with_status.empty()) {
351 if (has_filtered_topics) {
352 RecordBrowsingTopicsApiResultUkmMetrics(
353 EmptyApiResultReason::kCandicateTopicsFiltered, main_frame);
354 } else {
355 RecordBrowsingTopicsApiResultUkmMetrics(
356 EmptyApiResultReason::kNoCandicateTopics, main_frame);
357 }
358 return {};
359 }
360
361 RecordBrowsingTopicsApiResultUkmMetrics(topics_with_status, main_frame);
362
363 std::vector<blink::mojom::EpochTopicPtr> result_topics;
364 result_topics.reserve(topics_with_status.size());
365 std::transform(topics_with_status.begin(), topics_with_status.end(),
366 std::back_inserter(result_topics),
367 [](auto& topic_with_status) {
368 return std::move(topic_with_status.first);
369 });
Yao Xiaocc379392022-03-25 21:39:06370
371 return result_topics;
372}
Yao Xiao7a1995b2022-03-09 08:18:55373
Yao Xiao57892a22022-06-28 19:21:41374void BrowsingTopicsServiceImpl::GetBrowsingTopicsStateForWebUi(
375 bool calculate_now,
376 mojom::PageHandler::GetBrowsingTopicsStateCallback callback) {
Yao Xiao21f1faa2022-04-29 06:20:32377 if (!browsing_topics_state_loaded_) {
Yao Xiao57892a22022-06-28 19:21:41378 std::move(callback).Run(
379 mojom::WebUIGetBrowsingTopicsStateResult::NewOverrideStatusMessage(
380 "State loading hasn't finished. Please retry shortly."));
381 return;
Yao Xiao21f1faa2022-04-29 06:20:32382 }
383
Yao Xiao57892a22022-06-28 19:21:41384 // If a calculation is already in progress, get the webui topics state after
385 // the calculation is done. Do this regardless of whether `calculate_now` is
386 // true, i.e. if `calculate_now` is true, this request is effectively merged
387 // with the in progress calculation.
388 if (topics_calculator_) {
389 get_state_for_webui_callbacks_.push_back(std::move(callback));
390 return;
Yao Xiao21f1faa2022-04-29 06:20:32391 }
392
Yao Xiao57892a22022-06-28 19:21:41393 DCHECK(schedule_calculate_timer_.IsRunning());
Yao Xiao21f1faa2022-04-29 06:20:32394
Yao Xiao57892a22022-06-28 19:21:41395 if (calculate_now) {
396 get_state_for_webui_callbacks_.push_back(std::move(callback));
Yao Xiao21f1faa2022-04-29 06:20:32397
Yao Xiao57892a22022-06-28 19:21:41398 schedule_calculate_timer_.AbandonAndStop();
399 CalculateBrowsingTopics();
400 return;
Yao Xiao21f1faa2022-04-29 06:20:32401 }
402
Yao Xiao57892a22022-06-28 19:21:41403 std::move(callback).Run(GetBrowsingTopicsStateForWebUiHelper());
Yao Xiao21f1faa2022-04-29 06:20:32404}
405
Yao Xiao7a1995b2022-03-09 08:18:55406std::vector<privacy_sandbox::CanonicalTopic>
Yao Xiao7a1995b2022-03-09 08:18:55407BrowsingTopicsServiceImpl::GetTopTopicsForDisplay() const {
Yao Xiaocc379392022-03-25 21:39:06408 if (!browsing_topics_state_loaded_)
409 return {};
410
411 std::vector<privacy_sandbox::CanonicalTopic> result;
412
413 for (const EpochTopics& epoch : browsing_topics_state_.epochs()) {
Yao Xiaobf39e34d2022-03-28 21:48:28414 DCHECK_LE(epoch.padded_top_topics_start_index(),
415 epoch.top_topics_and_observing_domains().size());
416
417 for (size_t i = 0; i < epoch.padded_top_topics_start_index(); ++i) {
418 const TopicAndDomains& topic_and_domains =
419 epoch.top_topics_and_observing_domains()[i];
420
421 if (!topic_and_domains.IsValid())
Yao Xiaocc379392022-03-25 21:39:06422 continue;
423
Yao Xiaobf39e34d2022-03-28 21:48:28424 // A top topic can never be in the disallowed state (i.e. it will be
425 // cleared when it becomes diallowed).
426 DCHECK(privacy_sandbox_settings_->IsTopicAllowed(
427 privacy_sandbox::CanonicalTopic(topic_and_domains.topic(),
428 epoch.taxonomy_version())));
429
430 result.emplace_back(topic_and_domains.topic(), epoch.taxonomy_version());
Yao Xiaocc379392022-03-25 21:39:06431 }
432 }
433
434 return result;
435}
436
Yao Xiaobf39e34d2022-03-28 21:48:28437void BrowsingTopicsServiceImpl::ClearTopic(
438 const privacy_sandbox::CanonicalTopic& canonical_topic) {
439 if (!browsing_topics_state_loaded_)
440 return;
441
442 browsing_topics_state_.ClearTopic(canonical_topic.topic_id(),
443 canonical_topic.taxonomy_version());
444}
445
446void BrowsingTopicsServiceImpl::ClearTopicsDataForOrigin(
447 const url::Origin& origin) {
448 if (!browsing_topics_state_loaded_)
449 return;
450
451 std::string context_domain =
452 net::registry_controlled_domains::GetDomainAndRegistry(
453 origin.GetURL(),
454 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
455
456 HashedDomain hashed_context_domain = HashContextDomainForStorage(
457 browsing_topics_state_.hmac_key(), context_domain);
458
459 browsing_topics_state_.ClearContextDomain(hashed_context_domain);
460 site_data_manager_->ClearContextDomain(hashed_context_domain);
461}
462
463void BrowsingTopicsServiceImpl::ClearAllTopicsData() {
464 if (!browsing_topics_state_loaded_)
465 return;
466
467 browsing_topics_state_.ClearAllTopics();
468 site_data_manager_->ExpireDataBefore(base::Time::Now());
469}
470
Yao Xiaocc379392022-03-25 21:39:06471std::unique_ptr<BrowsingTopicsCalculator>
472BrowsingTopicsServiceImpl::CreateCalculator(
473 privacy_sandbox::PrivacySandboxSettings* privacy_sandbox_settings,
474 history::HistoryService* history_service,
475 content::BrowsingTopicsSiteDataManager* site_data_manager,
476 optimization_guide::PageContentAnnotationsService* annotations_service,
Yao Xiao57892a22022-06-28 19:21:41477 const base::circular_deque<EpochTopics>& epochs,
Yao Xiaocc379392022-03-25 21:39:06478 BrowsingTopicsCalculator::CalculateCompletedCallback callback) {
479 return std::make_unique<BrowsingTopicsCalculator>(
480 privacy_sandbox_settings, history_service, site_data_manager,
Yao Xiao57892a22022-06-28 19:21:41481 annotations_service, epochs, std::move(callback));
Yao Xiaocc379392022-03-25 21:39:06482}
483
484const BrowsingTopicsState& BrowsingTopicsServiceImpl::browsing_topics_state() {
485 return browsing_topics_state_;
486}
487
488void BrowsingTopicsServiceImpl::ScheduleBrowsingTopicsCalculation(
489 base::TimeDelta delay) {
490 DCHECK(browsing_topics_state_loaded_);
491
492 // `this` owns the timer, which is automatically cancelled on destruction, so
493 // base::Unretained(this) is safe.
494 schedule_calculate_timer_.Start(
495 FROM_HERE, delay,
496 base::BindOnce(&BrowsingTopicsServiceImpl::CalculateBrowsingTopics,
497 base::Unretained(this)));
498}
499
500void BrowsingTopicsServiceImpl::CalculateBrowsingTopics() {
501 DCHECK(browsing_topics_state_loaded_);
502
503 DCHECK(!topics_calculator_);
504
505 // `this` owns `topics_calculator_` so `topics_calculator_` should not invoke
506 // the callback once it's destroyed.
507 topics_calculator_ = CreateCalculator(
508 privacy_sandbox_settings_, history_service_, site_data_manager_,
Yao Xiao57892a22022-06-28 19:21:41509 annotations_service_, browsing_topics_state_.epochs(),
Yao Xiaocc379392022-03-25 21:39:06510 base::BindOnce(
511 &BrowsingTopicsServiceImpl::OnCalculateBrowsingTopicsCompleted,
512 base::Unretained(this)));
513}
514
515void BrowsingTopicsServiceImpl::OnCalculateBrowsingTopicsCompleted(
516 EpochTopics epoch_topics) {
517 DCHECK(browsing_topics_state_loaded_);
518
519 DCHECK(topics_calculator_);
520 topics_calculator_.reset();
521
522 browsing_topics_state_.AddEpoch(std::move(epoch_topics));
523 browsing_topics_state_.UpdateNextScheduledCalculationTime();
524
525 ScheduleBrowsingTopicsCalculation(
526 blink::features::kBrowsingTopicsTimePeriodPerEpoch.Get());
Yao Xiao57892a22022-06-28 19:21:41527
528 if (!get_state_for_webui_callbacks_.empty()) {
529 mojom::WebUIGetBrowsingTopicsStateResultPtr webui_state =
530 GetBrowsingTopicsStateForWebUiHelper();
531
532 for (auto& callback : get_state_for_webui_callbacks_) {
533 std::move(callback).Run(webui_state->Clone());
534 }
535
536 get_state_for_webui_callbacks_.clear();
537 }
Yao Xiaocc379392022-03-25 21:39:06538}
539
540void BrowsingTopicsServiceImpl::OnBrowsingTopicsStateLoaded() {
541 DCHECK(!browsing_topics_state_loaded_);
542 browsing_topics_state_loaded_ = true;
543
544 base::Time browsing_topics_data_sccessible_since =
545 privacy_sandbox_settings_->TopicsDataAccessibleSince();
546
547 StartupCalculateDecision decision = GetStartupCalculationDecision(
Yao Xiaobf39e34d2022-03-28 21:48:28548 browsing_topics_state_, browsing_topics_data_sccessible_since,
549 base::BindRepeating(
550 &privacy_sandbox::PrivacySandboxSettings::IsTopicAllowed,
551 base::Unretained(privacy_sandbox_settings_)));
Yao Xiaocc379392022-03-25 21:39:06552
553 if (decision.clear_topics_data)
554 browsing_topics_state_.ClearAllTopics();
555
556 site_data_manager_->ExpireDataBefore(browsing_topics_data_sccessible_since);
557
558 ScheduleBrowsingTopicsCalculation(decision.next_calculation_delay);
559}
560
561void BrowsingTopicsServiceImpl::Shutdown() {
562 privacy_sandbox_settings_observation_.Reset();
563 history_service_observation_.Reset();
564}
565
566void BrowsingTopicsServiceImpl::OnTopicsDataAccessibleSinceUpdated() {
567 if (!browsing_topics_state_loaded_)
568 return;
569
Yao Xiaobf39e34d2022-03-28 21:48:28570 // Here we rely on the fact that `browsing_topics_data_accessible_since` can
571 // only be updated to base::Time::Now() due to data deletion. In this case, we
572 // should just clear all topics.
573 browsing_topics_state_.ClearAllTopics();
574 site_data_manager_->ExpireDataBefore(
575 privacy_sandbox_settings_->TopicsDataAccessibleSince());
Yao Xiaocc379392022-03-25 21:39:06576
577 // Abort the outstanding topics calculation and restart immediately.
578 if (topics_calculator_) {
579 DCHECK(!schedule_calculate_timer_.IsRunning());
580
581 topics_calculator_.reset();
582 CalculateBrowsingTopics();
583 }
584}
585
586void BrowsingTopicsServiceImpl::OnURLsDeleted(
587 history::HistoryService* history_service,
588 const history::DeletionInfo& deletion_info) {
589 if (!browsing_topics_state_loaded_)
590 return;
591
592 // Ignore invalid time_range.
593 if (!deletion_info.IsAllHistory() && !deletion_info.time_range().IsValid())
594 return;
595
596 for (size_t i = 0; i < browsing_topics_state_.epochs().size(); ++i) {
597 const EpochTopics& epoch_topics = browsing_topics_state_.epochs()[i];
598
599 if (epoch_topics.empty())
600 continue;
601
Yao Xiao57892a22022-06-28 19:21:41602 // The typical case is assumed here. We cannot always derive the original
603 // history start time, as the necessary data (e.g. its previous epoch's
604 // calculation time) may have been gone.
605 base::Time history_data_start_time =
606 epoch_topics.calculation_time() -
607 blink::features::kBrowsingTopicsTimePeriodPerEpoch.Get();
608
Yao Xiaocc379392022-03-25 21:39:06609 bool time_range_overlap =
610 epoch_topics.calculation_time() >= deletion_info.time_range().begin() &&
Yao Xiao57892a22022-06-28 19:21:41611 history_data_start_time <= deletion_info.time_range().end();
Yao Xiaocc379392022-03-25 21:39:06612
613 if (time_range_overlap)
614 browsing_topics_state_.ClearOneEpoch(i);
615 }
616
617 // If there's an outstanding topics calculation, abort and restart it.
618 if (topics_calculator_) {
619 DCHECK(!schedule_calculate_timer_.IsRunning());
620
621 topics_calculator_.reset();
622 CalculateBrowsingTopics();
623 }
Yao Xiao7a1995b2022-03-09 08:18:55624}
625
Yao Xiao57892a22022-06-28 19:21:41626mojom::WebUIGetBrowsingTopicsStateResultPtr
627BrowsingTopicsServiceImpl::GetBrowsingTopicsStateForWebUiHelper() {
628 DCHECK(browsing_topics_state_loaded_);
629 DCHECK(!topics_calculator_);
630
631 auto webui_state = mojom::WebUIBrowsingTopicsState::New();
632
633 webui_state->next_scheduled_calculation_time =
634 browsing_topics_state_.next_scheduled_calculation_time();
635
636 for (const EpochTopics& epoch : browsing_topics_state_.epochs()) {
637 DCHECK_LE(epoch.padded_top_topics_start_index(),
638 epoch.top_topics_and_observing_domains().size());
639
640 // Note: for a failed epoch calculation, the default zero-initialized values
641 // will be displayed in the Web UI.
642 auto webui_epoch = mojom::WebUIEpoch::New();
643 webui_epoch->calculation_time = epoch.calculation_time();
644 webui_epoch->model_version = base::NumberToString(epoch.model_version());
645 webui_epoch->taxonomy_version =
646 base::NumberToString(epoch.taxonomy_version());
647
648 for (size_t i = 0; i < epoch.top_topics_and_observing_domains().size();
649 ++i) {
650 const TopicAndDomains& topic_and_domains =
651 epoch.top_topics_and_observing_domains()[i];
652
653 privacy_sandbox::CanonicalTopic canonical_topic =
654 privacy_sandbox::CanonicalTopic(topic_and_domains.topic(),
655 epoch.taxonomy_version());
656
657 std::vector<std::string> webui_observed_by_domains;
658 webui_observed_by_domains.reserve(
659 topic_and_domains.hashed_domains().size());
660 for (const auto& domain : topic_and_domains.hashed_domains()) {
661 webui_observed_by_domains.push_back(
662 base::NumberToString(domain.value()));
663 }
664
665 // Note: if the topic is invalid (i.e. cleared), the output `topic_id`
666 // will be 0; if the topic is invalid, or if the taxonomy version isn't
667 // recognized by this Chrome binary, the output `topic_name` will be
668 // "Unknown".
669 auto webui_topic = mojom::WebUITopic::New();
670 webui_topic->topic_id = topic_and_domains.topic().value();
671 webui_topic->topic_name = canonical_topic.GetLocalizedRepresentation();
672 webui_topic->is_real_topic = (i < epoch.padded_top_topics_start_index());
673 webui_topic->observed_by_domains = std::move(webui_observed_by_domains);
674
675 webui_epoch->topics.push_back(std::move(webui_topic));
676 }
677
678 webui_state->epochs.push_back(std::move(webui_epoch));
679 }
680
681 // Reorder the epochs from latest to oldest.
682 base::ranges::reverse(webui_state->epochs);
683
684 return mojom::WebUIGetBrowsingTopicsStateResult::NewBrowsingTopicsState(
685 std::move(webui_state));
686}
687
Yao Xiao7a1995b2022-03-09 08:18:55688} // namespace browsing_topics