// Copyright 2022 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
#include "components/browsing_topics/browsing_topics_service_impl.h"

#include <algorithm>
#include <iterator>
#include <random>
#include <string>
#include <utility>
#include <vector>

#include "base/rand_util.h"
#include "base/ranges/algorithm.h"
#include "base/strings/strcat.h"
#include "base/strings/string_number_conversions.h"
#include "base/time/time.h"
#include "components/browsing_topics/browsing_topics_calculator.h"
#include "components/browsing_topics/browsing_topics_page_load_data_tracker.h"
#include "components/browsing_topics/mojom/browsing_topics_internals.mojom.h"
#include "components/browsing_topics/util.h"
#include "components/optimization_guide/content/browser/page_content_annotations_service.h"
#include "content/public/browser/browsing_topics_site_data_manager.h"
#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
#include "services/metrics/public/cpp/ukm_builders.h"
#include "services/metrics/public/cpp/ukm_recorder.h"
#include "third_party/blink/public/common/features.h"
#include "third_party/blink/public/mojom/browsing_topics/browsing_topics.mojom.h"
23
Yao Xiao7a1995b2022-03-09 08:18:5524namespace browsing_topics {
25
Yao Xiaocc379392022-03-25 21:39:0626namespace {
27
Yao Xiaobf39e34d2022-03-28 21:48:2828// Returns whether the topics should all be cleared given
29// `browsing_topics_data_accessible_since` and `is_topic_allowed_by_settings`.
30// Returns true if `browsing_topics_data_accessible_since` is greater than the
31// last calculation time, or if any top topic is disallowed from the settings.
32// The latter could happen if the topic became disallowed when
33// `browsing_topics_state` was still loading (and we didn't get a chance to
34// clear it). This is an unlikely edge case, so it's fine to over-delete.
35bool ShouldClearTopicsOnStartup(
Yao Xiaocc379392022-03-25 21:39:0636 const BrowsingTopicsState& browsing_topics_state,
Yao Xiaobf39e34d2022-03-28 21:48:2837 base::Time browsing_topics_data_accessible_since,
38 base::RepeatingCallback<bool(const privacy_sandbox::CanonicalTopic&)>
39 is_topic_allowed_by_settings) {
40 DCHECK(!is_topic_allowed_by_settings.is_null());
41
42 if (browsing_topics_state.epochs().empty())
43 return false;
44
Yao Xiaocc379392022-03-25 21:39:0645 // Here we rely on the fact that `browsing_topics_data_accessible_since` can
46 // only be updated to base::Time::Now() due to data deletion. So we'll either
47 // need to clear all topics data, or no-op. If this assumption no longer
48 // holds, we'd need to iterate over all epochs, check their calculation time,
49 // and selectively delete the epochs.
Yao Xiaobf39e34d2022-03-28 21:48:2850 if (browsing_topics_data_accessible_since >
51 browsing_topics_state.epochs().back().calculation_time()) {
52 return true;
53 }
54
55 for (const EpochTopics& epoch : browsing_topics_state.epochs()) {
56 for (const TopicAndDomains& topic_and_domains :
57 epoch.top_topics_and_observing_domains()) {
58 if (!topic_and_domains.IsValid())
59 continue;
60
61 if (!is_topic_allowed_by_settings.Run(privacy_sandbox::CanonicalTopic(
62 topic_and_domains.topic(), epoch.taxonomy_version()))) {
63 return true;
64 }
65 }
66 }
67
68 return false;
Yao Xiaocc379392022-03-25 21:39:0669}
70
71struct StartupCalculateDecision {
72 bool clear_topics_data = true;
73 base::TimeDelta next_calculation_delay;
74};
75
76StartupCalculateDecision GetStartupCalculationDecision(
77 const BrowsingTopicsState& browsing_topics_state,
Yao Xiaobf39e34d2022-03-28 21:48:2878 base::Time browsing_topics_data_accessible_since,
79 base::RepeatingCallback<bool(const privacy_sandbox::CanonicalTopic&)>
80 is_topic_allowed_by_settings) {
Yao Xiaocc379392022-03-25 21:39:0681 // The topics have never been calculated. This could happen with a fresh
82 // profile or the if the config has updated. In case of a config update, the
83 // topics should have already been cleared when initializing the
84 // `BrowsingTopicsState`.
85 if (browsing_topics_state.next_scheduled_calculation_time().is_null()) {
86 return StartupCalculateDecision{
87 .clear_topics_data = false,
88 .next_calculation_delay = base::TimeDelta()};
89 }
90
91 // This could happen when clear-on-exit is turned on and has caused the
Yao Xiaobf39e34d2022-03-28 21:48:2892 // cookies to be deleted on startup, of if a topic became disallowed when
93 // `browsing_topics_state` was still loading.
94 bool should_clear_topics_data = ShouldClearTopicsOnStartup(
95 browsing_topics_state, browsing_topics_data_accessible_since,
96 is_topic_allowed_by_settings);
Yao Xiaocc379392022-03-25 21:39:0697
98 base::TimeDelta presumed_next_calculation_delay =
99 browsing_topics_state.next_scheduled_calculation_time() -
100 base::Time::Now();
101
102 // The scheduled calculation time was reached before the startup.
103 if (presumed_next_calculation_delay <= base::TimeDelta()) {
104 return StartupCalculateDecision{
105 .clear_topics_data = should_clear_topics_data,
106 .next_calculation_delay = base::TimeDelta()};
107 }
108
109 // This could happen if the machine time has changed since the last
110 // calculation. Recalculate immediately to align with the expected schedule
111 // rather than potentially stop computing for a very long time.
112 if (presumed_next_calculation_delay >=
113 2 * blink::features::kBrowsingTopicsTimePeriodPerEpoch.Get()) {
114 return StartupCalculateDecision{
115 .clear_topics_data = should_clear_topics_data,
116 .next_calculation_delay = base::TimeDelta()};
117 }
118
119 return StartupCalculateDecision{
120 .clear_topics_data = should_clear_topics_data,
121 .next_calculation_delay = presumed_next_calculation_delay};
122}
123
// Why the topics API handed back an empty result.
//
// These values are persisted to logs: never renumber an entry and never reuse
// a numeric value.
enum class EmptyApiResultReason {
  // Loading of the topics state has not completed yet.
  kStateNotReady = 0,

  // User settings do not allow the access.
  kAccessDisallowedBySettings = 1,

  // Nothing was available to return, e.g. no candidate epochs; epoch
  // calculation failed; individual topics were cleared or blocked.
  kNoCandicateTopics = 2,

  // Candidate topics existed but were filtered for the requesting context.
  kCandicateTopicsFiltered = 3,

  kMaxValue = kCandicateTopicsFiltered,
};
143
144void RecordBrowsingTopicsApiResultUkmMetrics(
145 EmptyApiResultReason empty_reason,
146 content::RenderFrameHost* main_frame) {
147 ukm::UkmRecorder* ukm_recorder = ukm::UkmRecorder::Get();
148 ukm::builders::BrowsingTopics_DocumentBrowsingTopicsApiResult builder(
149 main_frame->GetPageUkmSourceId());
150 builder.SetEmptyReason(static_cast<int64_t>(empty_reason));
151 builder.Record(ukm_recorder->Get());
152}
153
154void RecordBrowsingTopicsApiResultUkmMetrics(
155 const std::vector<std::pair<blink::mojom::EpochTopicPtr, bool>>&
156 topics_with_status,
157 content::RenderFrameHost* main_frame) {
158 DCHECK(!topics_with_status.empty());
159
160 ukm::UkmRecorder* ukm_recorder = ukm::UkmRecorder::Get();
161 ukm::builders::BrowsingTopics_DocumentBrowsingTopicsApiResult builder(
162 main_frame->GetPageUkmSourceId());
163
164 for (size_t i = 0; i < 3u && topics_with_status.size() > i; ++i) {
165 const blink::mojom::EpochTopicPtr& topic = topics_with_status[i].first;
166 bool is_true_topic = topics_with_status[i].second;
167
168 int taxonomy_version = 0;
169 base::StringToInt(topic->taxonomy_version, &taxonomy_version);
170 DCHECK(taxonomy_version);
171
172 int64_t model_version = 0;
173 base::StringToInt64(topic->model_version, &model_version);
174 DCHECK(model_version);
175
176 if (i == 0) {
177 builder.SetReturnedTopic0(topic->topic)
178 .SetReturnedTopic0IsTrueTopTopic(is_true_topic)
179 .SetReturnedTopic0TaxonomyVersion(taxonomy_version)
180 .SetReturnedTopic0ModelVersion(model_version);
181 } else if (i == 1) {
182 builder.SetReturnedTopic1(topic->topic)
183 .SetReturnedTopic1IsTrueTopTopic(is_true_topic)
184 .SetReturnedTopic1TaxonomyVersion(taxonomy_version)
185 .SetReturnedTopic1ModelVersion(model_version);
186 } else {
187 DCHECK_EQ(i, 2u);
188 builder.SetReturnedTopic2(topic->topic)
189 .SetReturnedTopic2IsTrueTopTopic(is_true_topic)
190 .SetReturnedTopic2TaxonomyVersion(taxonomy_version)
191 .SetReturnedTopic2ModelVersion(model_version);
192 }
193 }
194
195 builder.Record(ukm_recorder->Get());
196}
197
Yao Xiaocc379392022-03-25 21:39:06198} // namespace
199
Yao Xiao7a1995b2022-03-09 08:18:55200BrowsingTopicsServiceImpl::~BrowsingTopicsServiceImpl() = default;
201
Yao Xiaocc379392022-03-25 21:39:06202BrowsingTopicsServiceImpl::BrowsingTopicsServiceImpl(
203 const base::FilePath& profile_path,
204 privacy_sandbox::PrivacySandboxSettings* privacy_sandbox_settings,
205 history::HistoryService* history_service,
206 content::BrowsingTopicsSiteDataManager* site_data_manager,
207 optimization_guide::PageContentAnnotationsService* annotations_service)
208 : privacy_sandbox_settings_(privacy_sandbox_settings),
209 history_service_(history_service),
210 site_data_manager_(site_data_manager),
211 annotations_service_(annotations_service),
212 browsing_topics_state_(
213 profile_path,
214 base::BindOnce(
215 &BrowsingTopicsServiceImpl::OnBrowsingTopicsStateLoaded,
216 base::Unretained(this))) {
217 privacy_sandbox_settings_observation_.Observe(privacy_sandbox_settings);
218 history_service_observation_.Observe(history_service);
219
Yao Xiaobc1241a2022-03-29 05:23:37220 // Greedily request the model to be available to reduce the latency in later
221 // topics calculation.
Yao Xiaocc379392022-03-25 21:39:06222 annotations_service_->RequestAndNotifyWhenModelAvailable(
223 optimization_guide::AnnotationType::kPageTopics, base::DoNothing());
224}
225
226std::vector<blink::mojom::EpochTopicPtr>
227BrowsingTopicsServiceImpl::GetBrowsingTopicsForJsApi(
228 const url::Origin& context_origin,
229 content::RenderFrameHost* main_frame) {
Yao Xiao716e4812022-04-20 22:57:19230 if (!browsing_topics_state_loaded_) {
231 RecordBrowsingTopicsApiResultUkmMetrics(
232 EmptyApiResultReason::kStateNotReady, main_frame);
Yao Xiaocc379392022-03-25 21:39:06233 return {};
Yao Xiao716e4812022-04-20 22:57:19234 }
Yao Xiaocc379392022-03-25 21:39:06235
Yao Xiao716e4812022-04-20 22:57:19236 if (!privacy_sandbox_settings_->IsTopicsAllowed()) {
237 RecordBrowsingTopicsApiResultUkmMetrics(
238 EmptyApiResultReason::kAccessDisallowedBySettings, main_frame);
Yao Xiaocc379392022-03-25 21:39:06239 return {};
Yao Xiao716e4812022-04-20 22:57:19240 }
Yao Xiaocc379392022-03-25 21:39:06241
242 if (!privacy_sandbox_settings_->IsTopicsAllowedForContext(
243 context_origin.GetURL(), main_frame->GetLastCommittedOrigin())) {
Yao Xiao716e4812022-04-20 22:57:19244 RecordBrowsingTopicsApiResultUkmMetrics(
245 EmptyApiResultReason::kAccessDisallowedBySettings, main_frame);
Yao Xiaocc379392022-03-25 21:39:06246 return {};
247 }
248
249 std::string context_domain =
250 net::registry_controlled_domains::GetDomainAndRegistry(
251 context_origin.GetURL(),
252 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
253
254 HashedDomain hashed_context_domain = HashContextDomainForStorage(
255 browsing_topics_state_.hmac_key(), context_domain);
256
257 // Track the API usage context after the permissions check.
258 BrowsingTopicsPageLoadDataTracker::GetOrCreateForPage(main_frame->GetPage())
259 ->OnBrowsingTopicsApiUsed(hashed_context_domain, history_service_);
260
261 std::string top_domain =
262 net::registry_controlled_domains::GetDomainAndRegistry(
263 main_frame->GetLastCommittedOrigin().GetURL(),
264 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
265
Yao Xiao716e4812022-04-20 22:57:19266 bool has_filtered_topics = false;
267
268 // The result topics along with flags denoting whether they are true topics.
269 std::vector<std::pair<blink::mojom::EpochTopicPtr, bool>> topics_with_status;
270
Yao Xiaocc379392022-03-25 21:39:06271 for (const EpochTopics* epoch :
272 browsing_topics_state_.EpochsForSite(top_domain)) {
Yao Xiao716e4812022-04-20 22:57:19273 bool output_is_true_topic = false;
274 bool candidate_topic_filtered = false;
Yao Xiaocc379392022-03-25 21:39:06275 absl::optional<Topic> topic = epoch->TopicForSite(
Yao Xiao716e4812022-04-20 22:57:19276 top_domain, hashed_context_domain, browsing_topics_state_.hmac_key(),
277 output_is_true_topic, candidate_topic_filtered);
278
279 if (candidate_topic_filtered)
280 has_filtered_topics = true;
Yao Xiaocc379392022-03-25 21:39:06281
282 // Only add a non-empty topic to the result.
283 if (!topic)
284 continue;
285
Yao Xiaobf39e34d2022-03-28 21:48:28286 // Although a top topic can never be in the disallowed state, the returned
287 // `topic` may be the random one. Thus we still need this check.
Yao Xiaocc379392022-03-25 21:39:06288 if (!privacy_sandbox_settings_->IsTopicAllowed(
289 privacy_sandbox::CanonicalTopic(*topic,
290 epoch->taxonomy_version()))) {
Yao Xiaobf39e34d2022-03-28 21:48:28291 continue;
Yao Xiaocc379392022-03-25 21:39:06292 }
293
Yao Xiao21f1faa2022-04-29 06:20:32294 auto result_topic = blink::mojom::EpochTopic::New();
Yao Xiaocc379392022-03-25 21:39:06295 result_topic->topic = topic.value().value();
296 result_topic->config_version = base::StrCat(
297 {"chrome.", base::NumberToString(
298 blink::features::kBrowsingTopicsConfigVersion.Get())});
299 result_topic->model_version = base::NumberToString(epoch->model_version());
300 result_topic->taxonomy_version =
301 base::NumberToString(epoch->taxonomy_version());
302 result_topic->version = base::StrCat({result_topic->config_version, ":",
303 result_topic->taxonomy_version, ":",
304 result_topic->model_version});
Yao Xiao716e4812022-04-20 22:57:19305 topics_with_status.emplace_back(std::move(result_topic),
306 output_is_true_topic);
Yao Xiaocc379392022-03-25 21:39:06307 }
308
Yao Xiao716e4812022-04-20 22:57:19309 // Sort `topics_with_status` based on `EpochTopicPtr` first, and if the
310 // `EpochTopicPtr` parts are equal, then a true topic will be ordered before a
311 // random topic. This ensures that when we later deduplicate based on the
312 // `EpochTopicPtr` field only, the associated is-true-topic status will be
313 // true as long as there is one true topic for that topic in
314 // `topics_with_status`.
315 std::sort(topics_with_status.begin(), topics_with_status.end(),
316 [](const auto& left, const auto& right) {
317 if (left.first < right.first)
318 return true;
319 if (left.first > right.first)
320 return false;
321 return right.second < left.second;
322 });
323
324 // Remove duplicate `EpochTopicPtr` entries.
325 topics_with_status.erase(
326 std::unique(topics_with_status.begin(), topics_with_status.end(),
327 [](const auto& left, const auto& right) {
328 return left.first == right.first;
329 }),
330 topics_with_status.end());
Yao Xiaocc379392022-03-25 21:39:06331
332 // Shuffle the entries.
Yao Xiao716e4812022-04-20 22:57:19333 base::RandomShuffle(topics_with_status.begin(), topics_with_status.end());
334
335 if (topics_with_status.empty()) {
336 if (has_filtered_topics) {
337 RecordBrowsingTopicsApiResultUkmMetrics(
338 EmptyApiResultReason::kCandicateTopicsFiltered, main_frame);
339 } else {
340 RecordBrowsingTopicsApiResultUkmMetrics(
341 EmptyApiResultReason::kNoCandicateTopics, main_frame);
342 }
343 return {};
344 }
345
346 RecordBrowsingTopicsApiResultUkmMetrics(topics_with_status, main_frame);
347
348 std::vector<blink::mojom::EpochTopicPtr> result_topics;
349 result_topics.reserve(topics_with_status.size());
350 std::transform(topics_with_status.begin(), topics_with_status.end(),
351 std::back_inserter(result_topics),
352 [](auto& topic_with_status) {
353 return std::move(topic_with_status.first);
354 });
Yao Xiaocc379392022-03-25 21:39:06355
356 return result_topics;
357}
Yao Xiao7a1995b2022-03-09 08:18:55358
Yao Xiao57892a22022-06-28 19:21:41359void BrowsingTopicsServiceImpl::GetBrowsingTopicsStateForWebUi(
360 bool calculate_now,
361 mojom::PageHandler::GetBrowsingTopicsStateCallback callback) {
Yao Xiao21f1faa2022-04-29 06:20:32362 if (!browsing_topics_state_loaded_) {
Yao Xiao57892a22022-06-28 19:21:41363 std::move(callback).Run(
364 mojom::WebUIGetBrowsingTopicsStateResult::NewOverrideStatusMessage(
365 "State loading hasn't finished. Please retry shortly."));
366 return;
Yao Xiao21f1faa2022-04-29 06:20:32367 }
368
Yao Xiao57892a22022-06-28 19:21:41369 // If a calculation is already in progress, get the webui topics state after
370 // the calculation is done. Do this regardless of whether `calculate_now` is
371 // true, i.e. if `calculate_now` is true, this request is effectively merged
372 // with the in progress calculation.
373 if (topics_calculator_) {
374 get_state_for_webui_callbacks_.push_back(std::move(callback));
375 return;
Yao Xiao21f1faa2022-04-29 06:20:32376 }
377
Yao Xiao57892a22022-06-28 19:21:41378 DCHECK(schedule_calculate_timer_.IsRunning());
Yao Xiao21f1faa2022-04-29 06:20:32379
Yao Xiao57892a22022-06-28 19:21:41380 if (calculate_now) {
381 get_state_for_webui_callbacks_.push_back(std::move(callback));
Yao Xiao21f1faa2022-04-29 06:20:32382
Yao Xiao57892a22022-06-28 19:21:41383 schedule_calculate_timer_.AbandonAndStop();
384 CalculateBrowsingTopics();
385 return;
Yao Xiao21f1faa2022-04-29 06:20:32386 }
387
Yao Xiao57892a22022-06-28 19:21:41388 std::move(callback).Run(GetBrowsingTopicsStateForWebUiHelper());
Yao Xiao21f1faa2022-04-29 06:20:32389}
390
Yao Xiao7a1995b2022-03-09 08:18:55391std::vector<privacy_sandbox::CanonicalTopic>
392BrowsingTopicsServiceImpl::GetTopicsForSiteForDisplay(
393 const url::Origin& top_origin) const {
Yao Xiaocc379392022-03-25 21:39:06394 if (!browsing_topics_state_loaded_)
395 return {};
396
397 std::string top_domain =
398 net::registry_controlled_domains::GetDomainAndRegistry(
399 top_origin.GetURL(),
400 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
401
402 std::vector<privacy_sandbox::CanonicalTopic> result;
403
404 for (const EpochTopics* epoch :
405 browsing_topics_state_.EpochsForSite(top_domain)) {
Yao Xiaobf39e34d2022-03-28 21:48:28406 absl::optional<Topic> topic = epoch->TopicForSiteForDisplay(
Yao Xiaocc379392022-03-25 21:39:06407 top_domain, browsing_topics_state_.hmac_key());
408
409 if (!topic)
410 continue;
411
Yao Xiaobf39e34d2022-03-28 21:48:28412 // `epoch->TopicForSiteForDisplay()` shall only return a top topic, and a
413 // top topic can never be in the disallowed state (i.e. it will be cleared
414 // when it becomes diallowed).
415 DCHECK(privacy_sandbox_settings_->IsTopicAllowed(
416 privacy_sandbox::CanonicalTopic(*topic, epoch->taxonomy_version())));
417
Yao Xiaocc379392022-03-25 21:39:06418 result.emplace_back(*topic, epoch->taxonomy_version());
419 }
420
421 return result;
Yao Xiao7a1995b2022-03-09 08:18:55422}
423
424std::vector<privacy_sandbox::CanonicalTopic>
425BrowsingTopicsServiceImpl::GetTopTopicsForDisplay() const {
Yao Xiaocc379392022-03-25 21:39:06426 if (!browsing_topics_state_loaded_)
427 return {};
428
429 std::vector<privacy_sandbox::CanonicalTopic> result;
430
431 for (const EpochTopics& epoch : browsing_topics_state_.epochs()) {
Yao Xiaobf39e34d2022-03-28 21:48:28432 DCHECK_LE(epoch.padded_top_topics_start_index(),
433 epoch.top_topics_and_observing_domains().size());
434
435 for (size_t i = 0; i < epoch.padded_top_topics_start_index(); ++i) {
436 const TopicAndDomains& topic_and_domains =
437 epoch.top_topics_and_observing_domains()[i];
438
439 if (!topic_and_domains.IsValid())
Yao Xiaocc379392022-03-25 21:39:06440 continue;
441
Yao Xiaobf39e34d2022-03-28 21:48:28442 // A top topic can never be in the disallowed state (i.e. it will be
443 // cleared when it becomes diallowed).
444 DCHECK(privacy_sandbox_settings_->IsTopicAllowed(
445 privacy_sandbox::CanonicalTopic(topic_and_domains.topic(),
446 epoch.taxonomy_version())));
447
448 result.emplace_back(topic_and_domains.topic(), epoch.taxonomy_version());
Yao Xiaocc379392022-03-25 21:39:06449 }
450 }
451
452 return result;
453}
454
Yao Xiaobf39e34d2022-03-28 21:48:28455void BrowsingTopicsServiceImpl::ClearTopic(
456 const privacy_sandbox::CanonicalTopic& canonical_topic) {
457 if (!browsing_topics_state_loaded_)
458 return;
459
460 browsing_topics_state_.ClearTopic(canonical_topic.topic_id(),
461 canonical_topic.taxonomy_version());
462}
463
464void BrowsingTopicsServiceImpl::ClearTopicsDataForOrigin(
465 const url::Origin& origin) {
466 if (!browsing_topics_state_loaded_)
467 return;
468
469 std::string context_domain =
470 net::registry_controlled_domains::GetDomainAndRegistry(
471 origin.GetURL(),
472 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
473
474 HashedDomain hashed_context_domain = HashContextDomainForStorage(
475 browsing_topics_state_.hmac_key(), context_domain);
476
477 browsing_topics_state_.ClearContextDomain(hashed_context_domain);
478 site_data_manager_->ClearContextDomain(hashed_context_domain);
479}
480
481void BrowsingTopicsServiceImpl::ClearAllTopicsData() {
482 if (!browsing_topics_state_loaded_)
483 return;
484
485 browsing_topics_state_.ClearAllTopics();
486 site_data_manager_->ExpireDataBefore(base::Time::Now());
487}
488
Yao Xiaocc379392022-03-25 21:39:06489std::unique_ptr<BrowsingTopicsCalculator>
490BrowsingTopicsServiceImpl::CreateCalculator(
491 privacy_sandbox::PrivacySandboxSettings* privacy_sandbox_settings,
492 history::HistoryService* history_service,
493 content::BrowsingTopicsSiteDataManager* site_data_manager,
494 optimization_guide::PageContentAnnotationsService* annotations_service,
Yao Xiao57892a22022-06-28 19:21:41495 const base::circular_deque<EpochTopics>& epochs,
Yao Xiaocc379392022-03-25 21:39:06496 BrowsingTopicsCalculator::CalculateCompletedCallback callback) {
497 return std::make_unique<BrowsingTopicsCalculator>(
498 privacy_sandbox_settings, history_service, site_data_manager,
Yao Xiao57892a22022-06-28 19:21:41499 annotations_service, epochs, std::move(callback));
Yao Xiaocc379392022-03-25 21:39:06500}
501
502const BrowsingTopicsState& BrowsingTopicsServiceImpl::browsing_topics_state() {
503 return browsing_topics_state_;
504}
505
506void BrowsingTopicsServiceImpl::ScheduleBrowsingTopicsCalculation(
507 base::TimeDelta delay) {
508 DCHECK(browsing_topics_state_loaded_);
509
510 // `this` owns the timer, which is automatically cancelled on destruction, so
511 // base::Unretained(this) is safe.
512 schedule_calculate_timer_.Start(
513 FROM_HERE, delay,
514 base::BindOnce(&BrowsingTopicsServiceImpl::CalculateBrowsingTopics,
515 base::Unretained(this)));
516}
517
518void BrowsingTopicsServiceImpl::CalculateBrowsingTopics() {
519 DCHECK(browsing_topics_state_loaded_);
520
521 DCHECK(!topics_calculator_);
522
523 // `this` owns `topics_calculator_` so `topics_calculator_` should not invoke
524 // the callback once it's destroyed.
525 topics_calculator_ = CreateCalculator(
526 privacy_sandbox_settings_, history_service_, site_data_manager_,
Yao Xiao57892a22022-06-28 19:21:41527 annotations_service_, browsing_topics_state_.epochs(),
Yao Xiaocc379392022-03-25 21:39:06528 base::BindOnce(
529 &BrowsingTopicsServiceImpl::OnCalculateBrowsingTopicsCompleted,
530 base::Unretained(this)));
531}
532
533void BrowsingTopicsServiceImpl::OnCalculateBrowsingTopicsCompleted(
534 EpochTopics epoch_topics) {
535 DCHECK(browsing_topics_state_loaded_);
536
537 DCHECK(topics_calculator_);
538 topics_calculator_.reset();
539
540 browsing_topics_state_.AddEpoch(std::move(epoch_topics));
541 browsing_topics_state_.UpdateNextScheduledCalculationTime();
542
543 ScheduleBrowsingTopicsCalculation(
544 blink::features::kBrowsingTopicsTimePeriodPerEpoch.Get());
Yao Xiao57892a22022-06-28 19:21:41545
546 if (!get_state_for_webui_callbacks_.empty()) {
547 mojom::WebUIGetBrowsingTopicsStateResultPtr webui_state =
548 GetBrowsingTopicsStateForWebUiHelper();
549
550 for (auto& callback : get_state_for_webui_callbacks_) {
551 std::move(callback).Run(webui_state->Clone());
552 }
553
554 get_state_for_webui_callbacks_.clear();
555 }
Yao Xiaocc379392022-03-25 21:39:06556}
557
558void BrowsingTopicsServiceImpl::OnBrowsingTopicsStateLoaded() {
559 DCHECK(!browsing_topics_state_loaded_);
560 browsing_topics_state_loaded_ = true;
561
562 base::Time browsing_topics_data_sccessible_since =
563 privacy_sandbox_settings_->TopicsDataAccessibleSince();
564
565 StartupCalculateDecision decision = GetStartupCalculationDecision(
Yao Xiaobf39e34d2022-03-28 21:48:28566 browsing_topics_state_, browsing_topics_data_sccessible_since,
567 base::BindRepeating(
568 &privacy_sandbox::PrivacySandboxSettings::IsTopicAllowed,
569 base::Unretained(privacy_sandbox_settings_)));
Yao Xiaocc379392022-03-25 21:39:06570
571 if (decision.clear_topics_data)
572 browsing_topics_state_.ClearAllTopics();
573
574 site_data_manager_->ExpireDataBefore(browsing_topics_data_sccessible_since);
575
576 ScheduleBrowsingTopicsCalculation(decision.next_calculation_delay);
577}
578
579void BrowsingTopicsServiceImpl::Shutdown() {
580 privacy_sandbox_settings_observation_.Reset();
581 history_service_observation_.Reset();
582}
583
584void BrowsingTopicsServiceImpl::OnTopicsDataAccessibleSinceUpdated() {
585 if (!browsing_topics_state_loaded_)
586 return;
587
Yao Xiaobf39e34d2022-03-28 21:48:28588 // Here we rely on the fact that `browsing_topics_data_accessible_since` can
589 // only be updated to base::Time::Now() due to data deletion. In this case, we
590 // should just clear all topics.
591 browsing_topics_state_.ClearAllTopics();
592 site_data_manager_->ExpireDataBefore(
593 privacy_sandbox_settings_->TopicsDataAccessibleSince());
Yao Xiaocc379392022-03-25 21:39:06594
595 // Abort the outstanding topics calculation and restart immediately.
596 if (topics_calculator_) {
597 DCHECK(!schedule_calculate_timer_.IsRunning());
598
599 topics_calculator_.reset();
600 CalculateBrowsingTopics();
601 }
602}
603
604void BrowsingTopicsServiceImpl::OnURLsDeleted(
605 history::HistoryService* history_service,
606 const history::DeletionInfo& deletion_info) {
607 if (!browsing_topics_state_loaded_)
608 return;
609
610 // Ignore invalid time_range.
611 if (!deletion_info.IsAllHistory() && !deletion_info.time_range().IsValid())
612 return;
613
614 for (size_t i = 0; i < browsing_topics_state_.epochs().size(); ++i) {
615 const EpochTopics& epoch_topics = browsing_topics_state_.epochs()[i];
616
617 if (epoch_topics.empty())
618 continue;
619
Yao Xiao57892a22022-06-28 19:21:41620 // The typical case is assumed here. We cannot always derive the original
621 // history start time, as the necessary data (e.g. its previous epoch's
622 // calculation time) may have been gone.
623 base::Time history_data_start_time =
624 epoch_topics.calculation_time() -
625 blink::features::kBrowsingTopicsTimePeriodPerEpoch.Get();
626
Yao Xiaocc379392022-03-25 21:39:06627 bool time_range_overlap =
628 epoch_topics.calculation_time() >= deletion_info.time_range().begin() &&
Yao Xiao57892a22022-06-28 19:21:41629 history_data_start_time <= deletion_info.time_range().end();
Yao Xiaocc379392022-03-25 21:39:06630
631 if (time_range_overlap)
632 browsing_topics_state_.ClearOneEpoch(i);
633 }
634
635 // If there's an outstanding topics calculation, abort and restart it.
636 if (topics_calculator_) {
637 DCHECK(!schedule_calculate_timer_.IsRunning());
638
639 topics_calculator_.reset();
640 CalculateBrowsingTopics();
641 }
Yao Xiao7a1995b2022-03-09 08:18:55642}
643
Yao Xiao57892a22022-06-28 19:21:41644mojom::WebUIGetBrowsingTopicsStateResultPtr
645BrowsingTopicsServiceImpl::GetBrowsingTopicsStateForWebUiHelper() {
646 DCHECK(browsing_topics_state_loaded_);
647 DCHECK(!topics_calculator_);
648
649 auto webui_state = mojom::WebUIBrowsingTopicsState::New();
650
651 webui_state->next_scheduled_calculation_time =
652 browsing_topics_state_.next_scheduled_calculation_time();
653
654 for (const EpochTopics& epoch : browsing_topics_state_.epochs()) {
655 DCHECK_LE(epoch.padded_top_topics_start_index(),
656 epoch.top_topics_and_observing_domains().size());
657
658 // Note: for a failed epoch calculation, the default zero-initialized values
659 // will be displayed in the Web UI.
660 auto webui_epoch = mojom::WebUIEpoch::New();
661 webui_epoch->calculation_time = epoch.calculation_time();
662 webui_epoch->model_version = base::NumberToString(epoch.model_version());
663 webui_epoch->taxonomy_version =
664 base::NumberToString(epoch.taxonomy_version());
665
666 for (size_t i = 0; i < epoch.top_topics_and_observing_domains().size();
667 ++i) {
668 const TopicAndDomains& topic_and_domains =
669 epoch.top_topics_and_observing_domains()[i];
670
671 privacy_sandbox::CanonicalTopic canonical_topic =
672 privacy_sandbox::CanonicalTopic(topic_and_domains.topic(),
673 epoch.taxonomy_version());
674
675 std::vector<std::string> webui_observed_by_domains;
676 webui_observed_by_domains.reserve(
677 topic_and_domains.hashed_domains().size());
678 for (const auto& domain : topic_and_domains.hashed_domains()) {
679 webui_observed_by_domains.push_back(
680 base::NumberToString(domain.value()));
681 }
682
683 // Note: if the topic is invalid (i.e. cleared), the output `topic_id`
684 // will be 0; if the topic is invalid, or if the taxonomy version isn't
685 // recognized by this Chrome binary, the output `topic_name` will be
686 // "Unknown".
687 auto webui_topic = mojom::WebUITopic::New();
688 webui_topic->topic_id = topic_and_domains.topic().value();
689 webui_topic->topic_name = canonical_topic.GetLocalizedRepresentation();
690 webui_topic->is_real_topic = (i < epoch.padded_top_topics_start_index());
691 webui_topic->observed_by_domains = std::move(webui_observed_by_domains);
692
693 webui_epoch->topics.push_back(std::move(webui_topic));
694 }
695
696 webui_state->epochs.push_back(std::move(webui_epoch));
697 }
698
699 // Reorder the epochs from latest to oldest.
700 base::ranges::reverse(webui_state->epochs);
701
702 return mojom::WebUIGetBrowsingTopicsStateResult::NewBrowsingTopicsState(
703 std::move(webui_state));
704}
705
Yao Xiao7a1995b2022-03-09 08:18:55706} // namespace browsing_topics