// Copyright 2022 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
#include "components/browsing_topics/browsing_topics_service_impl.h"

#include <algorithm>
#include <random>

#include "base/rand_util.h"
#include "base/strings/strcat.h"
#include "base/strings/string_number_conversions.h"
#include "base/time/time.h"
#include "components/browsing_topics/browsing_topics_calculator.h"
#include "components/browsing_topics/browsing_topics_page_load_data_tracker.h"
#include "components/browsing_topics/util.h"
#include "components/optimization_guide/content/browser/page_content_annotations_service.h"
#include "content/public/browser/browsing_topics_site_data_manager.h"
#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
#include "third_party/blink/public/common/features.h"
#include "third_party/blink/public/mojom/browsing_topics/browsing_topics.mojom.h"
19
Yao Xiao7a1995b2022-03-09 08:18:5520namespace browsing_topics {
21
Yao Xiaocc379392022-03-25 21:39:0622namespace {
23
Yao Xiaobf39e34d2022-03-28 21:48:2824// Returns whether the topics should all be cleared given
25// `browsing_topics_data_accessible_since` and `is_topic_allowed_by_settings`.
26// Returns true if `browsing_topics_data_accessible_since` is greater than the
27// last calculation time, or if any top topic is disallowed from the settings.
28// The latter could happen if the topic became disallowed when
29// `browsing_topics_state` was still loading (and we didn't get a chance to
30// clear it). This is an unlikely edge case, so it's fine to over-delete.
31bool ShouldClearTopicsOnStartup(
Yao Xiaocc379392022-03-25 21:39:0632 const BrowsingTopicsState& browsing_topics_state,
Yao Xiaobf39e34d2022-03-28 21:48:2833 base::Time browsing_topics_data_accessible_since,
34 base::RepeatingCallback<bool(const privacy_sandbox::CanonicalTopic&)>
35 is_topic_allowed_by_settings) {
36 DCHECK(!is_topic_allowed_by_settings.is_null());
37
38 if (browsing_topics_state.epochs().empty())
39 return false;
40
Yao Xiaocc379392022-03-25 21:39:0641 // Here we rely on the fact that `browsing_topics_data_accessible_since` can
42 // only be updated to base::Time::Now() due to data deletion. So we'll either
43 // need to clear all topics data, or no-op. If this assumption no longer
44 // holds, we'd need to iterate over all epochs, check their calculation time,
45 // and selectively delete the epochs.
Yao Xiaobf39e34d2022-03-28 21:48:2846 if (browsing_topics_data_accessible_since >
47 browsing_topics_state.epochs().back().calculation_time()) {
48 return true;
49 }
50
51 for (const EpochTopics& epoch : browsing_topics_state.epochs()) {
52 for (const TopicAndDomains& topic_and_domains :
53 epoch.top_topics_and_observing_domains()) {
54 if (!topic_and_domains.IsValid())
55 continue;
56
57 if (!is_topic_allowed_by_settings.Run(privacy_sandbox::CanonicalTopic(
58 topic_and_domains.topic(), epoch.taxonomy_version()))) {
59 return true;
60 }
61 }
62 }
63
64 return false;
Yao Xiaocc379392022-03-25 21:39:0665}
66
67struct StartupCalculateDecision {
68 bool clear_topics_data = true;
69 base::TimeDelta next_calculation_delay;
70};
71
72StartupCalculateDecision GetStartupCalculationDecision(
73 const BrowsingTopicsState& browsing_topics_state,
Yao Xiaobf39e34d2022-03-28 21:48:2874 base::Time browsing_topics_data_accessible_since,
75 base::RepeatingCallback<bool(const privacy_sandbox::CanonicalTopic&)>
76 is_topic_allowed_by_settings) {
Yao Xiaocc379392022-03-25 21:39:0677 // The topics have never been calculated. This could happen with a fresh
78 // profile or the if the config has updated. In case of a config update, the
79 // topics should have already been cleared when initializing the
80 // `BrowsingTopicsState`.
81 if (browsing_topics_state.next_scheduled_calculation_time().is_null()) {
82 return StartupCalculateDecision{
83 .clear_topics_data = false,
84 .next_calculation_delay = base::TimeDelta()};
85 }
86
87 // This could happen when clear-on-exit is turned on and has caused the
Yao Xiaobf39e34d2022-03-28 21:48:2888 // cookies to be deleted on startup, of if a topic became disallowed when
89 // `browsing_topics_state` was still loading.
90 bool should_clear_topics_data = ShouldClearTopicsOnStartup(
91 browsing_topics_state, browsing_topics_data_accessible_since,
92 is_topic_allowed_by_settings);
Yao Xiaocc379392022-03-25 21:39:0693
94 base::TimeDelta presumed_next_calculation_delay =
95 browsing_topics_state.next_scheduled_calculation_time() -
96 base::Time::Now();
97
98 // The scheduled calculation time was reached before the startup.
99 if (presumed_next_calculation_delay <= base::TimeDelta()) {
100 return StartupCalculateDecision{
101 .clear_topics_data = should_clear_topics_data,
102 .next_calculation_delay = base::TimeDelta()};
103 }
104
105 // This could happen if the machine time has changed since the last
106 // calculation. Recalculate immediately to align with the expected schedule
107 // rather than potentially stop computing for a very long time.
108 if (presumed_next_calculation_delay >=
109 2 * blink::features::kBrowsingTopicsTimePeriodPerEpoch.Get()) {
110 return StartupCalculateDecision{
111 .clear_topics_data = should_clear_topics_data,
112 .next_calculation_delay = base::TimeDelta()};
113 }
114
115 return StartupCalculateDecision{
116 .clear_topics_data = should_clear_topics_data,
117 .next_calculation_delay = presumed_next_calculation_delay};
118}
119
120} // namespace
121
Yao Xiao7a1995b2022-03-09 08:18:55122BrowsingTopicsServiceImpl::~BrowsingTopicsServiceImpl() = default;
123
Yao Xiaocc379392022-03-25 21:39:06124BrowsingTopicsServiceImpl::BrowsingTopicsServiceImpl(
125 const base::FilePath& profile_path,
126 privacy_sandbox::PrivacySandboxSettings* privacy_sandbox_settings,
127 history::HistoryService* history_service,
128 content::BrowsingTopicsSiteDataManager* site_data_manager,
129 optimization_guide::PageContentAnnotationsService* annotations_service)
130 : privacy_sandbox_settings_(privacy_sandbox_settings),
131 history_service_(history_service),
132 site_data_manager_(site_data_manager),
133 annotations_service_(annotations_service),
134 browsing_topics_state_(
135 profile_path,
136 base::BindOnce(
137 &BrowsingTopicsServiceImpl::OnBrowsingTopicsStateLoaded,
138 base::Unretained(this))) {
139 privacy_sandbox_settings_observation_.Observe(privacy_sandbox_settings);
140 history_service_observation_.Observe(history_service);
141
Yao Xiaobc1241a2022-03-29 05:23:37142 // Greedily request the model to be available to reduce the latency in later
143 // topics calculation.
Yao Xiaocc379392022-03-25 21:39:06144 annotations_service_->RequestAndNotifyWhenModelAvailable(
145 optimization_guide::AnnotationType::kPageTopics, base::DoNothing());
146}
147
148std::vector<blink::mojom::EpochTopicPtr>
149BrowsingTopicsServiceImpl::GetBrowsingTopicsForJsApi(
150 const url::Origin& context_origin,
151 content::RenderFrameHost* main_frame) {
152 if (!browsing_topics_state_loaded_)
153 return {};
154
155 if (!privacy_sandbox_settings_->IsTopicsAllowed())
156 return {};
157
158 if (!privacy_sandbox_settings_->IsTopicsAllowedForContext(
159 context_origin.GetURL(), main_frame->GetLastCommittedOrigin())) {
160 return {};
161 }
162
163 std::string context_domain =
164 net::registry_controlled_domains::GetDomainAndRegistry(
165 context_origin.GetURL(),
166 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
167
168 HashedDomain hashed_context_domain = HashContextDomainForStorage(
169 browsing_topics_state_.hmac_key(), context_domain);
170
171 // Track the API usage context after the permissions check.
172 BrowsingTopicsPageLoadDataTracker::GetOrCreateForPage(main_frame->GetPage())
173 ->OnBrowsingTopicsApiUsed(hashed_context_domain, history_service_);
174
175 std::string top_domain =
176 net::registry_controlled_domains::GetDomainAndRegistry(
177 main_frame->GetLastCommittedOrigin().GetURL(),
178 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
179
180 std::vector<blink::mojom::EpochTopicPtr> result_topics;
181 for (const EpochTopics* epoch :
182 browsing_topics_state_.EpochsForSite(top_domain)) {
183 absl::optional<Topic> topic = epoch->TopicForSite(
184 top_domain, hashed_context_domain, browsing_topics_state_.hmac_key());
185
186 // Only add a non-empty topic to the result.
187 if (!topic)
188 continue;
189
Yao Xiaobf39e34d2022-03-28 21:48:28190 // Although a top topic can never be in the disallowed state, the returned
191 // `topic` may be the random one. Thus we still need this check.
Yao Xiaocc379392022-03-25 21:39:06192 if (!privacy_sandbox_settings_->IsTopicAllowed(
193 privacy_sandbox::CanonicalTopic(*topic,
194 epoch->taxonomy_version()))) {
Yao Xiaobf39e34d2022-03-28 21:48:28195 continue;
Yao Xiaocc379392022-03-25 21:39:06196 }
197
198 blink::mojom::EpochTopicPtr result_topic = blink::mojom::EpochTopic::New();
199 result_topic->topic = topic.value().value();
200 result_topic->config_version = base::StrCat(
201 {"chrome.", base::NumberToString(
202 blink::features::kBrowsingTopicsConfigVersion.Get())});
203 result_topic->model_version = base::NumberToString(epoch->model_version());
204 result_topic->taxonomy_version =
205 base::NumberToString(epoch->taxonomy_version());
206 result_topic->version = base::StrCat({result_topic->config_version, ":",
207 result_topic->taxonomy_version, ":",
208 result_topic->model_version});
209 result_topics.push_back(std::move(result_topic));
210 }
211
212 // Remove duplicate entries.
213 std::sort(result_topics.begin(), result_topics.end());
214 result_topics.erase(std::unique(result_topics.begin(), result_topics.end()),
215 result_topics.end());
216
217 // Shuffle the entries.
218 base::RandomShuffle(result_topics.begin(), result_topics.end());
219
220 return result_topics;
221}
Yao Xiao7a1995b2022-03-09 08:18:55222
223std::vector<privacy_sandbox::CanonicalTopic>
224BrowsingTopicsServiceImpl::GetTopicsForSiteForDisplay(
225 const url::Origin& top_origin) const {
Yao Xiaocc379392022-03-25 21:39:06226 if (!browsing_topics_state_loaded_)
227 return {};
228
229 std::string top_domain =
230 net::registry_controlled_domains::GetDomainAndRegistry(
231 top_origin.GetURL(),
232 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
233
234 std::vector<privacy_sandbox::CanonicalTopic> result;
235
236 for (const EpochTopics* epoch :
237 browsing_topics_state_.EpochsForSite(top_domain)) {
Yao Xiaobf39e34d2022-03-28 21:48:28238 absl::optional<Topic> topic = epoch->TopicForSiteForDisplay(
Yao Xiaocc379392022-03-25 21:39:06239 top_domain, browsing_topics_state_.hmac_key());
240
241 if (!topic)
242 continue;
243
Yao Xiaobf39e34d2022-03-28 21:48:28244 // `epoch->TopicForSiteForDisplay()` shall only return a top topic, and a
245 // top topic can never be in the disallowed state (i.e. it will be cleared
246 // when it becomes diallowed).
247 DCHECK(privacy_sandbox_settings_->IsTopicAllowed(
248 privacy_sandbox::CanonicalTopic(*topic, epoch->taxonomy_version())));
249
Yao Xiaocc379392022-03-25 21:39:06250 result.emplace_back(*topic, epoch->taxonomy_version());
251 }
252
253 return result;
Yao Xiao7a1995b2022-03-09 08:18:55254}
255
256std::vector<privacy_sandbox::CanonicalTopic>
257BrowsingTopicsServiceImpl::GetTopTopicsForDisplay() const {
Yao Xiaocc379392022-03-25 21:39:06258 if (!browsing_topics_state_loaded_)
259 return {};
260
261 std::vector<privacy_sandbox::CanonicalTopic> result;
262
263 for (const EpochTopics& epoch : browsing_topics_state_.epochs()) {
Yao Xiaobf39e34d2022-03-28 21:48:28264 DCHECK_LE(epoch.padded_top_topics_start_index(),
265 epoch.top_topics_and_observing_domains().size());
266
267 for (size_t i = 0; i < epoch.padded_top_topics_start_index(); ++i) {
268 const TopicAndDomains& topic_and_domains =
269 epoch.top_topics_and_observing_domains()[i];
270
271 if (!topic_and_domains.IsValid())
Yao Xiaocc379392022-03-25 21:39:06272 continue;
273
Yao Xiaobf39e34d2022-03-28 21:48:28274 // A top topic can never be in the disallowed state (i.e. it will be
275 // cleared when it becomes diallowed).
276 DCHECK(privacy_sandbox_settings_->IsTopicAllowed(
277 privacy_sandbox::CanonicalTopic(topic_and_domains.topic(),
278 epoch.taxonomy_version())));
279
280 result.emplace_back(topic_and_domains.topic(), epoch.taxonomy_version());
Yao Xiaocc379392022-03-25 21:39:06281 }
282 }
283
284 return result;
285}
286
Yao Xiaobf39e34d2022-03-28 21:48:28287void BrowsingTopicsServiceImpl::ClearTopic(
288 const privacy_sandbox::CanonicalTopic& canonical_topic) {
289 if (!browsing_topics_state_loaded_)
290 return;
291
292 browsing_topics_state_.ClearTopic(canonical_topic.topic_id(),
293 canonical_topic.taxonomy_version());
294}
295
296void BrowsingTopicsServiceImpl::ClearTopicsDataForOrigin(
297 const url::Origin& origin) {
298 if (!browsing_topics_state_loaded_)
299 return;
300
301 std::string context_domain =
302 net::registry_controlled_domains::GetDomainAndRegistry(
303 origin.GetURL(),
304 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
305
306 HashedDomain hashed_context_domain = HashContextDomainForStorage(
307 browsing_topics_state_.hmac_key(), context_domain);
308
309 browsing_topics_state_.ClearContextDomain(hashed_context_domain);
310 site_data_manager_->ClearContextDomain(hashed_context_domain);
311}
312
313void BrowsingTopicsServiceImpl::ClearAllTopicsData() {
314 if (!browsing_topics_state_loaded_)
315 return;
316
317 browsing_topics_state_.ClearAllTopics();
318 site_data_manager_->ExpireDataBefore(base::Time::Now());
319}
320
Yao Xiaocc379392022-03-25 21:39:06321std::unique_ptr<BrowsingTopicsCalculator>
322BrowsingTopicsServiceImpl::CreateCalculator(
323 privacy_sandbox::PrivacySandboxSettings* privacy_sandbox_settings,
324 history::HistoryService* history_service,
325 content::BrowsingTopicsSiteDataManager* site_data_manager,
326 optimization_guide::PageContentAnnotationsService* annotations_service,
327 BrowsingTopicsCalculator::CalculateCompletedCallback callback) {
328 return std::make_unique<BrowsingTopicsCalculator>(
329 privacy_sandbox_settings, history_service, site_data_manager,
330 annotations_service, std::move(callback));
331}
332
333const BrowsingTopicsState& BrowsingTopicsServiceImpl::browsing_topics_state() {
334 return browsing_topics_state_;
335}
336
337void BrowsingTopicsServiceImpl::ScheduleBrowsingTopicsCalculation(
338 base::TimeDelta delay) {
339 DCHECK(browsing_topics_state_loaded_);
340
341 // `this` owns the timer, which is automatically cancelled on destruction, so
342 // base::Unretained(this) is safe.
343 schedule_calculate_timer_.Start(
344 FROM_HERE, delay,
345 base::BindOnce(&BrowsingTopicsServiceImpl::CalculateBrowsingTopics,
346 base::Unretained(this)));
347}
348
349void BrowsingTopicsServiceImpl::CalculateBrowsingTopics() {
350 DCHECK(browsing_topics_state_loaded_);
351
352 DCHECK(!topics_calculator_);
353
354 // `this` owns `topics_calculator_` so `topics_calculator_` should not invoke
355 // the callback once it's destroyed.
356 topics_calculator_ = CreateCalculator(
357 privacy_sandbox_settings_, history_service_, site_data_manager_,
358 annotations_service_,
359 base::BindOnce(
360 &BrowsingTopicsServiceImpl::OnCalculateBrowsingTopicsCompleted,
361 base::Unretained(this)));
362}
363
364void BrowsingTopicsServiceImpl::OnCalculateBrowsingTopicsCompleted(
365 EpochTopics epoch_topics) {
366 DCHECK(browsing_topics_state_loaded_);
367
368 DCHECK(topics_calculator_);
369 topics_calculator_.reset();
370
371 browsing_topics_state_.AddEpoch(std::move(epoch_topics));
372 browsing_topics_state_.UpdateNextScheduledCalculationTime();
373
374 ScheduleBrowsingTopicsCalculation(
375 blink::features::kBrowsingTopicsTimePeriodPerEpoch.Get());
376}
377
378void BrowsingTopicsServiceImpl::OnBrowsingTopicsStateLoaded() {
379 DCHECK(!browsing_topics_state_loaded_);
380 browsing_topics_state_loaded_ = true;
381
382 base::Time browsing_topics_data_sccessible_since =
383 privacy_sandbox_settings_->TopicsDataAccessibleSince();
384
385 StartupCalculateDecision decision = GetStartupCalculationDecision(
Yao Xiaobf39e34d2022-03-28 21:48:28386 browsing_topics_state_, browsing_topics_data_sccessible_since,
387 base::BindRepeating(
388 &privacy_sandbox::PrivacySandboxSettings::IsTopicAllowed,
389 base::Unretained(privacy_sandbox_settings_)));
Yao Xiaocc379392022-03-25 21:39:06390
391 if (decision.clear_topics_data)
392 browsing_topics_state_.ClearAllTopics();
393
394 site_data_manager_->ExpireDataBefore(browsing_topics_data_sccessible_since);
395
396 ScheduleBrowsingTopicsCalculation(decision.next_calculation_delay);
397}
398
399void BrowsingTopicsServiceImpl::Shutdown() {
400 privacy_sandbox_settings_observation_.Reset();
401 history_service_observation_.Reset();
402}
403
404void BrowsingTopicsServiceImpl::OnTopicsDataAccessibleSinceUpdated() {
405 if (!browsing_topics_state_loaded_)
406 return;
407
Yao Xiaobf39e34d2022-03-28 21:48:28408 // Here we rely on the fact that `browsing_topics_data_accessible_since` can
409 // only be updated to base::Time::Now() due to data deletion. In this case, we
410 // should just clear all topics.
411 browsing_topics_state_.ClearAllTopics();
412 site_data_manager_->ExpireDataBefore(
413 privacy_sandbox_settings_->TopicsDataAccessibleSince());
Yao Xiaocc379392022-03-25 21:39:06414
415 // Abort the outstanding topics calculation and restart immediately.
416 if (topics_calculator_) {
417 DCHECK(!schedule_calculate_timer_.IsRunning());
418
419 topics_calculator_.reset();
420 CalculateBrowsingTopics();
421 }
422}
423
424void BrowsingTopicsServiceImpl::OnURLsDeleted(
425 history::HistoryService* history_service,
426 const history::DeletionInfo& deletion_info) {
427 if (!browsing_topics_state_loaded_)
428 return;
429
430 // Ignore invalid time_range.
431 if (!deletion_info.IsAllHistory() && !deletion_info.time_range().IsValid())
432 return;
433
434 for (size_t i = 0; i < browsing_topics_state_.epochs().size(); ++i) {
435 const EpochTopics& epoch_topics = browsing_topics_state_.epochs()[i];
436
437 if (epoch_topics.empty())
438 continue;
439
440 bool time_range_overlap =
441 epoch_topics.calculation_time() >= deletion_info.time_range().begin() &&
442 DeriveHistoryDataStartTime(epoch_topics.calculation_time()) <=
443 deletion_info.time_range().end();
444
445 if (time_range_overlap)
446 browsing_topics_state_.ClearOneEpoch(i);
447 }
448
449 // If there's an outstanding topics calculation, abort and restart it.
450 if (topics_calculator_) {
451 DCHECK(!schedule_calculate_timer_.IsRunning());
452
453 topics_calculator_.reset();
454 CalculateBrowsingTopics();
455 }
Yao Xiao7a1995b2022-03-09 08:18:55456}
457
458} // namespace browsing_topics