blob: 18605ba6e3e2941e1a5579a3048e2fec8f53ab3b [file] [log] [blame]
// Copyright 2022 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/browsing_topics/browsing_topics_service_impl.h"
#include <random>
#include "base/rand_util.h"
#include "base/time/time.h"
#include "components/browsing_topics/browsing_topics_calculator.h"
#include "components/browsing_topics/browsing_topics_page_load_data_tracker.h"
#include "components/browsing_topics/util.h"
#include "components/optimization_guide/content/browser/page_content_annotations_service.h"
#include "content/public/browser/browsing_topics_site_data_manager.h"
#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
#include "third_party/blink/public/common/features.h"
#include "third_party/blink/public/mojom/browsing_topics/browsing_topics.mojom.h"
namespace browsing_topics {
namespace {
// Returns whether the topics should all be cleared given
// `browsing_topics_data_accessible_since` and `is_topic_allowed_by_settings`.
// Returns true if `browsing_topics_data_accessible_since` is greater than the
// last calculation time, or if any top topic is disallowed from the settings.
// The latter could happen if the topic became disallowed when
// `browsing_topics_state` was still loading (and we didn't get a chance to
// clear it). This is an unlikely edge case, so it's fine to over-delete.
bool ShouldClearTopicsOnStartup(
const BrowsingTopicsState& browsing_topics_state,
base::Time browsing_topics_data_accessible_since,
base::RepeatingCallback<bool(const privacy_sandbox::CanonicalTopic&)>
is_topic_allowed_by_settings) {
DCHECK(!is_topic_allowed_by_settings.is_null());
if (browsing_topics_state.epochs().empty())
return false;
// Here we rely on the fact that `browsing_topics_data_accessible_since` can
// only be updated to base::Time::Now() due to data deletion. So we'll either
// need to clear all topics data, or no-op. If this assumption no longer
// holds, we'd need to iterate over all epochs, check their calculation time,
// and selectively delete the epochs.
if (browsing_topics_data_accessible_since >
browsing_topics_state.epochs().back().calculation_time()) {
return true;
}
for (const EpochTopics& epoch : browsing_topics_state.epochs()) {
for (const TopicAndDomains& topic_and_domains :
epoch.top_topics_and_observing_domains()) {
if (!topic_and_domains.IsValid())
continue;
if (!is_topic_allowed_by_settings.Run(privacy_sandbox::CanonicalTopic(
topic_and_domains.topic(), epoch.taxonomy_version()))) {
return true;
}
}
}
return false;
}
struct StartupCalculateDecision {
bool clear_topics_data = true;
base::TimeDelta next_calculation_delay;
};
StartupCalculateDecision GetStartupCalculationDecision(
const BrowsingTopicsState& browsing_topics_state,
base::Time browsing_topics_data_accessible_since,
base::RepeatingCallback<bool(const privacy_sandbox::CanonicalTopic&)>
is_topic_allowed_by_settings) {
// The topics have never been calculated. This could happen with a fresh
// profile or the if the config has updated. In case of a config update, the
// topics should have already been cleared when initializing the
// `BrowsingTopicsState`.
if (browsing_topics_state.next_scheduled_calculation_time().is_null()) {
return StartupCalculateDecision{
.clear_topics_data = false,
.next_calculation_delay = base::TimeDelta()};
}
// This could happen when clear-on-exit is turned on and has caused the
// cookies to be deleted on startup, of if a topic became disallowed when
// `browsing_topics_state` was still loading.
bool should_clear_topics_data = ShouldClearTopicsOnStartup(
browsing_topics_state, browsing_topics_data_accessible_since,
is_topic_allowed_by_settings);
base::TimeDelta presumed_next_calculation_delay =
browsing_topics_state.next_scheduled_calculation_time() -
base::Time::Now();
// The scheduled calculation time was reached before the startup.
if (presumed_next_calculation_delay <= base::TimeDelta()) {
return StartupCalculateDecision{
.clear_topics_data = should_clear_topics_data,
.next_calculation_delay = base::TimeDelta()};
}
// This could happen if the machine time has changed since the last
// calculation. Recalculate immediately to align with the expected schedule
// rather than potentially stop computing for a very long time.
if (presumed_next_calculation_delay >=
2 * blink::features::kBrowsingTopicsTimePeriodPerEpoch.Get()) {
return StartupCalculateDecision{
.clear_topics_data = should_clear_topics_data,
.next_calculation_delay = base::TimeDelta()};
}
return StartupCalculateDecision{
.clear_topics_data = should_clear_topics_data,
.next_calculation_delay = presumed_next_calculation_delay};
}
} // namespace
BrowsingTopicsServiceImpl::~BrowsingTopicsServiceImpl() = default;
BrowsingTopicsServiceImpl::BrowsingTopicsServiceImpl(
const base::FilePath& profile_path,
privacy_sandbox::PrivacySandboxSettings* privacy_sandbox_settings,
history::HistoryService* history_service,
content::BrowsingTopicsSiteDataManager* site_data_manager,
optimization_guide::PageContentAnnotationsService* annotations_service)
: privacy_sandbox_settings_(privacy_sandbox_settings),
history_service_(history_service),
site_data_manager_(site_data_manager),
annotations_service_(annotations_service),
browsing_topics_state_(
profile_path,
base::BindOnce(
&BrowsingTopicsServiceImpl::OnBrowsingTopicsStateLoaded,
base::Unretained(this))) {
privacy_sandbox_settings_observation_.Observe(privacy_sandbox_settings);
history_service_observation_.Observe(history_service);
// Greedily request the model to be available to reduce the latency in later
// topics calculation.
annotations_service_->RequestAndNotifyWhenModelAvailable(
optimization_guide::AnnotationType::kPageTopics, base::DoNothing());
}
std::vector<blink::mojom::EpochTopicPtr>
BrowsingTopicsServiceImpl::GetBrowsingTopicsForJsApi(
const url::Origin& context_origin,
content::RenderFrameHost* main_frame) {
if (!browsing_topics_state_loaded_)
return {};
if (!privacy_sandbox_settings_->IsTopicsAllowed())
return {};
if (!privacy_sandbox_settings_->IsTopicsAllowedForContext(
context_origin.GetURL(), main_frame->GetLastCommittedOrigin())) {
return {};
}
std::string context_domain =
net::registry_controlled_domains::GetDomainAndRegistry(
context_origin.GetURL(),
net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
HashedDomain hashed_context_domain = HashContextDomainForStorage(
browsing_topics_state_.hmac_key(), context_domain);
// Track the API usage context after the permissions check.
BrowsingTopicsPageLoadDataTracker::GetOrCreateForPage(main_frame->GetPage())
->OnBrowsingTopicsApiUsed(hashed_context_domain, history_service_);
std::string top_domain =
net::registry_controlled_domains::GetDomainAndRegistry(
main_frame->GetLastCommittedOrigin().GetURL(),
net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
std::vector<blink::mojom::EpochTopicPtr> result_topics;
for (const EpochTopics* epoch :
browsing_topics_state_.EpochsForSite(top_domain)) {
absl::optional<Topic> topic = epoch->TopicForSite(
top_domain, hashed_context_domain, browsing_topics_state_.hmac_key());
// Only add a non-empty topic to the result.
if (!topic)
continue;
// Although a top topic can never be in the disallowed state, the returned
// `topic` may be the random one. Thus we still need this check.
if (!privacy_sandbox_settings_->IsTopicAllowed(
privacy_sandbox::CanonicalTopic(*topic,
epoch->taxonomy_version()))) {
continue;
}
blink::mojom::EpochTopicPtr result_topic = blink::mojom::EpochTopic::New();
result_topic->topic = topic.value().value();
result_topic->config_version = base::StrCat(
{"chrome.", base::NumberToString(
blink::features::kBrowsingTopicsConfigVersion.Get())});
result_topic->model_version = base::NumberToString(epoch->model_version());
result_topic->taxonomy_version =
base::NumberToString(epoch->taxonomy_version());
result_topic->version = base::StrCat({result_topic->config_version, ":",
result_topic->taxonomy_version, ":",
result_topic->model_version});
result_topics.push_back(std::move(result_topic));
}
// Remove duplicate entries.
std::sort(result_topics.begin(), result_topics.end());
result_topics.erase(std::unique(result_topics.begin(), result_topics.end()),
result_topics.end());
// Shuffle the entries.
base::RandomShuffle(result_topics.begin(), result_topics.end());
return result_topics;
}
std::vector<privacy_sandbox::CanonicalTopic>
BrowsingTopicsServiceImpl::GetTopicsForSiteForDisplay(
const url::Origin& top_origin) const {
if (!browsing_topics_state_loaded_)
return {};
std::string top_domain =
net::registry_controlled_domains::GetDomainAndRegistry(
top_origin.GetURL(),
net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
std::vector<privacy_sandbox::CanonicalTopic> result;
for (const EpochTopics* epoch :
browsing_topics_state_.EpochsForSite(top_domain)) {
absl::optional<Topic> topic = epoch->TopicForSiteForDisplay(
top_domain, browsing_topics_state_.hmac_key());
if (!topic)
continue;
// `epoch->TopicForSiteForDisplay()` shall only return a top topic, and a
// top topic can never be in the disallowed state (i.e. it will be cleared
// when it becomes diallowed).
DCHECK(privacy_sandbox_settings_->IsTopicAllowed(
privacy_sandbox::CanonicalTopic(*topic, epoch->taxonomy_version())));
result.emplace_back(*topic, epoch->taxonomy_version());
}
return result;
}
std::vector<privacy_sandbox::CanonicalTopic>
BrowsingTopicsServiceImpl::GetTopTopicsForDisplay() const {
if (!browsing_topics_state_loaded_)
return {};
std::vector<privacy_sandbox::CanonicalTopic> result;
for (const EpochTopics& epoch : browsing_topics_state_.epochs()) {
DCHECK_LE(epoch.padded_top_topics_start_index(),
epoch.top_topics_and_observing_domains().size());
for (size_t i = 0; i < epoch.padded_top_topics_start_index(); ++i) {
const TopicAndDomains& topic_and_domains =
epoch.top_topics_and_observing_domains()[i];
if (!topic_and_domains.IsValid())
continue;
// A top topic can never be in the disallowed state (i.e. it will be
// cleared when it becomes diallowed).
DCHECK(privacy_sandbox_settings_->IsTopicAllowed(
privacy_sandbox::CanonicalTopic(topic_and_domains.topic(),
epoch.taxonomy_version())));
result.emplace_back(topic_and_domains.topic(), epoch.taxonomy_version());
}
}
return result;
}
void BrowsingTopicsServiceImpl::ClearTopic(
const privacy_sandbox::CanonicalTopic& canonical_topic) {
if (!browsing_topics_state_loaded_)
return;
browsing_topics_state_.ClearTopic(canonical_topic.topic_id(),
canonical_topic.taxonomy_version());
}
void BrowsingTopicsServiceImpl::ClearTopicsDataForOrigin(
const url::Origin& origin) {
if (!browsing_topics_state_loaded_)
return;
std::string context_domain =
net::registry_controlled_domains::GetDomainAndRegistry(
origin.GetURL(),
net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
HashedDomain hashed_context_domain = HashContextDomainForStorage(
browsing_topics_state_.hmac_key(), context_domain);
browsing_topics_state_.ClearContextDomain(hashed_context_domain);
site_data_manager_->ClearContextDomain(hashed_context_domain);
}
void BrowsingTopicsServiceImpl::ClearAllTopicsData() {
if (!browsing_topics_state_loaded_)
return;
browsing_topics_state_.ClearAllTopics();
site_data_manager_->ExpireDataBefore(base::Time::Now());
}
std::unique_ptr<BrowsingTopicsCalculator>
BrowsingTopicsServiceImpl::CreateCalculator(
privacy_sandbox::PrivacySandboxSettings* privacy_sandbox_settings,
history::HistoryService* history_service,
content::BrowsingTopicsSiteDataManager* site_data_manager,
optimization_guide::PageContentAnnotationsService* annotations_service,
BrowsingTopicsCalculator::CalculateCompletedCallback callback) {
return std::make_unique<BrowsingTopicsCalculator>(
privacy_sandbox_settings, history_service, site_data_manager,
annotations_service, std::move(callback));
}
const BrowsingTopicsState& BrowsingTopicsServiceImpl::browsing_topics_state() {
return browsing_topics_state_;
}
void BrowsingTopicsServiceImpl::ScheduleBrowsingTopicsCalculation(
base::TimeDelta delay) {
DCHECK(browsing_topics_state_loaded_);
// `this` owns the timer, which is automatically cancelled on destruction, so
// base::Unretained(this) is safe.
schedule_calculate_timer_.Start(
FROM_HERE, delay,
base::BindOnce(&BrowsingTopicsServiceImpl::CalculateBrowsingTopics,
base::Unretained(this)));
}
void BrowsingTopicsServiceImpl::CalculateBrowsingTopics() {
DCHECK(browsing_topics_state_loaded_);
DCHECK(!topics_calculator_);
// `this` owns `topics_calculator_` so `topics_calculator_` should not invoke
// the callback once it's destroyed.
topics_calculator_ = CreateCalculator(
privacy_sandbox_settings_, history_service_, site_data_manager_,
annotations_service_,
base::BindOnce(
&BrowsingTopicsServiceImpl::OnCalculateBrowsingTopicsCompleted,
base::Unretained(this)));
}
void BrowsingTopicsServiceImpl::OnCalculateBrowsingTopicsCompleted(
EpochTopics epoch_topics) {
DCHECK(browsing_topics_state_loaded_);
DCHECK(topics_calculator_);
topics_calculator_.reset();
browsing_topics_state_.AddEpoch(std::move(epoch_topics));
browsing_topics_state_.UpdateNextScheduledCalculationTime();
ScheduleBrowsingTopicsCalculation(
blink::features::kBrowsingTopicsTimePeriodPerEpoch.Get());
}
void BrowsingTopicsServiceImpl::OnBrowsingTopicsStateLoaded() {
DCHECK(!browsing_topics_state_loaded_);
browsing_topics_state_loaded_ = true;
base::Time browsing_topics_data_sccessible_since =
privacy_sandbox_settings_->TopicsDataAccessibleSince();
StartupCalculateDecision decision = GetStartupCalculationDecision(
browsing_topics_state_, browsing_topics_data_sccessible_since,
base::BindRepeating(
&privacy_sandbox::PrivacySandboxSettings::IsTopicAllowed,
base::Unretained(privacy_sandbox_settings_)));
if (decision.clear_topics_data)
browsing_topics_state_.ClearAllTopics();
site_data_manager_->ExpireDataBefore(browsing_topics_data_sccessible_since);
ScheduleBrowsingTopicsCalculation(decision.next_calculation_delay);
}
void BrowsingTopicsServiceImpl::Shutdown() {
privacy_sandbox_settings_observation_.Reset();
history_service_observation_.Reset();
}
void BrowsingTopicsServiceImpl::OnTopicsDataAccessibleSinceUpdated() {
if (!browsing_topics_state_loaded_)
return;
// Here we rely on the fact that `browsing_topics_data_accessible_since` can
// only be updated to base::Time::Now() due to data deletion. In this case, we
// should just clear all topics.
browsing_topics_state_.ClearAllTopics();
site_data_manager_->ExpireDataBefore(
privacy_sandbox_settings_->TopicsDataAccessibleSince());
// Abort the outstanding topics calculation and restart immediately.
if (topics_calculator_) {
DCHECK(!schedule_calculate_timer_.IsRunning());
topics_calculator_.reset();
CalculateBrowsingTopics();
}
}
void BrowsingTopicsServiceImpl::OnURLsDeleted(
history::HistoryService* history_service,
const history::DeletionInfo& deletion_info) {
if (!browsing_topics_state_loaded_)
return;
// Ignore invalid time_range.
if (!deletion_info.IsAllHistory() && !deletion_info.time_range().IsValid())
return;
for (size_t i = 0; i < browsing_topics_state_.epochs().size(); ++i) {
const EpochTopics& epoch_topics = browsing_topics_state_.epochs()[i];
if (epoch_topics.empty())
continue;
bool time_range_overlap =
epoch_topics.calculation_time() >= deletion_info.time_range().begin() &&
DeriveHistoryDataStartTime(epoch_topics.calculation_time()) <=
deletion_info.time_range().end();
if (time_range_overlap)
browsing_topics_state_.ClearOneEpoch(i);
}
// If there's an outstanding topics calculation, abort and restart it.
if (topics_calculator_) {
DCHECK(!schedule_calculate_timer_.IsRunning());
topics_calculator_.reset();
CalculateBrowsingTopics();
}
}
} // namespace browsing_topics