blob: bc7b2aac66f6374ae581f583e1681f7f3e48871a [file] [log] [blame]
fgorski1d4c9c92015-12-17 20:39:321// Copyright 2015 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
Dmitry Titov909e8d62017-07-19 01:39:565#include "chrome/browser/offline_pages/offline_page_utils.h"
fgorski1d4c9c92015-12-17 20:39:326
dewittjfd1614e52016-06-06 18:36:017#include "base/bind.h"
Jian Li63ceb212018-01-04 02:04:338#include "base/files/file.h"
9#include "base/files/file_path.h"
10#include "base/files/file_util.h"
dewittjfd1614e52016-06-06 18:36:0111#include "base/location.h"
jianli81fd5882017-04-12 01:42:0412#include "base/metrics/histogram_macros.h"
tripta.g3e097a92017-06-22 07:56:2613#include "base/stl_util.h"
bburns16d15332016-03-04 00:34:0914#include "base/strings/string_number_conversions.h"
fgorski1d4c9c92015-12-17 20:39:3215#include "base/strings/string_piece.h"
16#include "base/strings/string_util.h"
Jian Li009b19a2018-01-05 23:05:3017#include "base/task_scheduler/post_task.h"
dewittjfd1614e52016-06-06 18:36:0118#include "base/threading/thread_task_runner_handle.h"
fgorskie8aae2c2016-11-23 23:45:3919#include "base/time/time.h"
Dmitry Titov909e8d62017-07-19 01:39:5620#include "build/build_config.h"
jianlib0fe12b2017-01-12 01:14:0821#include "chrome/browser/net/net_error_tab_helper.h"
Dmitry Titov909e8d62017-07-19 01:39:5622#include "chrome/browser/offline_pages/offline_page_mhtml_archiver.h"
Jian Lie632ebc82017-06-30 23:40:3223#include "chrome/browser/offline_pages/offline_page_model_factory.h"
Cathy Li69ed5a82017-08-25 23:03:5024#include "chrome/browser/offline_pages/offline_page_origin_utils.h"
25#include "chrome/browser/offline_pages/offline_page_tab_helper.h"
Dmitry Titov909e8d62017-07-19 01:39:5626#include "chrome/browser/offline_pages/request_coordinator_factory.h"
chiliecd23802016-12-07 10:27:1127#include "components/offline_pages/core/background/request_coordinator.h"
28#include "components/offline_pages/core/background/save_page_request.h"
29#include "components/offline_pages/core/client_namespace_constants.h"
30#include "components/offline_pages/core/client_policy_controller.h"
31#include "components/offline_pages/core/offline_page_feature.h"
32#include "components/offline_pages/core/offline_page_item.h"
33#include "components/offline_pages/core/offline_page_model.h"
34#include "components/offline_pages/core/request_header/offline_page_header.h"
fgorski1d4c9c92015-12-17 20:39:3235#include "content/public/browser/browser_context.h"
jianli918265d2017-02-28 01:50:1736#include "content/public/browser/navigation_entry.h"
dewittj44d11cb2016-06-17 23:30:2837#include "content/public/browser/web_contents.h"
Jian Li63ceb212018-01-04 02:04:3338#include "crypto/secure_hash.h"
39#include "crypto/sha2.h"
jianli030745882017-04-28 08:35:1840#include "net/base/mime_util.h"
fgorski1d4c9c92015-12-17 20:39:3241
42namespace offline_pages {
43namespace {
44
Jian Li009b19a2018-01-05 23:05:3045class OfflinePageComparer {
46 public:
47 OfflinePageComparer() = default;
48
49 bool operator()(const OfflinePageItem& a, const OfflinePageItem& b) {
50 return a.creation_time > b.creation_time;
51 }
52};
53
jianli610b1fd2016-11-17 23:17:5254void OnGetPagesByURLDone(
55 const GURL& url,
jianlib76ea422016-08-11 02:30:5756 int tab_id,
dewittj7be8aa02016-10-07 18:19:1457 const std::vector<std::string>& namespaces_to_show_in_original_tab,
Jian Li009b19a2018-01-05 23:05:3058 const base::Callback<void(const std::vector<OfflinePageItem>&)>& callback,
jianlib76ea422016-08-11 02:30:5759 const MultipleOfflinePageItemResult& pages) {
Jian Li009b19a2018-01-05 23:05:3060 std::vector<OfflinePageItem> selected_pages;
jianlib76ea422016-08-11 02:30:5761 std::string tab_id_str = base::IntToString(tab_id);
dewittj7be8aa02016-10-07 18:19:1462
Jian Li009b19a2018-01-05 23:05:3063 // Exclude pages whose tab id does not match.
jianli610b1fd2016-11-17 23:17:5264 for (const auto& page : pages) {
tripta.g3e097a92017-06-22 07:56:2665 if (base::ContainsValue(namespaces_to_show_in_original_tab,
66 page.client_id.name_space) &&
jianli610b1fd2016-11-17 23:17:5267 page.client_id.id != tab_id_str) {
dewittj7be8aa02016-10-07 18:19:1468 continue;
69 }
Jian Li009b19a2018-01-05 23:05:3070 selected_pages.push_back(page);
jianlib76ea422016-08-11 02:30:5771 }
jianli610b1fd2016-11-17 23:17:5272
Jian Li009b19a2018-01-05 23:05:3073 // Sort based on creation date.
74 std::sort(selected_pages.begin(), selected_pages.end(),
75 OfflinePageComparer());
76
77 callback.Run(selected_pages);
jianlib76ea422016-08-11 02:30:5778}
79
jianli81fd5882017-04-12 01:42:0480bool IsSupportedByDownload(content::BrowserContext* browser_context,
81 const std::string& name_space) {
82 OfflinePageModel* offline_page_model =
83 OfflinePageModelFactory::GetForBrowserContext(browser_context);
84 DCHECK(offline_page_model);
85 ClientPolicyController* policy_controller =
86 offline_page_model->GetPolicyController();
87 DCHECK(policy_controller);
88 return policy_controller->IsSupportedByDownload(name_space);
89}
90
91void CheckDuplicateOngoingDownloads(
92 content::BrowserContext* browser_context,
93 const GURL& url,
94 const OfflinePageUtils::DuplicateCheckCallback& callback) {
95 RequestCoordinator* request_coordinator =
96 RequestCoordinatorFactory::GetForBrowserContext(browser_context);
97 if (!request_coordinator)
98 return;
99
100 auto request_coordinator_continuation =
101 [](content::BrowserContext* browser_context, const GURL& url,
102 const OfflinePageUtils::DuplicateCheckCallback& callback,
103 std::vector<std::unique_ptr<SavePageRequest>> requests) {
104 base::Time latest_request_time;
105 for (auto& request : requests) {
106 if (IsSupportedByDownload(browser_context,
107 request->client_id().name_space) &&
108 request->url() == url &&
109 latest_request_time < request->creation_time()) {
110 latest_request_time = request->creation_time();
111 }
112 }
113
114 if (latest_request_time.is_null()) {
115 callback.Run(OfflinePageUtils::DuplicateCheckResult::NOT_FOUND);
116 } else {
117 // Using CUSTOM_COUNTS instead of time-oriented histogram to record
118 // samples in seconds rather than milliseconds.
119 UMA_HISTOGRAM_CUSTOM_COUNTS(
120 "OfflinePages.DownloadRequestTimeSinceDuplicateRequested",
121 (base::Time::Now() - latest_request_time).InSeconds(),
122 base::TimeDelta::FromSeconds(1).InSeconds(),
123 base::TimeDelta::FromDays(7).InSeconds(), 50);
124
125 callback.Run(
126 OfflinePageUtils::DuplicateCheckResult::DUPLICATE_REQUEST_FOUND);
127 }
128 };
129
130 request_coordinator->GetAllRequests(base::Bind(
131 request_coordinator_continuation, browser_context, url, callback));
132}
133
romax444db4b2017-05-16 23:52:38134void DoCalculateSizeBetween(
135 const offline_pages::SizeInBytesCallback& callback,
136 const base::Time& begin_time,
137 const base::Time& end_time,
138 const offline_pages::MultipleOfflinePageItemResult& result) {
139 int64_t total_size = 0;
140 for (auto& page : result) {
141 if (begin_time <= page.creation_time && page.creation_time < end_time)
142 total_size += page.file_size;
143 }
144 callback.Run(total_size);
145}
146
fgorski1d4c9c92015-12-17 20:39:32147} // namespace
148
149// static
Jian Li009b19a2018-01-05 23:05:30150void OfflinePageUtils::SelectPagesForURL(
jianlib76ea422016-08-11 02:30:57151 content::BrowserContext* browser_context,
jianli610b1fd2016-11-17 23:17:52152 const GURL& url,
Yafei Duan3caef8c2017-08-10 00:55:24153 URLSearchMode url_search_mode,
jianlib76ea422016-08-11 02:30:57154 int tab_id,
Jian Li009b19a2018-01-05 23:05:30155 const base::Callback<void(const std::vector<OfflinePageItem>&)>& callback) {
jianlib76ea422016-08-11 02:30:57156 OfflinePageModel* offline_page_model =
157 OfflinePageModelFactory::GetForBrowserContext(browser_context);
158 if (!offline_page_model) {
159 base::ThreadTaskRunnerHandle::Get()->PostTask(
Jian Li009b19a2018-01-05 23:05:30160 FROM_HERE, base::Bind(callback, std::vector<OfflinePageItem>()));
jianlib76ea422016-08-11 02:30:57161 return;
162 }
163
jianli610b1fd2016-11-17 23:17:52164 offline_page_model->GetPagesByURL(
165 url,
166 url_search_mode,
167 base::Bind(&OnGetPagesByURLDone, url, tab_id,
168 offline_page_model->GetPolicyController()
169 ->GetNamespacesRestrictedToOriginalTab(),
170 callback));
jianlib76ea422016-08-11 02:30:57171}
172
dewittj44d11cb2016-06-17 23:30:28173const OfflinePageItem* OfflinePageUtils::GetOfflinePageFromWebContents(
174 content::WebContents* web_contents) {
175 OfflinePageTabHelper* tab_helper =
176 OfflinePageTabHelper::FromWebContents(web_contents);
jianlif68c52f2016-09-20 22:10:00177 if (!tab_helper)
178 return nullptr;
179 const OfflinePageItem* offline_page = tab_helper->offline_page();
180 if (!offline_page)
181 return nullptr;
Jian Li5de7b7e2017-12-15 22:28:24182 // TODO(jianli): Remove this when the UI knows how to handle untrusted
183 // offline pages.
184 if (!tab_helper->IsShowingTrustedOfflinePage())
185 return nullptr;
jianlif68c52f2016-09-20 22:10:00186
Jian Li5de7b7e2017-12-15 22:28:24187 // If a pending navigation that hasn't committed yet, don't return the cached
188 // offline page that was set at the last commit time. This is to prevent
jianlif68c52f2016-09-20 22:10:00189 // from returning the wrong offline page if DidStartNavigation is never called
190 // to clear it up.
Jian Li5de7b7e2017-12-15 22:28:24191 if (!EqualsIgnoringFragment(web_contents->GetVisibleURL(),
192 web_contents->GetLastCommittedURL())) {
193 return nullptr;
194 }
195
196 return offline_page;
jianlif68c52f2016-09-20 22:10:00197}
198
199// static
200const OfflinePageHeader* OfflinePageUtils::GetOfflineHeaderFromWebContents(
201 content::WebContents* web_contents) {
202 OfflinePageTabHelper* tab_helper =
203 OfflinePageTabHelper::FromWebContents(web_contents);
204 return tab_helper ? &(tab_helper->offline_header()) : nullptr;
dewittj44d11cb2016-06-17 23:30:28205}
206
fgorski7a82d312016-06-28 22:09:34207// static
ryansturmad9e85d32016-10-28 17:28:06208bool OfflinePageUtils::IsShowingOfflinePreview(
209 content::WebContents* web_contents) {
210 OfflinePageTabHelper* tab_helper =
211 OfflinePageTabHelper::FromWebContents(web_contents);
Ryan Sturm94938902017-12-09 23:53:53212 return tab_helper && tab_helper->GetOfflinePreviewItem();
ryansturmad9e85d32016-10-28 17:28:06213}
214
215// static
jianlib0fe12b2017-01-12 01:14:08216bool OfflinePageUtils::IsShowingDownloadButtonInErrorPage(
217 content::WebContents* web_contents) {
218 chrome_browser_net::NetErrorTabHelper* tab_helper =
219 chrome_browser_net::NetErrorTabHelper::FromWebContents(web_contents);
220 return tab_helper && tab_helper->is_showing_download_button_in_error_page();
221}
222
223// static
fgorski2684ff82016-10-07 17:49:40224bool OfflinePageUtils::EqualsIgnoringFragment(const GURL& lhs,
225 const GURL& rhs) {
226 GURL::Replacements remove_params;
227 remove_params.ClearRef();
228
229 GURL lhs_stripped = lhs.ReplaceComponents(remove_params);
fgorski1acc2c42016-11-29 01:15:01230 GURL rhs_stripped = rhs.ReplaceComponents(remove_params);
fgorski2684ff82016-10-07 17:49:40231
232 return lhs_stripped == rhs_stripped;
233}
234
dimich4f6b2802016-12-20 00:18:33235// static
jianli918265d2017-02-28 01:50:17236GURL OfflinePageUtils::GetOriginalURLFromWebContents(
237 content::WebContents* web_contents) {
238 content::NavigationEntry* entry =
239 web_contents->GetController().GetLastCommittedEntry();
240 if (!entry || entry->GetRedirectChain().size() <= 1)
241 return GURL();
242 return entry->GetRedirectChain().front();
243}
244
jianli81fd5882017-04-12 01:42:04245// static
246void OfflinePageUtils::CheckDuplicateDownloads(
247 content::BrowserContext* browser_context,
248 const GURL& url,
249 const DuplicateCheckCallback& callback) {
250 // First check for finished downloads, that is, saved pages.
251 OfflinePageModel* offline_page_model =
252 OfflinePageModelFactory::GetForBrowserContext(browser_context);
253 if (!offline_page_model)
254 return;
255
256 auto continuation = [](content::BrowserContext* browser_context,
257 const GURL& url,
258 const DuplicateCheckCallback& callback,
259 const std::vector<OfflinePageItem>& pages) {
260 base::Time latest_saved_time;
261 for (const auto& offline_page_item : pages) {
262 if (IsSupportedByDownload(browser_context,
263 offline_page_item.client_id.name_space) &&
264 latest_saved_time < offline_page_item.creation_time) {
265 latest_saved_time = offline_page_item.creation_time;
266 }
267 }
268 if (latest_saved_time.is_null()) {
269 // Then check for ongoing downloads, that is, requests.
270 CheckDuplicateOngoingDownloads(browser_context, url, callback);
271 } else {
272 // Using CUSTOM_COUNTS instead of time-oriented histogram to record
273 // samples in seconds rather than milliseconds.
274 UMA_HISTOGRAM_CUSTOM_COUNTS(
275 "OfflinePages.DownloadRequestTimeSinceDuplicateSaved",
276 (base::Time::Now() - latest_saved_time).InSeconds(),
277 base::TimeDelta::FromSeconds(1).InSeconds(),
278 base::TimeDelta::FromDays(7).InSeconds(), 50);
279
280 callback.Run(DuplicateCheckResult::DUPLICATE_PAGE_FOUND);
281 }
282 };
283
284 offline_page_model->GetPagesByURL(
Yafei Duan3caef8c2017-08-10 00:55:24285 url, URLSearchMode::SEARCH_BY_ALL_URLS,
jianli81fd5882017-04-12 01:42:04286 base::Bind(continuation, browser_context, url, callback));
287}
288
289// static
290void OfflinePageUtils::ScheduleDownload(content::WebContents* web_contents,
291 const std::string& name_space,
292 const GURL& url,
Cathy Li7187388f2017-08-09 15:34:51293 DownloadUIActionFlags ui_action,
294 const std::string& request_origin) {
jianli81fd5882017-04-12 01:42:04295 DCHECK(web_contents);
296
297 OfflinePageTabHelper* tab_helper =
298 OfflinePageTabHelper::FromWebContents(web_contents);
299 if (!tab_helper)
300 return;
Cathy Li7187388f2017-08-09 15:34:51301 tab_helper->ScheduleDownloadHelper(web_contents, name_space, url, ui_action,
302 request_origin);
303}
304
305// static
306void OfflinePageUtils::ScheduleDownload(content::WebContents* web_contents,
307 const std::string& name_space,
308 const GURL& url,
309 DownloadUIActionFlags ui_action) {
Cathy Li69ed5a82017-08-25 23:03:50310 std::string origin =
311 OfflinePageOriginUtils::GetEncodedOriginAppFor(web_contents);
312 ScheduleDownload(web_contents, name_space, url, ui_action, origin);
jianli81fd5882017-04-12 01:42:04313}
314
jianli030745882017-04-28 08:35:18315// static
316bool OfflinePageUtils::CanDownloadAsOfflinePage(
317 const GURL& url,
318 const std::string& contents_mime_type) {
319 return url.SchemeIsHTTPOrHTTPS() &&
320 (net::MatchesMimeType(contents_mime_type, "text/html") ||
321 net::MatchesMimeType(contents_mime_type, "application/xhtml+xml"));
322}
323
romax444db4b2017-05-16 23:52:38324// static
325bool OfflinePageUtils::GetCachedOfflinePageSizeBetween(
326 content::BrowserContext* browser_context,
327 const SizeInBytesCallback& callback,
328 const base::Time& begin_time,
329 const base::Time& end_time) {
330 OfflinePageModel* offline_page_model =
331 OfflinePageModelFactory::GetForBrowserContext(browser_context);
332 if (!offline_page_model || begin_time > end_time)
333 return false;
Filip Gorskifea9e532017-09-22 06:07:44334 offline_page_model->GetPagesRemovedOnCacheReset(
romax444db4b2017-05-16 23:52:38335 base::Bind(&DoCalculateSizeBetween, callback, begin_time, end_time));
336 return true;
337}
338
Jian Li63ceb212018-01-04 02:04:33339// static
340std::string OfflinePageUtils::ComputeDigest(const base::FilePath& file_path) {
341 base::File file(file_path, base::File::FLAG_OPEN | base::File::FLAG_READ);
342 if (!file.IsValid())
343 return std::string();
344
345 std::unique_ptr<crypto::SecureHash> secure_hash(
346 crypto::SecureHash::Create(crypto::SecureHash::SHA256));
347
348 const int kMaxBufferSize = 1024;
349 std::vector<char> buffer(kMaxBufferSize);
350 int bytes_read;
351 do {
352 bytes_read = file.ReadAtCurrentPos(buffer.data(), kMaxBufferSize);
353 if (bytes_read > 0)
354 secure_hash->Update(buffer.data(), bytes_read);
355 } while (bytes_read > 0);
356 if (bytes_read < 0)
357 return std::string();
358
359 std::string result_bytes(crypto::kSHA256Length, 0);
360 secure_hash->Finish(&(result_bytes[0]), result_bytes.size());
361 return result_bytes;
362}
363
364// static
365bool OfflinePageUtils::ValidateFile(const base::FilePath& file_path,
366 int64_t expected_file_size,
367 const std::string& expected_digest) {
368 int64_t actual_file_size;
369 if (!base::GetFileSize(file_path, &actual_file_size))
370 return false;
371 if (expected_file_size != actual_file_size)
372 return false;
373
374 std::string actual_digest = ComputeDigest(file_path);
375 return expected_digest == actual_digest;
376}
377
378// static
379std::string OfflinePageUtils::ExtractOfflineHeaderValueFromNavigationEntry(
380 const content::NavigationEntry& entry) {
381 std::string extra_headers = entry.GetExtraHeaders();
382 if (extra_headers.empty())
383 return std::string();
384
385 // The offline header will be the only extra header if it is present.
386 std::string offline_header_key(offline_pages::kOfflinePageHeader);
387 offline_header_key += ": ";
388 if (!base::StartsWith(extra_headers, offline_header_key,
389 base::CompareCase::INSENSITIVE_ASCII)) {
390 return std::string();
391 }
392 std::string header_value = extra_headers.substr(offline_header_key.length());
393 if (header_value.find("\n") != std::string::npos)
394 return std::string();
395
396 return header_value;
397}
398
fgorski1d4c9c92015-12-17 20:39:32399} // namespace offline_pages