blob: 5b0e6c0e2269fb598780c1c81fe23fe98d0709d4 [file] [log] [blame]
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
Dmitry Titov909e8d62017-07-19 01:39:565#include "chrome/browser/offline_pages/offline_page_utils.h"
fgorski1d4c9c92015-12-17 20:39:326
dewittjfd1614e52016-06-06 18:36:017#include "base/bind.h"
Jian Li63ceb212018-01-04 02:04:338#include "base/files/file.h"
9#include "base/files/file_path.h"
10#include "base/files/file_util.h"
dewittjfd1614e52016-06-06 18:36:0111#include "base/location.h"
jianli81fd5882017-04-12 01:42:0412#include "base/metrics/histogram_macros.h"
tripta.g3e097a92017-06-22 07:56:2613#include "base/stl_util.h"
bburns16d15332016-03-04 00:34:0914#include "base/strings/string_number_conversions.h"
fgorski1d4c9c92015-12-17 20:39:3215#include "base/strings/string_piece.h"
16#include "base/strings/string_util.h"
dewittjfd1614e52016-06-06 18:36:0117#include "base/threading/thread_task_runner_handle.h"
fgorskie8aae2c2016-11-23 23:45:3918#include "base/time/time.h"
Dmitry Titov909e8d62017-07-19 01:39:5619#include "build/build_config.h"
jianlib0fe12b2017-01-12 01:14:0820#include "chrome/browser/net/net_error_tab_helper.h"
Dmitry Titov909e8d62017-07-19 01:39:5621#include "chrome/browser/offline_pages/offline_page_mhtml_archiver.h"
Jian Lie632ebc82017-06-30 23:40:3222#include "chrome/browser/offline_pages/offline_page_model_factory.h"
Cathy Li69ed5a82017-08-25 23:03:5023#include "chrome/browser/offline_pages/offline_page_origin_utils.h"
24#include "chrome/browser/offline_pages/offline_page_tab_helper.h"
Dmitry Titov909e8d62017-07-19 01:39:5625#include "chrome/browser/offline_pages/request_coordinator_factory.h"
chiliecd23802016-12-07 10:27:1126#include "components/offline_pages/core/background/request_coordinator.h"
27#include "components/offline_pages/core/background/save_page_request.h"
28#include "components/offline_pages/core/client_namespace_constants.h"
29#include "components/offline_pages/core/client_policy_controller.h"
30#include "components/offline_pages/core/offline_page_feature.h"
31#include "components/offline_pages/core/offline_page_item.h"
32#include "components/offline_pages/core/offline_page_model.h"
33#include "components/offline_pages/core/request_header/offline_page_header.h"
fgorski1d4c9c92015-12-17 20:39:3234#include "content/public/browser/browser_context.h"
jianli918265d2017-02-28 01:50:1735#include "content/public/browser/navigation_entry.h"
dewittj44d11cb2016-06-17 23:30:2836#include "content/public/browser/web_contents.h"
Jian Li63ceb212018-01-04 02:04:3337#include "crypto/secure_hash.h"
38#include "crypto/sha2.h"
jianli030745882017-04-28 08:35:1839#include "net/base/mime_util.h"
fgorski1d4c9c92015-12-17 20:39:3240
41namespace offline_pages {
42namespace {
43
jianli610b1fd2016-11-17 23:17:5244void OnGetPagesByURLDone(
45 const GURL& url,
jianlib76ea422016-08-11 02:30:5746 int tab_id,
dewittj7be8aa02016-10-07 18:19:1447 const std::vector<std::string>& namespaces_to_show_in_original_tab,
jianlib76ea422016-08-11 02:30:5748 const base::Callback<void(const OfflinePageItem*)>& callback,
49 const MultipleOfflinePageItemResult& pages) {
jianli610b1fd2016-11-17 23:17:5250 const OfflinePageItem* selected_page_for_final_url = nullptr;
51 const OfflinePageItem* selected_page_for_original_url = nullptr;
jianlib76ea422016-08-11 02:30:5752 std::string tab_id_str = base::IntToString(tab_id);
dewittj7be8aa02016-10-07 18:19:1453
jianli610b1fd2016-11-17 23:17:5254 for (const auto& page : pages) {
tripta.g3e097a92017-06-22 07:56:2655 if (base::ContainsValue(namespaces_to_show_in_original_tab,
56 page.client_id.name_space) &&
jianli610b1fd2016-11-17 23:17:5257 page.client_id.id != tab_id_str) {
dewittj7be8aa02016-10-07 18:19:1458 continue;
59 }
60
jianli610b1fd2016-11-17 23:17:5261 if (OfflinePageUtils::EqualsIgnoringFragment(url, page.url)) {
62 if (!selected_page_for_final_url ||
63 page.creation_time > selected_page_for_final_url->creation_time) {
64 selected_page_for_final_url = &page;
65 }
66 } else {
67 // This is consistent with exact match against original url done in
Yafei Duanfe107f32018-01-02 23:38:1268 // GetPagesTask.
jianli610b1fd2016-11-17 23:17:5269 DCHECK(url == page.original_url);
70 if (!selected_page_for_original_url ||
71 page.creation_time > selected_page_for_original_url->creation_time) {
72 selected_page_for_original_url = &page;
73 }
jianlib76ea422016-08-11 02:30:5774 }
75 }
jianli610b1fd2016-11-17 23:17:5276
77 // Match for final URL should take high priority than matching for original
78 // URL.
79 callback.Run(selected_page_for_final_url ? selected_page_for_final_url
80 : selected_page_for_original_url);
jianlib76ea422016-08-11 02:30:5781}
82
jianli81fd5882017-04-12 01:42:0483bool IsSupportedByDownload(content::BrowserContext* browser_context,
84 const std::string& name_space) {
85 OfflinePageModel* offline_page_model =
86 OfflinePageModelFactory::GetForBrowserContext(browser_context);
87 DCHECK(offline_page_model);
88 ClientPolicyController* policy_controller =
89 offline_page_model->GetPolicyController();
90 DCHECK(policy_controller);
91 return policy_controller->IsSupportedByDownload(name_space);
92}
93
94void CheckDuplicateOngoingDownloads(
95 content::BrowserContext* browser_context,
96 const GURL& url,
97 const OfflinePageUtils::DuplicateCheckCallback& callback) {
98 RequestCoordinator* request_coordinator =
99 RequestCoordinatorFactory::GetForBrowserContext(browser_context);
100 if (!request_coordinator)
101 return;
102
103 auto request_coordinator_continuation =
104 [](content::BrowserContext* browser_context, const GURL& url,
105 const OfflinePageUtils::DuplicateCheckCallback& callback,
106 std::vector<std::unique_ptr<SavePageRequest>> requests) {
107 base::Time latest_request_time;
108 for (auto& request : requests) {
109 if (IsSupportedByDownload(browser_context,
110 request->client_id().name_space) &&
111 request->url() == url &&
112 latest_request_time < request->creation_time()) {
113 latest_request_time = request->creation_time();
114 }
115 }
116
117 if (latest_request_time.is_null()) {
118 callback.Run(OfflinePageUtils::DuplicateCheckResult::NOT_FOUND);
119 } else {
120 // Using CUSTOM_COUNTS instead of time-oriented histogram to record
121 // samples in seconds rather than milliseconds.
122 UMA_HISTOGRAM_CUSTOM_COUNTS(
123 "OfflinePages.DownloadRequestTimeSinceDuplicateRequested",
124 (base::Time::Now() - latest_request_time).InSeconds(),
125 base::TimeDelta::FromSeconds(1).InSeconds(),
126 base::TimeDelta::FromDays(7).InSeconds(), 50);
127
128 callback.Run(
129 OfflinePageUtils::DuplicateCheckResult::DUPLICATE_REQUEST_FOUND);
130 }
131 };
132
133 request_coordinator->GetAllRequests(base::Bind(
134 request_coordinator_continuation, browser_context, url, callback));
135}
136
romax444db4b2017-05-16 23:52:38137void DoCalculateSizeBetween(
138 const offline_pages::SizeInBytesCallback& callback,
139 const base::Time& begin_time,
140 const base::Time& end_time,
141 const offline_pages::MultipleOfflinePageItemResult& result) {
142 int64_t total_size = 0;
143 for (auto& page : result) {
144 if (begin_time <= page.creation_time && page.creation_time < end_time)
145 total_size += page.file_size;
146 }
147 callback.Run(total_size);
148}
149
fgorski1d4c9c92015-12-17 20:39:32150} // namespace
151
152// static
jianli610b1fd2016-11-17 23:17:52153void OfflinePageUtils::SelectPageForURL(
jianlib76ea422016-08-11 02:30:57154 content::BrowserContext* browser_context,
jianli610b1fd2016-11-17 23:17:52155 const GURL& url,
Yafei Duan3caef8c2017-08-10 00:55:24156 URLSearchMode url_search_mode,
jianlib76ea422016-08-11 02:30:57157 int tab_id,
158 const base::Callback<void(const OfflinePageItem*)>& callback) {
159 OfflinePageModel* offline_page_model =
160 OfflinePageModelFactory::GetForBrowserContext(browser_context);
161 if (!offline_page_model) {
162 base::ThreadTaskRunnerHandle::Get()->PostTask(
163 FROM_HERE, base::Bind(callback, nullptr));
164 return;
165 }
166
jianli610b1fd2016-11-17 23:17:52167 offline_page_model->GetPagesByURL(
168 url,
169 url_search_mode,
170 base::Bind(&OnGetPagesByURLDone, url, tab_id,
171 offline_page_model->GetPolicyController()
172 ->GetNamespacesRestrictedToOriginalTab(),
173 callback));
jianlib76ea422016-08-11 02:30:57174}
175
dewittj44d11cb2016-06-17 23:30:28176const OfflinePageItem* OfflinePageUtils::GetOfflinePageFromWebContents(
177 content::WebContents* web_contents) {
178 OfflinePageTabHelper* tab_helper =
179 OfflinePageTabHelper::FromWebContents(web_contents);
jianlif68c52f2016-09-20 22:10:00180 if (!tab_helper)
181 return nullptr;
182 const OfflinePageItem* offline_page = tab_helper->offline_page();
183 if (!offline_page)
184 return nullptr;
Jian Li5de7b7e2017-12-15 22:28:24185 // TODO(jianli): Remove this when the UI knows how to handle untrusted
186 // offline pages.
187 if (!tab_helper->IsShowingTrustedOfflinePage())
188 return nullptr;
jianlif68c52f2016-09-20 22:10:00189
Jian Li5de7b7e2017-12-15 22:28:24190 // If a pending navigation that hasn't committed yet, don't return the cached
191 // offline page that was set at the last commit time. This is to prevent
jianlif68c52f2016-09-20 22:10:00192 // from returning the wrong offline page if DidStartNavigation is never called
193 // to clear it up.
Jian Li5de7b7e2017-12-15 22:28:24194 if (!EqualsIgnoringFragment(web_contents->GetVisibleURL(),
195 web_contents->GetLastCommittedURL())) {
196 return nullptr;
197 }
198
199 return offline_page;
jianlif68c52f2016-09-20 22:10:00200}
201
202// static
203const OfflinePageHeader* OfflinePageUtils::GetOfflineHeaderFromWebContents(
204 content::WebContents* web_contents) {
205 OfflinePageTabHelper* tab_helper =
206 OfflinePageTabHelper::FromWebContents(web_contents);
207 return tab_helper ? &(tab_helper->offline_header()) : nullptr;
dewittj44d11cb2016-06-17 23:30:28208}
209
fgorski7a82d312016-06-28 22:09:34210// static
ryansturmad9e85d32016-10-28 17:28:06211bool OfflinePageUtils::IsShowingOfflinePreview(
212 content::WebContents* web_contents) {
213 OfflinePageTabHelper* tab_helper =
214 OfflinePageTabHelper::FromWebContents(web_contents);
Ryan Sturm94938902017-12-09 23:53:53215 return tab_helper && tab_helper->GetOfflinePreviewItem();
ryansturmad9e85d32016-10-28 17:28:06216}
217
218// static
jianlib0fe12b2017-01-12 01:14:08219bool OfflinePageUtils::IsShowingDownloadButtonInErrorPage(
220 content::WebContents* web_contents) {
221 chrome_browser_net::NetErrorTabHelper* tab_helper =
222 chrome_browser_net::NetErrorTabHelper::FromWebContents(web_contents);
223 return tab_helper && tab_helper->is_showing_download_button_in_error_page();
224}
225
226// static
fgorski2684ff82016-10-07 17:49:40227bool OfflinePageUtils::EqualsIgnoringFragment(const GURL& lhs,
228 const GURL& rhs) {
229 GURL::Replacements remove_params;
230 remove_params.ClearRef();
231
232 GURL lhs_stripped = lhs.ReplaceComponents(remove_params);
fgorski1acc2c42016-11-29 01:15:01233 GURL rhs_stripped = rhs.ReplaceComponents(remove_params);
fgorski2684ff82016-10-07 17:49:40234
235 return lhs_stripped == rhs_stripped;
236}
237
dimich4f6b2802016-12-20 00:18:33238// static
jianli918265d2017-02-28 01:50:17239GURL OfflinePageUtils::GetOriginalURLFromWebContents(
240 content::WebContents* web_contents) {
241 content::NavigationEntry* entry =
242 web_contents->GetController().GetLastCommittedEntry();
243 if (!entry || entry->GetRedirectChain().size() <= 1)
244 return GURL();
245 return entry->GetRedirectChain().front();
246}
247
jianli81fd5882017-04-12 01:42:04248// static
249void OfflinePageUtils::CheckDuplicateDownloads(
250 content::BrowserContext* browser_context,
251 const GURL& url,
252 const DuplicateCheckCallback& callback) {
253 // First check for finished downloads, that is, saved pages.
254 OfflinePageModel* offline_page_model =
255 OfflinePageModelFactory::GetForBrowserContext(browser_context);
256 if (!offline_page_model)
257 return;
258
259 auto continuation = [](content::BrowserContext* browser_context,
260 const GURL& url,
261 const DuplicateCheckCallback& callback,
262 const std::vector<OfflinePageItem>& pages) {
263 base::Time latest_saved_time;
264 for (const auto& offline_page_item : pages) {
265 if (IsSupportedByDownload(browser_context,
266 offline_page_item.client_id.name_space) &&
267 latest_saved_time < offline_page_item.creation_time) {
268 latest_saved_time = offline_page_item.creation_time;
269 }
270 }
271 if (latest_saved_time.is_null()) {
272 // Then check for ongoing downloads, that is, requests.
273 CheckDuplicateOngoingDownloads(browser_context, url, callback);
274 } else {
275 // Using CUSTOM_COUNTS instead of time-oriented histogram to record
276 // samples in seconds rather than milliseconds.
277 UMA_HISTOGRAM_CUSTOM_COUNTS(
278 "OfflinePages.DownloadRequestTimeSinceDuplicateSaved",
279 (base::Time::Now() - latest_saved_time).InSeconds(),
280 base::TimeDelta::FromSeconds(1).InSeconds(),
281 base::TimeDelta::FromDays(7).InSeconds(), 50);
282
283 callback.Run(DuplicateCheckResult::DUPLICATE_PAGE_FOUND);
284 }
285 };
286
287 offline_page_model->GetPagesByURL(
Yafei Duan3caef8c2017-08-10 00:55:24288 url, URLSearchMode::SEARCH_BY_ALL_URLS,
jianli81fd5882017-04-12 01:42:04289 base::Bind(continuation, browser_context, url, callback));
290}
291
292// static
293void OfflinePageUtils::ScheduleDownload(content::WebContents* web_contents,
294 const std::string& name_space,
295 const GURL& url,
Cathy Li7187388f2017-08-09 15:34:51296 DownloadUIActionFlags ui_action,
297 const std::string& request_origin) {
jianli81fd5882017-04-12 01:42:04298 DCHECK(web_contents);
299
300 OfflinePageTabHelper* tab_helper =
301 OfflinePageTabHelper::FromWebContents(web_contents);
302 if (!tab_helper)
303 return;
Cathy Li7187388f2017-08-09 15:34:51304 tab_helper->ScheduleDownloadHelper(web_contents, name_space, url, ui_action,
305 request_origin);
306}
307
308// static
309void OfflinePageUtils::ScheduleDownload(content::WebContents* web_contents,
310 const std::string& name_space,
311 const GURL& url,
312 DownloadUIActionFlags ui_action) {
Cathy Li69ed5a82017-08-25 23:03:50313 std::string origin =
314 OfflinePageOriginUtils::GetEncodedOriginAppFor(web_contents);
315 ScheduleDownload(web_contents, name_space, url, ui_action, origin);
jianli81fd5882017-04-12 01:42:04316}
317
jianli030745882017-04-28 08:35:18318// static
319bool OfflinePageUtils::CanDownloadAsOfflinePage(
320 const GURL& url,
321 const std::string& contents_mime_type) {
322 return url.SchemeIsHTTPOrHTTPS() &&
323 (net::MatchesMimeType(contents_mime_type, "text/html") ||
324 net::MatchesMimeType(contents_mime_type, "application/xhtml+xml"));
325}
326
romax444db4b2017-05-16 23:52:38327// static
328bool OfflinePageUtils::GetCachedOfflinePageSizeBetween(
329 content::BrowserContext* browser_context,
330 const SizeInBytesCallback& callback,
331 const base::Time& begin_time,
332 const base::Time& end_time) {
333 OfflinePageModel* offline_page_model =
334 OfflinePageModelFactory::GetForBrowserContext(browser_context);
335 if (!offline_page_model || begin_time > end_time)
336 return false;
Filip Gorskifea9e532017-09-22 06:07:44337 offline_page_model->GetPagesRemovedOnCacheReset(
romax444db4b2017-05-16 23:52:38338 base::Bind(&DoCalculateSizeBetween, callback, begin_time, end_time));
339 return true;
340}
341
Jian Li63ceb212018-01-04 02:04:33342// static
343std::string OfflinePageUtils::ComputeDigest(const base::FilePath& file_path) {
344 base::File file(file_path, base::File::FLAG_OPEN | base::File::FLAG_READ);
345 if (!file.IsValid())
346 return std::string();
347
348 std::unique_ptr<crypto::SecureHash> secure_hash(
349 crypto::SecureHash::Create(crypto::SecureHash::SHA256));
350
351 const int kMaxBufferSize = 1024;
352 std::vector<char> buffer(kMaxBufferSize);
353 int bytes_read;
354 do {
355 bytes_read = file.ReadAtCurrentPos(buffer.data(), kMaxBufferSize);
356 if (bytes_read > 0)
357 secure_hash->Update(buffer.data(), bytes_read);
358 } while (bytes_read > 0);
359 if (bytes_read < 0)
360 return std::string();
361
362 std::string result_bytes(crypto::kSHA256Length, 0);
363 secure_hash->Finish(&(result_bytes[0]), result_bytes.size());
364 return result_bytes;
365}
366
367// static
368bool OfflinePageUtils::ValidateFile(const base::FilePath& file_path,
369 int64_t expected_file_size,
370 const std::string& expected_digest) {
371 int64_t actual_file_size;
372 if (!base::GetFileSize(file_path, &actual_file_size))
373 return false;
374 if (expected_file_size != actual_file_size)
375 return false;
376
377 std::string actual_digest = ComputeDigest(file_path);
378 return expected_digest == actual_digest;
379}
380
381// static
382std::string OfflinePageUtils::ExtractOfflineHeaderValueFromNavigationEntry(
383 const content::NavigationEntry& entry) {
384 std::string extra_headers = entry.GetExtraHeaders();
385 if (extra_headers.empty())
386 return std::string();
387
388 // The offline header will be the only extra header if it is present.
389 std::string offline_header_key(offline_pages::kOfflinePageHeader);
390 offline_header_key += ": ";
391 if (!base::StartsWith(extra_headers, offline_header_key,
392 base::CompareCase::INSENSITIVE_ASCII)) {
393 return std::string();
394 }
395 std::string header_value = extra_headers.substr(offline_header_key.length());
396 if (header_value.find("\n") != std::string::npos)
397 return std::string();
398
399 return header_value;
400}
401
fgorski1d4c9c92015-12-17 20:39:32402} // namespace offline_pages