blob: 24bcdb79b65642e4f7404694ac4a38c9a60339a3 [file] [log] [blame]
[email protected]12a936d2013-05-15 04:55:491// Copyright (c) 2013 The Chromium Authors. All rights reserved.
license.botbf09a502008-08-24 00:55:552// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
initial.commitf5b16fe2008-07-27 00:20:514
[email protected]12a936d2013-05-15 04:55:495#include "content/renderer/savable_resources.h"
[email protected]528c56d2010-07-30 19:28:446
[email protected]52bf4652009-10-22 17:01:187#include <set>
8
[email protected]fa419692008-10-16 21:46:149#include "base/compiler_specific.h"
[email protected]38789d82010-11-17 06:03:4410#include "base/logging.h"
[email protected]21aa99682013-06-11 07:17:0111#include "base/strings/string_util.h"
jame0dcd982017-01-11 03:13:4512#include "content/public/common/url_utils.h"
lpze83861a2017-05-25 14:14:1913#include "content/renderer/render_frame_impl.h"
[email protected]5c30b5e02013-05-30 03:46:0814#include "third_party/WebKit/public/platform/WebString.h"
15#include "third_party/WebKit/public/platform/WebVector.h"
[email protected]2255a9332013-06-17 05:12:3116#include "third_party/WebKit/public/web/WebDocument.h"
17#include "third_party/WebKit/public/web/WebElement.h"
[email protected]c93932712014-02-07 18:49:0218#include "third_party/WebKit/public/web/WebElementCollection.h"
[email protected]2255a9332013-06-17 05:12:3119#include "third_party/WebKit/public/web/WebInputElement.h"
[email protected]d3576942014-04-10 18:45:3720#include "third_party/WebKit/public/web/WebLocalFrame.h"
[email protected]2255a9332013-06-17 05:12:3121#include "third_party/WebKit/public/web/WebNode.h"
[email protected]2255a9332013-06-17 05:12:3122#include "third_party/WebKit/public/web/WebView.h"
initial.commitf5b16fe2008-07-27 00:20:5123
[email protected]180ef242013-11-07 06:50:4624using blink::WebDocument;
25using blink::WebElement;
[email protected]c93932712014-02-07 18:49:0226using blink::WebElementCollection;
[email protected]180ef242013-11-07 06:50:4627using blink::WebFrame;
28using blink::WebInputElement;
[email protected]d3576942014-04-10 18:45:3729using blink::WebLocalFrame;
[email protected]180ef242013-11-07 06:50:4630using blink::WebNode;
[email protected]180ef242013-11-07 06:50:4631using blink::WebString;
32using blink::WebVector;
33using blink::WebView;
initial.commitf5b16fe2008-07-27 00:20:5134
[email protected]12a936d2013-05-15 04:55:4935namespace content {
initial.commitf5b16fe2008-07-27 00:20:5136namespace {
37
lukaszac5aebc962015-11-30 18:22:5738// Returns |true| if |web_frame| contains (or should be assumed to contain)
39// a html document.
lukaszabedb4b22017-06-23 00:00:1340bool DoesFrameContainHtmlDocument(WebFrame* web_frame,
lukaszac5aebc962015-11-30 18:22:5741 const WebElement& element) {
lukaszabedb4b22017-06-23 00:00:1342 if (web_frame->IsWebLocalFrame()) {
43 WebDocument doc = web_frame->ToWebLocalFrame()->GetDocument();
Blink Reformat1c4d759e2017-04-09 16:34:5444 return doc.IsHTMLDocument() || doc.IsXHTMLDocument();
lukaszac5aebc962015-11-30 18:22:5745 }
46
47 // Cannot inspect contents of a remote frame, so we use a heuristic:
48 // Assume that <iframe> and <frame> elements contain a html document,
49 // and other elements (i.e. <object>) contain plugins or other resources.
50 // If the heuristic is wrong (i.e. the remote frame in <object> does
51 // contain an html document), then things will still work, but with the
52 // following caveats: 1) original frame content will be saved and 2) links
53 // in frame's html doc will not be rewritten to point to locally saved
54 // files.
Blink Reformat1c4d759e2017-04-09 16:34:5455 return element.HasHTMLTagName("iframe") || element.HasHTMLTagName("frame");
lukaszac5aebc962015-11-30 18:22:5756}
57
58// If present and valid, then push the link associated with |element|
59// into either SavableResourcesResult::subframes or
60// SavableResourcesResult::resources_list.
[email protected]d9ec5c0f2009-12-23 11:55:0761void GetSavableResourceLinkForElement(
62 const WebElement& element,
63 const WebDocument& current_doc,
[email protected]12a936d2013-05-15 04:55:4964 SavableResourcesResult* result) {
initial.commitf5b16fe2008-07-27 00:20:5165 // Get absolute URL.
lukasza222a04022016-02-03 22:29:5366 WebString link_attribute_value = GetSubResourceLinkFromElement(element);
Blink Reformat1c4d759e2017-04-09 16:34:5467 GURL element_url = current_doc.CompleteURL(link_attribute_value);
lukaszac5aebc962015-11-30 18:22:5768
69 // See whether to report this element as a subframe.
Blink Reformat1c4d759e2017-04-09 16:34:5470 WebFrame* web_frame = WebFrame::FromFrameOwnerElement(element);
lukaszabedb4b22017-06-23 00:00:1371 if (web_frame && DoesFrameContainHtmlDocument(web_frame, element)) {
lukaszac5aebc962015-11-30 18:22:5772 SavableSubframe subframe;
73 subframe.original_url = element_url;
lpze83861a2017-05-25 14:14:1974 subframe.routing_id = RenderFrame::GetRoutingIdForWebFrame(web_frame);
lukaszac5aebc962015-11-30 18:22:5775 result->subframes->push_back(subframe);
initial.commitf5b16fe2008-07-27 00:20:5176 return;
lukaszac5aebc962015-11-30 18:22:5777 }
78
lukasza222a04022016-02-03 22:29:5379 // Check whether the node has sub resource URL or not.
Blink Reformat1c4d759e2017-04-09 16:34:5480 if (link_attribute_value.IsNull())
lukasza222a04022016-02-03 22:29:5381 return;
82
lukaszac5aebc962015-11-30 18:22:5783 // Ignore invalid URL.
84 if (!element_url.is_valid())
85 return;
86
initial.commitf5b16fe2008-07-27 00:20:5187 // Ignore those URLs which are not standard protocols. Because FTP
88 // protocol does no have cache mechanism, we will skip all
89 // sub-resources if they use FTP protocol.
lukaszac5aebc962015-11-30 18:22:5790 if (!element_url.SchemeIsHTTPOrHTTPS() &&
91 !element_url.SchemeIs(url::kFileScheme))
initial.commitf5b16fe2008-07-27 00:20:5192 return;
lukaszaf05b1f752015-10-30 00:00:0793
lukaszac5aebc962015-11-30 18:22:5794 result->resources_list->push_back(element_url);
initial.commitf5b16fe2008-07-27 00:20:5195}
96
lukasza6af746b72015-09-18 23:37:2297} // namespace
98
lukaszabedb4b22017-06-23 00:00:1399bool GetSavableResourceLinksForFrame(WebLocalFrame* current_frame,
jame0dcd982017-01-11 03:13:45100 SavableResourcesResult* result) {
initial.commitf5b16fe2008-07-27 00:20:51101 // Get current frame's URL.
Blink Reformat1c4d759e2017-04-09 16:34:54102 GURL current_frame_url = current_frame->GetDocument().Url();
initial.commitf5b16fe2008-07-27 00:20:51103
[email protected]dbeb3952009-10-13 18:01:18104 // If url of current frame is invalid, ignore it.
[email protected]d9ec5c0f2009-12-23 11:55:07105 if (!current_frame_url.is_valid())
lukasza6af746b72015-09-18 23:37:22106 return false;
[email protected]dbeb3952009-10-13 18:01:18107
108 // If url of current frame is not a savable protocol, ignore it.
jame0dcd982017-01-11 03:13:45109 if (!IsSavableURL(current_frame_url))
lukasza6af746b72015-09-18 23:37:22110 return false;
initial.commitf5b16fe2008-07-27 00:20:51111
112 // Get current using document.
Blink Reformat1c4d759e2017-04-09 16:34:54113 WebDocument current_doc = current_frame->GetDocument();
initial.commitf5b16fe2008-07-27 00:20:51114 // Go through all descent nodes.
Blink Reformat1c4d759e2017-04-09 16:34:54115 WebElementCollection all = current_doc.All();
[email protected]c93932712014-02-07 18:49:02116 // Go through all elements in this frame.
Blink Reformat1c4d759e2017-04-09 16:34:54117 for (WebElement element = all.FirstItem(); !element.IsNull();
118 element = all.NextItem()) {
initial.commitf5b16fe2008-07-27 00:20:51119 GetSavableResourceLinkForElement(element,
120 current_doc,
initial.commitf5b16fe2008-07-27 00:20:51121 result);
122 }
initial.commitf5b16fe2008-07-27 00:20:51123
lukasza6af746b72015-09-18 23:37:22124 return true;
125}
initial.commitf5b16fe2008-07-27 00:20:51126
[email protected]d9ec5c0f2009-12-23 11:55:07127WebString GetSubResourceLinkFromElement(const WebElement& element) {
128 const char* attribute_name = NULL;
Blink Reformat1c4d759e2017-04-09 16:34:54129 if (element.HasHTMLTagName("img") || element.HasHTMLTagName("frame") ||
130 element.HasHTMLTagName("iframe") || element.HasHTMLTagName("script")) {
[email protected]d9ec5c0f2009-12-23 11:55:07131 attribute_name = "src";
Blink Reformat1c4d759e2017-04-09 16:34:54132 } else if (element.HasHTMLTagName("input")) {
133 const WebInputElement input = element.ToConst<WebInputElement>();
134 if (input.IsImageButton()) {
[email protected]d9ec5c0f2009-12-23 11:55:07135 attribute_name = "src";
initial.commitf5b16fe2008-07-27 00:20:51136 }
Blink Reformat1c4d759e2017-04-09 16:34:54137 } else if (element.HasHTMLTagName("body") ||
138 element.HasHTMLTagName("table") || element.HasHTMLTagName("tr") ||
139 element.HasHTMLTagName("td")) {
[email protected]d9ec5c0f2009-12-23 11:55:07140 attribute_name = "background";
Blink Reformat1c4d759e2017-04-09 16:34:54141 } else if (element.HasHTMLTagName("blockquote") ||
142 element.HasHTMLTagName("q") || element.HasHTMLTagName("del") ||
143 element.HasHTMLTagName("ins")) {
[email protected]d9ec5c0f2009-12-23 11:55:07144 attribute_name = "cite";
Blink Reformat1c4d759e2017-04-09 16:34:54145 } else if (element.HasHTMLTagName("object")) {
lukasza96fd433212015-11-30 02:45:35146 attribute_name = "data";
Blink Reformat1c4d759e2017-04-09 16:34:54147 } else if (element.HasHTMLTagName("link")) {
initial.commitf5b16fe2008-07-27 00:20:51148 // If the link element is not linked to css, ignore it.
Blink Reformat1c4d759e2017-04-09 16:34:54149 WebString type = element.GetAttribute("type");
150 WebString rel = element.GetAttribute("rel");
151 if ((type.ContainsOnlyASCII() &&
152 base::LowerCaseEqualsASCII(type.Ascii(), "text/css")) ||
153 (rel.ContainsOnlyASCII() &&
154 base::LowerCaseEqualsASCII(rel.Ascii(), "stylesheet"))) {
[email protected]7f3281452010-02-24 21:27:02155 // TODO(jnd): Add support for extracting links of sub-resources which
initial.commitf5b16fe2008-07-27 00:20:51156 // are inside style-sheet such as @import, url(), etc.
157 // See bug: http://b/issue?id=1111667.
[email protected]d9ec5c0f2009-12-23 11:55:07158 attribute_name = "href";
initial.commitf5b16fe2008-07-27 00:20:51159 }
initial.commitf5b16fe2008-07-27 00:20:51160 }
161 if (!attribute_name)
[email protected]d9ec5c0f2009-12-23 11:55:07162 return WebString();
Blink Reformat1c4d759e2017-04-09 16:34:54163 WebString value = element.GetAttribute(WebString::FromUTF8(attribute_name));
initial.commitf5b16fe2008-07-27 00:20:51164 // If value has content and not start with "javascript:" then return it,
165 // otherwise return NULL.
Blink Reformat1c4d759e2017-04-09 16:34:54166 if (!value.IsNull() && !value.IsEmpty() &&
167 !base::StartsWith(value.Utf8(),
168 "javascript:", base::CompareCase::INSENSITIVE_ASCII))
initial.commitf5b16fe2008-07-27 00:20:51169 return value;
170
[email protected]d9ec5c0f2009-12-23 11:55:07171 return WebString();
initial.commitf5b16fe2008-07-27 00:20:51172}
173
[email protected]12a936d2013-05-15 04:55:49174} // namespace content