[email protected] | 12a936d | 2013-05-15 04:55:49 | [diff] [blame] | 1 | // Copyright (c) 2013 The Chromium Authors. All rights reserved. |
license.bot | bf09a50 | 2008-08-24 00:55:55 | [diff] [blame] | 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
initial.commit | f5b16fe | 2008-07-27 00:20:51 | [diff] [blame] | 4 | |
[email protected] | 12a936d | 2013-05-15 04:55:49 | [diff] [blame] | 5 | #include "content/renderer/savable_resources.h" |
[email protected] | 528c56d | 2010-07-30 19:28:44 | [diff] [blame] | 6 | |
[email protected] | 52bf465 | 2009-10-22 17:01:18 | [diff] [blame] | 7 | #include <set> |
| 8 | |
[email protected] | fa41969 | 2008-10-16 21:46:14 | [diff] [blame] | 9 | #include "base/compiler_specific.h" |
[email protected] | 38789d8 | 2010-11-17 06:03:44 | [diff] [blame] | 10 | #include "base/logging.h" |
[email protected] | 21aa9968 | 2013-06-11 07:17:01 | [diff] [blame] | 11 | #include "base/strings/string_util.h" |
jam | e0dcd98 | 2017-01-11 03:13:45 | [diff] [blame] | 12 | #include "content/public/common/url_utils.h" |
lpz | e83861a | 2017-05-25 14:14:19 | [diff] [blame] | 13 | #include "content/renderer/render_frame_impl.h" |
[email protected] | 5c30b5e0 | 2013-05-30 03:46:08 | [diff] [blame] | 14 | #include "third_party/WebKit/public/platform/WebString.h" |
| 15 | #include "third_party/WebKit/public/platform/WebVector.h" |
[email protected] | 2255a933 | 2013-06-17 05:12:31 | [diff] [blame] | 16 | #include "third_party/WebKit/public/web/WebDocument.h" |
| 17 | #include "third_party/WebKit/public/web/WebElement.h" |
[email protected] | c9393271 | 2014-02-07 18:49:02 | [diff] [blame] | 18 | #include "third_party/WebKit/public/web/WebElementCollection.h" |
[email protected] | 2255a933 | 2013-06-17 05:12:31 | [diff] [blame] | 19 | #include "third_party/WebKit/public/web/WebInputElement.h" |
[email protected] | d357694 | 2014-04-10 18:45:37 | [diff] [blame] | 20 | #include "third_party/WebKit/public/web/WebLocalFrame.h" |
[email protected] | 2255a933 | 2013-06-17 05:12:31 | [diff] [blame] | 21 | #include "third_party/WebKit/public/web/WebNode.h" |
[email protected] | 2255a933 | 2013-06-17 05:12:31 | [diff] [blame] | 22 | #include "third_party/WebKit/public/web/WebView.h" |
initial.commit | f5b16fe | 2008-07-27 00:20:51 | [diff] [blame] | 23 | |
[email protected] | 180ef24 | 2013-11-07 06:50:46 | [diff] [blame] | 24 | using blink::WebDocument; |
| 25 | using blink::WebElement; |
[email protected] | c9393271 | 2014-02-07 18:49:02 | [diff] [blame] | 26 | using blink::WebElementCollection; |
[email protected] | 180ef24 | 2013-11-07 06:50:46 | [diff] [blame] | 27 | using blink::WebFrame; |
| 28 | using blink::WebInputElement; |
[email protected] | d357694 | 2014-04-10 18:45:37 | [diff] [blame] | 29 | using blink::WebLocalFrame; |
[email protected] | 180ef24 | 2013-11-07 06:50:46 | [diff] [blame] | 30 | using blink::WebNode; |
[email protected] | 180ef24 | 2013-11-07 06:50:46 | [diff] [blame] | 31 | using blink::WebString; |
| 32 | using blink::WebVector; |
| 33 | using blink::WebView; |
initial.commit | f5b16fe | 2008-07-27 00:20:51 | [diff] [blame] | 34 | |
[email protected] | 12a936d | 2013-05-15 04:55:49 | [diff] [blame] | 35 | namespace content { |
initial.commit | f5b16fe | 2008-07-27 00:20:51 | [diff] [blame] | 36 | namespace { |
| 37 | |
lukasza | c5aebc96 | 2015-11-30 18:22:57 | [diff] [blame] | 38 | // Returns |true| if |web_frame| contains (or should be assumed to contain) |
| 39 | // a html document. |
lukasza | bedb4b2 | 2017-06-23 00:00:13 | [diff] [blame] | 40 | bool DoesFrameContainHtmlDocument(WebFrame* web_frame, |
lukasza | c5aebc96 | 2015-11-30 18:22:57 | [diff] [blame] | 41 | const WebElement& element) { |
lukasza | bedb4b2 | 2017-06-23 00:00:13 | [diff] [blame] | 42 | if (web_frame->IsWebLocalFrame()) { |
| 43 | WebDocument doc = web_frame->ToWebLocalFrame()->GetDocument(); |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 44 | return doc.IsHTMLDocument() || doc.IsXHTMLDocument(); |
lukasza | c5aebc96 | 2015-11-30 18:22:57 | [diff] [blame] | 45 | } |
| 46 | |
| 47 | // Cannot inspect contents of a remote frame, so we use a heuristic: |
| 48 | // Assume that <iframe> and <frame> elements contain a html document, |
| 49 | // and other elements (i.e. <object>) contain plugins or other resources. |
| 50 | // If the heuristic is wrong (i.e. the remote frame in <object> does |
| 51 | // contain an html document), then things will still work, but with the |
| 52 | // following caveats: 1) original frame content will be saved and 2) links |
| 53 | // in frame's html doc will not be rewritten to point to locally saved |
| 54 | // files. |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 55 | return element.HasHTMLTagName("iframe") || element.HasHTMLTagName("frame"); |
lukasza | c5aebc96 | 2015-11-30 18:22:57 | [diff] [blame] | 56 | } |
| 57 | |
| 58 | // If present and valid, then push the link associated with |element| |
| 59 | // into either SavableResourcesResult::subframes or |
| 60 | // SavableResourcesResult::resources_list. |
[email protected] | d9ec5c0f | 2009-12-23 11:55:07 | [diff] [blame] | 61 | void GetSavableResourceLinkForElement( |
| 62 | const WebElement& element, |
| 63 | const WebDocument& current_doc, |
[email protected] | 12a936d | 2013-05-15 04:55:49 | [diff] [blame] | 64 | SavableResourcesResult* result) { |
initial.commit | f5b16fe | 2008-07-27 00:20:51 | [diff] [blame] | 65 | // Get absolute URL. |
lukasza | 222a0402 | 2016-02-03 22:29:53 | [diff] [blame] | 66 | WebString link_attribute_value = GetSubResourceLinkFromElement(element); |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 67 | GURL element_url = current_doc.CompleteURL(link_attribute_value); |
lukasza | c5aebc96 | 2015-11-30 18:22:57 | [diff] [blame] | 68 | |
| 69 | // See whether to report this element as a subframe. |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 70 | WebFrame* web_frame = WebFrame::FromFrameOwnerElement(element); |
lukasza | bedb4b2 | 2017-06-23 00:00:13 | [diff] [blame] | 71 | if (web_frame && DoesFrameContainHtmlDocument(web_frame, element)) { |
lukasza | c5aebc96 | 2015-11-30 18:22:57 | [diff] [blame] | 72 | SavableSubframe subframe; |
| 73 | subframe.original_url = element_url; |
lpz | e83861a | 2017-05-25 14:14:19 | [diff] [blame] | 74 | subframe.routing_id = RenderFrame::GetRoutingIdForWebFrame(web_frame); |
lukasza | c5aebc96 | 2015-11-30 18:22:57 | [diff] [blame] | 75 | result->subframes->push_back(subframe); |
initial.commit | f5b16fe | 2008-07-27 00:20:51 | [diff] [blame] | 76 | return; |
lukasza | c5aebc96 | 2015-11-30 18:22:57 | [diff] [blame] | 77 | } |
| 78 | |
lukasza | 222a0402 | 2016-02-03 22:29:53 | [diff] [blame] | 79 | // Check whether the node has sub resource URL or not. |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 80 | if (link_attribute_value.IsNull()) |
lukasza | 222a0402 | 2016-02-03 22:29:53 | [diff] [blame] | 81 | return; |
| 82 | |
lukasza | c5aebc96 | 2015-11-30 18:22:57 | [diff] [blame] | 83 | // Ignore invalid URL. |
| 84 | if (!element_url.is_valid()) |
| 85 | return; |
| 86 | |
initial.commit | f5b16fe | 2008-07-27 00:20:51 | [diff] [blame] | 87 | // Ignore those URLs which are not standard protocols. Because FTP |
| 88 | // protocol does no have cache mechanism, we will skip all |
| 89 | // sub-resources if they use FTP protocol. |
lukasza | c5aebc96 | 2015-11-30 18:22:57 | [diff] [blame] | 90 | if (!element_url.SchemeIsHTTPOrHTTPS() && |
| 91 | !element_url.SchemeIs(url::kFileScheme)) |
initial.commit | f5b16fe | 2008-07-27 00:20:51 | [diff] [blame] | 92 | return; |
lukasza | f05b1f75 | 2015-10-30 00:00:07 | [diff] [blame] | 93 | |
lukasza | c5aebc96 | 2015-11-30 18:22:57 | [diff] [blame] | 94 | result->resources_list->push_back(element_url); |
initial.commit | f5b16fe | 2008-07-27 00:20:51 | [diff] [blame] | 95 | } |
| 96 | |
lukasza | 6af746b7 | 2015-09-18 23:37:22 | [diff] [blame] | 97 | } // namespace |
| 98 | |
lukasza | bedb4b2 | 2017-06-23 00:00:13 | [diff] [blame] | 99 | bool GetSavableResourceLinksForFrame(WebLocalFrame* current_frame, |
jam | e0dcd98 | 2017-01-11 03:13:45 | [diff] [blame] | 100 | SavableResourcesResult* result) { |
initial.commit | f5b16fe | 2008-07-27 00:20:51 | [diff] [blame] | 101 | // Get current frame's URL. |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 102 | GURL current_frame_url = current_frame->GetDocument().Url(); |
initial.commit | f5b16fe | 2008-07-27 00:20:51 | [diff] [blame] | 103 | |
[email protected] | dbeb395 | 2009-10-13 18:01:18 | [diff] [blame] | 104 | // If url of current frame is invalid, ignore it. |
[email protected] | d9ec5c0f | 2009-12-23 11:55:07 | [diff] [blame] | 105 | if (!current_frame_url.is_valid()) |
lukasza | 6af746b7 | 2015-09-18 23:37:22 | [diff] [blame] | 106 | return false; |
[email protected] | dbeb395 | 2009-10-13 18:01:18 | [diff] [blame] | 107 | |
| 108 | // If url of current frame is not a savable protocol, ignore it. |
jam | e0dcd98 | 2017-01-11 03:13:45 | [diff] [blame] | 109 | if (!IsSavableURL(current_frame_url)) |
lukasza | 6af746b7 | 2015-09-18 23:37:22 | [diff] [blame] | 110 | return false; |
initial.commit | f5b16fe | 2008-07-27 00:20:51 | [diff] [blame] | 111 | |
| 112 | // Get current using document. |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 113 | WebDocument current_doc = current_frame->GetDocument(); |
initial.commit | f5b16fe | 2008-07-27 00:20:51 | [diff] [blame] | 114 | // Go through all descent nodes. |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 115 | WebElementCollection all = current_doc.All(); |
[email protected] | c9393271 | 2014-02-07 18:49:02 | [diff] [blame] | 116 | // Go through all elements in this frame. |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 117 | for (WebElement element = all.FirstItem(); !element.IsNull(); |
| 118 | element = all.NextItem()) { |
initial.commit | f5b16fe | 2008-07-27 00:20:51 | [diff] [blame] | 119 | GetSavableResourceLinkForElement(element, |
| 120 | current_doc, |
initial.commit | f5b16fe | 2008-07-27 00:20:51 | [diff] [blame] | 121 | result); |
| 122 | } |
initial.commit | f5b16fe | 2008-07-27 00:20:51 | [diff] [blame] | 123 | |
lukasza | 6af746b7 | 2015-09-18 23:37:22 | [diff] [blame] | 124 | return true; |
| 125 | } |
initial.commit | f5b16fe | 2008-07-27 00:20:51 | [diff] [blame] | 126 | |
[email protected] | d9ec5c0f | 2009-12-23 11:55:07 | [diff] [blame] | 127 | WebString GetSubResourceLinkFromElement(const WebElement& element) { |
| 128 | const char* attribute_name = NULL; |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 129 | if (element.HasHTMLTagName("img") || element.HasHTMLTagName("frame") || |
| 130 | element.HasHTMLTagName("iframe") || element.HasHTMLTagName("script")) { |
[email protected] | d9ec5c0f | 2009-12-23 11:55:07 | [diff] [blame] | 131 | attribute_name = "src"; |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 132 | } else if (element.HasHTMLTagName("input")) { |
| 133 | const WebInputElement input = element.ToConst<WebInputElement>(); |
| 134 | if (input.IsImageButton()) { |
[email protected] | d9ec5c0f | 2009-12-23 11:55:07 | [diff] [blame] | 135 | attribute_name = "src"; |
initial.commit | f5b16fe | 2008-07-27 00:20:51 | [diff] [blame] | 136 | } |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 137 | } else if (element.HasHTMLTagName("body") || |
| 138 | element.HasHTMLTagName("table") || element.HasHTMLTagName("tr") || |
| 139 | element.HasHTMLTagName("td")) { |
[email protected] | d9ec5c0f | 2009-12-23 11:55:07 | [diff] [blame] | 140 | attribute_name = "background"; |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 141 | } else if (element.HasHTMLTagName("blockquote") || |
| 142 | element.HasHTMLTagName("q") || element.HasHTMLTagName("del") || |
| 143 | element.HasHTMLTagName("ins")) { |
[email protected] | d9ec5c0f | 2009-12-23 11:55:07 | [diff] [blame] | 144 | attribute_name = "cite"; |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 145 | } else if (element.HasHTMLTagName("object")) { |
lukasza | 96fd43321 | 2015-11-30 02:45:35 | [diff] [blame] | 146 | attribute_name = "data"; |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 147 | } else if (element.HasHTMLTagName("link")) { |
initial.commit | f5b16fe | 2008-07-27 00:20:51 | [diff] [blame] | 148 | // If the link element is not linked to css, ignore it. |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 149 | WebString type = element.GetAttribute("type"); |
| 150 | WebString rel = element.GetAttribute("rel"); |
| 151 | if ((type.ContainsOnlyASCII() && |
| 152 | base::LowerCaseEqualsASCII(type.Ascii(), "text/css")) || |
| 153 | (rel.ContainsOnlyASCII() && |
| 154 | base::LowerCaseEqualsASCII(rel.Ascii(), "stylesheet"))) { |
[email protected] | 7f328145 | 2010-02-24 21:27:02 | [diff] [blame] | 155 | // TODO(jnd): Add support for extracting links of sub-resources which |
initial.commit | f5b16fe | 2008-07-27 00:20:51 | [diff] [blame] | 156 | // are inside style-sheet such as @import, url(), etc. |
| 157 | // See bug: http://b/issue?id=1111667. |
[email protected] | d9ec5c0f | 2009-12-23 11:55:07 | [diff] [blame] | 158 | attribute_name = "href"; |
initial.commit | f5b16fe | 2008-07-27 00:20:51 | [diff] [blame] | 159 | } |
initial.commit | f5b16fe | 2008-07-27 00:20:51 | [diff] [blame] | 160 | } |
| 161 | if (!attribute_name) |
[email protected] | d9ec5c0f | 2009-12-23 11:55:07 | [diff] [blame] | 162 | return WebString(); |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 163 | WebString value = element.GetAttribute(WebString::FromUTF8(attribute_name)); |
initial.commit | f5b16fe | 2008-07-27 00:20:51 | [diff] [blame] | 164 | // If value has content and not start with "javascript:" then return it, |
| 165 | // otherwise return NULL. |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 166 | if (!value.IsNull() && !value.IsEmpty() && |
| 167 | !base::StartsWith(value.Utf8(), |
| 168 | "javascript:", base::CompareCase::INSENSITIVE_ASCII)) |
initial.commit | f5b16fe | 2008-07-27 00:20:51 | [diff] [blame] | 169 | return value; |
| 170 | |
[email protected] | d9ec5c0f | 2009-12-23 11:55:07 | [diff] [blame] | 171 | return WebString(); |
initial.commit | f5b16fe | 2008-07-27 00:20:51 | [diff] [blame] | 172 | } |
| 173 | |
[email protected] | 12a936d | 2013-05-15 04:55:49 | [diff] [blame] | 174 | } // namespace content |