Skip to content
This repository was archived by the owner on Apr 10, 2025. It is now read-only.

Commit db23c07

Browse files
keesspoelstra authored and jeffkaufman committed
Strip subresource hints
Default behaviour is to strip subresource links which are in scope for pagespeed; these are the resources that are not disallowed or are valid domains in the domain lawyer. Added can_modify_url flag to HtmlParse and CanModifyUrl function to the HtmlFilter which indicates whether urls can be rewritten by the parser and thus whether the hints should be removed. This is tested now in the strip_subresource_hints_filter_test.cc as this is only used by the strip subresource hints feature right now. This should be moved to the HtmlParse tests. DetermineEnabledFilters has been rolled up into DetermineFiltersBehavior, which also determines can_modify_url for all the filters and possible future "behaviors". Added new option to explicitly prevent the default behaviour: ModPagespeedPreserveSubresourceHints on/off. For Ubuntu, a check for the new setup /var/www/html instead of /var/www for the document root has been added. Fixes Issue #973. (Squash of 54983f4 and b651c78.)
1 parent a79fb28 commit db23c07

28 files changed

+655
-39
lines changed

install/debug.conf.template

+24
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,29 @@ ModPagespeedLoadFromFile "http://@@APACHE_DOMAIN@@/mod_pagespeed_test/ipro/insta
238238
Header append 'Cache-Control' 'no-transform'
239239
</Directory>
240240

241+
<Directory "@@APACHE_DOC_ROOT@@/mod_pagespeed_test/strip_subresource_hints/default" >
242+
ModPagespeedRewriteLevel CoreFilters
243+
ModPagespeedDisableFilters add_instrumentation
244+
ModPagespeedDisallow *dontrewriteme*
245+
</Directory>
246+
247+
<Directory "@@APACHE_DOC_ROOT@@/mod_pagespeed_test/strip_subresource_hints/preserve_on/" >
248+
ModPagespeedPreserveSubresourceHints on
249+
ModPagespeedRewriteLevel CoreFilters
250+
ModPagespeedDisableFilters add_instrumentation
251+
</Directory>
252+
253+
<Directory "@@APACHE_DOC_ROOT@@/mod_pagespeed_test/strip_subresource_hints/preserve_off/" >
254+
ModPagespeedPreserveSubresourceHints off
255+
ModPagespeedRewriteLevel CoreFilters
256+
ModPagespeedDisableFilters add_instrumentation
257+
</Directory>
258+
259+
<Directory "@@APACHE_DOC_ROOT@@/mod_pagespeed_test/strip_subresource_hints/default_passthrough/" >
260+
ModPagespeedRewriteLevel PassThrough
261+
ModPagespeedDisableFilters add_instrumentation
262+
</Directory>
263+
241264
# This Directory does not even exist, but by setting some options in that
242265
# scope we test to make sure the options we claim are really settable in
243266
# .htaccess. Note that <Directory> and .htaccess are enforced the same way.
@@ -1912,6 +1935,7 @@ Listen 127.0.0.2:@@APACHE_TERTIARY_PORT@@
19121935
#ALL_DIRECTIVES ModPagespeedNumExpensiveRewriteThreads 2
19131936
#ALL_DIRECTIVES ModPagespeedNumRewriteThreads 4
19141937
#ALL_DIRECTIVES ModPagespeedOptionCookiesDurationMs 12345
1938+
#ALL_DIRECTIVES ModPagespeedPreserveSubresourceHints on
19151939
#ALL_DIRECTIVES ModPagespeedPreserveUrlRelativity on
19161940
#ALL_DIRECTIVES ModPagespeedProgressiveJpegMinBytes 1000
19171941
#ALL_DIRECTIVES ModPagespeedRateLimitBackgroundFetches true
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
<html>
2+
<head><link rel="subresource" src="dontrewriteme_resource.jpg"/></head>
3+
<body>
4+
</body>
5+
</html>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
<html>
2+
<head><link rel="subresource" src="test"/></head>
3+
<body>
4+
</body>
5+
</html>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
<html>
2+
<head>
3+
<link rel="subresource" src="test1"/>
4+
<link rel="subresource" src="test2"/>
5+
</head>
6+
<body>
7+
</body>
8+
</html>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
<html>
2+
<head><link rel="subresource" src="test"/></head>
3+
<body>
4+
</body>
5+
</html>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
<html>
2+
<head><link rel="subresource" src="test"/></head>
3+
<body>
4+
</body>
5+
</html>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
<html>
2+
<head><link rel="subresource" src="test"/></head>
3+
<body>
4+
</body>
5+
</html>

install/setup_test_machine.sh

+1
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ fi
6666
# This sequence is described in /usr/share/doc/apache2.2-common/README.Debian.gz
6767
sudo a2ensite default-ssl
6868
sudo a2enmod ssl
69+
sudo a2enmod headers
6970
sudo make-ssl-cert generate-default-snakeoil --force-overwrite
7071

7172
# TODO(jefftk): We don't restart the test servers often enough for this to be

install/ubuntu.sh

+8
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,19 @@
22

33
echo make $*
44

5+
APACHE_DOC_ROOT=/var/www
6+
# Test for new Ubuntu setups where the document root is /var/www/html. We could run
7+
# into a false positive here, but probably not for build systems.
8+
if [ -e /var/www/html ]; then
9+
APACHE_DOC_ROOT=/var/www/html
10+
fi
11+
512
exec make \
613
APACHE_CONTROL_PROGRAM=/etc/init.d/apache2 \
714
APACHE_LOG=/var/log/apache2/error.log \
815
APACHE_MODULES=/usr/lib/apache2/modules \
916
APACHE_CONF_FILE=/etc/apache2/apache2.conf \
17+
APACHE_DOC_ROOT=$APACHE_DOC_ROOT \
1018
APACHE_PIDFILE=/var/run/apache2.pid \
1119
APACHE_PROGRAM=/usr/sbin/apache2 \
1220
APACHE_ROOT=/etc/apache2 \

net/instaweb/instaweb.gyp

+1
Original file line numberDiff line numberDiff line change
@@ -1832,6 +1832,7 @@
18321832
'rewriter/split_html_helper_filter.cc',
18331833
'rewriter/strip_non_cacheable_filter.cc',
18341834
'rewriter/strip_scripts_filter.cc',
1835+
'rewriter/strip_subresource_hints_filter.cc',
18351836
'rewriter/support_noscript_filter.cc',
18361837
'rewriter/suppress_prehead_filter.cc',
18371838
'rewriter/url_input_resource.cc',

net/instaweb/rewriter/public/rewrite_driver.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -1235,7 +1235,7 @@ class RewriteDriver : public HtmlParse {
12351235
bool Decode(StringPiece leaf, ResourceNamer* resource_namer) const;
12361236

12371237
protected:
1238-
virtual void DetermineEnabledFiltersImpl();
1238+
virtual void DetermineFiltersBehaviorImpl();
12391239

12401240
private:
12411241
friend class DistributedRewriteContextTest;

net/instaweb/rewriter/public/rewrite_filter.h

+9-6
Original file line numberDiff line numberDiff line change
@@ -20,20 +20,18 @@
2020
#define NET_INSTAWEB_REWRITER_PUBLIC_REWRITE_FILTER_H_
2121

2222
#include "net/instaweb/rewriter/public/common_filter.h"
23+
#include "net/instaweb/rewriter/public/resource.h"
2324
#include "net/instaweb/rewriter/public/resource_slot.h"
25+
#include "net/instaweb/rewriter/public/rewrite_context.h"
26+
#include "net/instaweb/rewriter/public/rewrite_driver.h"
2427
#include "net/instaweb/rewriter/public/rewrite_options.h"
2528
#include "pagespeed/kernel/base/basictypes.h"
2629
#include "pagespeed/kernel/base/string.h"
2730
#include "pagespeed/kernel/base/string_util.h"
31+
#include "pagespeed/kernel/util/url_segment_encoder.h"
2832

2933
namespace net_instaweb {
3034

31-
class Resource;
32-
class ResourceContext;
33-
class RewriteContext;
34-
class RewriteDriver;
35-
class UrlSegmentEncoder;
36-
3735
class RewriteFilter : public CommonFilter {
3836
public:
3937
explicit RewriteFilter(RewriteDriver* driver)
@@ -49,6 +47,11 @@ class RewriteFilter : public CommonFilter {
4947
// UsePropertyCacheDomCohort to return true.
5048
virtual void DetermineEnabled(GoogleString* disabled_reason);
5149

50+
// Returns whether this filter can modify urls. Because most filters do
51+
// modify urls this defaults to returning true, and filters that commit to never
52+
// modifying urls should override it to return false.
53+
virtual bool CanModifyUrls() { return true; }
54+
5255
// All RewriteFilters define how they encode URLs and other
5356
// associated information needed for a rewrite into a URL.
5457
// The default implementation handles a single URL with

net/instaweb/rewriter/public/rewrite_options.h

+12
Original file line numberDiff line numberDiff line change
@@ -364,6 +364,7 @@ class RewriteOptions {
364364
static const char kObliviousPagespeedUrls[];
365365
static const char kOptionCookiesDurationMs[];
366366
static const char kOverrideCachingTtlMs[];
367+
static const char kPreserveSubresourceHints[];
367368
static const char kPreserveUrlRelativity[];
368369
static const char kPrivateNotVaryForIE[];
369370
static const char kProactiveResourceFreshening[];
@@ -1627,6 +1628,14 @@ class RewriteOptions {
16271628
set_option(x, &blink_blacklist_end_timestamp_ms_);
16281629
}
16291630

1631+
bool preserve_subresource_hints() const {
1632+
return preserve_subresource_hints_.value();
1633+
}
1634+
void set_preserve_subresource_hints(bool x) {
1635+
set_option(x, &preserve_subresource_hints_);
1636+
}
1637+
1638+
16301639
bool preserve_url_relativity() const {
16311640
return preserve_url_relativity_.value();
16321641
}
@@ -4134,6 +4143,9 @@ class RewriteOptions {
41344143
// The timestamp when blink blacklist expires.
41354144
Option<int64> blink_blacklist_end_timestamp_ms_;
41364145

4146+
// Keep the original subresource hints
4147+
Option<bool> preserve_subresource_hints_;
4148+
41374149
// Keep rewritten URLs as relative as the original resource URL was.
41384150
// TODO(sligocki): Remove this option once we know it's always safe.
41394151
Option<bool> preserve_url_relativity_;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
/*
2+
* Copyright 2015 Google Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
// Author: [email protected] (Kees Spoelstra)
18+
19+
#ifndef NET_INSTAWEB_REWRITER_PUBLIC_STRIP_SUBRESOURCE_HINTS_FILTER_H_
20+
#define NET_INSTAWEB_REWRITER_PUBLIC_STRIP_SUBRESOURCE_HINTS_FILTER_H_
21+
22+
#include "pagespeed/kernel/base/basictypes.h"
23+
#include "pagespeed/kernel/html/empty_html_filter.h"
24+
25+
namespace net_instaweb {
26+
27+
class HtmlElement;
28+
class RewriteDriver;
29+
30+
// Removes rel=subresource links.
31+
class StripSubresourceHintsFilter : public EmptyHtmlFilter {
32+
public:
33+
explicit StripSubresourceHintsFilter(RewriteDriver* driver);
34+
virtual ~StripSubresourceHintsFilter();
35+
36+
virtual void StartDocument();
37+
virtual void StartElement(HtmlElement* element);
38+
virtual void EndDocument();
39+
virtual void EndElement(HtmlElement* element);
40+
virtual void Flush();
41+
virtual const char* Name() const { return "StripSubresourceHints"; }
42+
43+
private:
44+
RewriteDriver* driver_;
45+
HtmlElement* delete_element_;
46+
bool remove_;
47+
48+
DISALLOW_COPY_AND_ASSIGN(StripSubresourceHintsFilter);
49+
};
50+
51+
} // namespace net_instaweb
52+
53+
#endif // NET_INSTAWEB_REWRITER_PUBLIC_STRIP_SUBRESOURCE_HINTS_FILTER_H_

net/instaweb/rewriter/rewrite_driver.cc

+12-7
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@
127127
#include "net/instaweb/rewriter/public/split_html_helper_filter.h"
128128
#include "net/instaweb/rewriter/public/strip_non_cacheable_filter.h"
129129
#include "net/instaweb/rewriter/public/strip_scripts_filter.h"
130+
#include "net/instaweb/rewriter/public/strip_subresource_hints_filter.h"
130131
#include "net/instaweb/rewriter/public/support_noscript_filter.h"
131132
#include "net/instaweb/rewriter/public/suppress_prehead_filter.h"
132133
#include "net/instaweb/rewriter/public/url_input_resource.h"
@@ -656,7 +657,9 @@ void RewriteDriver::FlushAsync(Function* callback) {
656657
}
657658
flush_requested_ = false;
658659

659-
DetermineEnabledFilters();
660+
// Figure out which filters should be enabled and whether any enabled filter
661+
// can modify urls.
662+
DetermineFiltersBehavior();
660663

661664
for (FilterList::iterator it = early_pre_render_filters_.begin();
662665
it != early_pre_render_filters_.end(); ++it) {
@@ -1010,7 +1013,9 @@ void RewriteDriver::AddPreRenderFilters() {
10101013
dom_stats_filter_ = new DomStatsFilter(this);
10111014
AddOwnedEarlyPreRenderFilter(dom_stats_filter_);
10121015
}
1013-
1016+
if (!rewrite_options->preserve_subresource_hints()) {
1017+
AddOwnedEarlyPreRenderFilter(new StripSubresourceHintsFilter(this));
1018+
}
10141019
if (rewrite_options->Enabled(RewriteOptions::kDecodeRewrittenUrls)) {
10151020
AddOwnedEarlyPreRenderFilter(new DecodeRewrittenUrlsFilter(this));
10161021
}
@@ -3589,12 +3594,12 @@ bool RewriteDriver::Write(const ResourceVector& inputs,
35893594
return ret;
35903595
}
35913596

3592-
void RewriteDriver::DetermineEnabledFiltersImpl() {
3593-
DetermineEnabledFiltersInList(early_pre_render_filters_);
3594-
DetermineEnabledFiltersInList(pre_render_filters_);
3597+
void RewriteDriver::DetermineFiltersBehaviorImpl() {
3598+
DetermineFilterListBehavior(early_pre_render_filters_);
3599+
DetermineFilterListBehavior(pre_render_filters_);
35953600

3596-
// Call parent DetermineEnabled to setup post render filters.
3597-
HtmlParse::DetermineEnabledFiltersImpl();
3601+
// Call parent to set up post render filters.
3602+
HtmlParse::DetermineFiltersBehaviorImpl();
35983603
}
35993604

36003605
void RewriteDriver::ClearRequestProperties() {

net/instaweb/rewriter/rewrite_options.cc

+8
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,8 @@ const char RewriteOptions::kObliviousPagespeedUrls[] = "ObliviousPagespeedUrls";
267267
const char RewriteOptions::kOptionCookiesDurationMs[] =
268268
"OptionCookiesDurationMs";
269269
const char RewriteOptions::kOverrideCachingTtlMs[] = "OverrideCachingTtlMs";
270+
const char RewriteOptions::kPreserveSubresourceHints[] =
271+
"PreserveSubresourceHints";
270272
const char RewriteOptions::kPreserveUrlRelativity[] = "PreserveUrlRelativity";
271273
const char RewriteOptions::kPrivateNotVaryForIE[] = "PrivateNotVaryForIE";
272274
const char RewriteOptions::kPubliclyCacheMismatchedHashesExperimental[] =
@@ -2238,6 +2240,12 @@ void RewriteOptions::AddProperties() {
22382240
AddRequestProperty(
22392241
-1, &RewriteOptions::blink_blacklist_end_timestamp_ms_, "bbet", false);
22402242

2243+
AddBaseProperty(
2244+
false, &RewriteOptions::preserve_subresource_hints_, "psrh",
2245+
kPreserveSubresourceHints, kQueryScope,
2246+
"Keep original subresource hints in place.",
2247+
true);
2248+
22412249
AddBaseProperty(
22422250
true, &RewriteOptions::preserve_url_relativity_, "pur",
22432251
kPreserveUrlRelativity, kDirectoryScope,

net/instaweb/rewriter/rewrite_options_test.cc

+1
Original file line numberDiff line numberDiff line change
@@ -1055,6 +1055,7 @@ TEST_F(RewriteOptionsTest, LookupOptionByNameTest) {
10551055
RewriteOptions::kObliviousPagespeedUrls,
10561056
RewriteOptions::kOptionCookiesDurationMs,
10571057
RewriteOptions::kOverrideCachingTtlMs,
1058+
RewriteOptions::kPreserveSubresourceHints,
10581059
RewriteOptions::kPreserveUrlRelativity,
10591060
RewriteOptions::kPrivateNotVaryForIE,
10601061
RewriteOptions::kProactiveResourceFreshening,

0 commit comments

Comments
 (0)