Skip to content
This repository was archived by the owner on Apr 10, 2025. It is now read-only.

Commit 741a865

Browse files
sligockicrowell
authored and committed
Do not rewrite nor cache empty resources.
While there is nothing technically wrong with caching and rewriting empty resources, there's not much value and we have run into situations where we unexpectedly produce empty resources. Fixes issue 1050.
1 parent 0a90bb1 commit 741a865

22 files changed

+374
-77
lines changed

net/instaweb/http/cache_url_async_fetcher.cc

+12-1
Original file line numberDiff line numberDiff line change
@@ -123,8 +123,14 @@ class CachePutFetch : public SharedAsyncFetch {
123123

124124
virtual void HandleDone(bool success) {
125125
DCHECK_EQ(request_headers()->method(), RequestHeaders::kGet);
126+
// We do not cache empty 200 responses. (Empty 404, 500 are fine.)
127+
// https://github.com/pagespeed/mod_pagespeed/issues/1050
128+
const bool empty_200 =
129+
(response_headers()->status_code() == HttpStatus::kOK &&
130+
cache_value_.contents_size() == 0);
126131
const bool insert_into_cache = (success &&
127132
cacheable_ &&
133+
!empty_200 &&
128134
cache_value_writer_.has_buffered());
129135

130136
if (insert_into_cache) {
@@ -155,6 +161,10 @@ class CachePutFetch : public SharedAsyncFetch {
155161
cache_->Put(url_, fragment_, req_properties_, http_options_,
156162
&cache_value_, handler_);
157163
}
164+
// Note: We explicitly do not remember fetch failures, uncacheable or
165+
// empty resources here since we still want to proxy those through every
166+
// time they are requested.
167+
// TODO(sligocki): Maybe we should be remembering failures.
158168
delete this;
159169
}
160170

@@ -329,7 +339,8 @@ class CacheFindCallback : public HTTPCache::Callback {
329339
// TODO(sligocki): Should we mark resources as such in this class?
330340
case HTTPCache::kRecentFetchFailed:
331341
case HTTPCache::kRecentFetchNotCacheable:
332-
VLOG(1) << "RecentFetchFailedOrNotCacheable: "
342+
case HTTPCache::kRecentFetchEmpty:
343+
VLOG(1) << "RecentFetchFailed, NotCacheable or Empty: "
333344
<< url_ << " (" << fragment_ << ")";
334345
if (!ignore_recent_fetch_failed_) {
335346
base_fetch_->Done(false);

net/instaweb/http/cache_url_async_fetcher_test.cc

+48-8
Original file line numberDiff line numberDiff line change
@@ -1438,15 +1438,55 @@ TEST_F(CacheUrlAsyncFetcherTest, FetchFailedIgnore) {
14381438
EXPECT_EQ(0, http_cache_->cache_inserts()->Get());
14391439
}
14401440

1441+
// Verifies that an empty 200 response is not served from cache on a later
// fetch: the backend first returns an empty body, then the mock response is
// swapped for "foobar" and the second fetch must observe the new body.
// NOTE(review): the final bool passed to FetchAndValidate differs between the
// two calls (false, then true); its meaning is defined by the fixture, which
// is not visible here -- confirm against FetchAndValidate's signature.
TEST_F(CacheUrlAsyncFetcherTest, NoCacheEmpty) {
1442+
const char url[] = "http://www.example.com/empty.html";
1443+
ResponseHeaders response_headers;
1444+
SetDefaultHeaders(kContentTypeHtml, &response_headers);
1445+
// Explicit 5 minute TTL is set on the response headers.
int ttl_ms = 5 * Timer::kMinuteMs;
1446+
response_headers.SetDateAndCaching(timer_.NowMs(), ttl_ms);
1447+
1448+
// First fetch: the backend serves a 200 with an empty body.
GoogleString empty_contents = "";
1449+
mock_fetcher_.SetResponse(url, response_headers, empty_contents);
1450+
FetchAndValidate(url, empty_request_headers_, true, HttpStatus::kOK,
1451+
empty_contents, kBackendFetch, false);
1452+
1453+
// Second fetch: same URL, but the backend now serves a non-empty body.
GoogleString non_empty_contents = "foobar";
1454+
mock_fetcher_.SetResponse(url, response_headers, non_empty_contents);
1455+
// cache_url_fetcher did not remember the empty contents.
1456+
FetchAndValidate(url, empty_request_headers_, true, HttpStatus::kOK,
1457+
non_empty_contents, kBackendFetch, true);
1458+
}
1459+
1460+
// Control case for NoCacheEmpty: with a non-empty body and the same header
// setup, the second fetch must still return the original body even though the
// mock backend response has been replaced.
TEST_F(CacheUrlAsyncFetcherTest, CacheNonEmpty) {
1461+
// Companion test to NoCacheEmpty to make sure we are caching non-empty
1462+
// through the same flow.
1463+
const char url[] = "http://www.example.com/non_empty.html";
1464+
ResponseHeaders response_headers;
1465+
SetDefaultHeaders(kContentTypeHtml, &response_headers);
1466+
// Explicit 5 minute TTL is set on the response headers.
int ttl_ms = 5 * Timer::kMinuteMs;
1467+
response_headers.SetDateAndCaching(timer_.NowMs(), ttl_ms);
1468+
1469+
// First fetch: the backend serves "foo".
GoogleString original_contents = "foo";
1470+
mock_fetcher_.SetResponse(url, response_headers, original_contents);
1471+
FetchAndValidate(url, empty_request_headers_, true, HttpStatus::kOK,
1472+
original_contents, kBackendFetch, true);
1473+
1474+
// Second fetch: the backend response changes, but the expected body is
// still the original one.
GoogleString new_contents = "foobar";
1475+
mock_fetcher_.SetResponse(url, response_headers, new_contents);
1476+
// cache_url_fetcher did remember the original content.
1477+
FetchAndValidate(url, empty_request_headers_, true, HttpStatus::kOK,
1478+
original_contents, kBackendFetch, true);
1479+
}
1480+
14411481
TEST_F(CacheUrlAsyncFetcherTest, NoCacheHtmlOnEmptyHeader) {
14421482
ResponseHeaders response_headers;
14431483
SetDefaultHeaders(kContentTypeHtml, &response_headers);
14441484
response_headers.SetDate(timer_.NowMs());
14451485
response_headers.RemoveAll(HttpAttributes::kCacheControl);
14461486
const char url[] = "http://www.example.com/foo.html";
1447-
mock_fetcher_.SetResponse(url, response_headers, "");
1487+
mock_fetcher_.SetResponse(url, response_headers, "foo");
14481488

1449-
ExpectNoCache(url, "");
1489+
ExpectNoCache(url, "foo");
14501490
}
14511491

14521492
TEST_F(CacheUrlAsyncFetcherTest, DoCacheHtmlOnEmptyHeader) {
@@ -1458,9 +1498,9 @@ TEST_F(CacheUrlAsyncFetcherTest, DoCacheHtmlOnEmptyHeader) {
14581498
response_headers.SetDate(timer_.NowMs());
14591499
response_headers.RemoveAll(HttpAttributes::kCacheControl);
14601500
const char url[] = "http://www.example.com/foo.html";
1461-
mock_fetcher_.SetResponse(url, response_headers, "");
1501+
mock_fetcher_.SetResponse(url, response_headers, "foo");
14621502

1463-
ExpectCache(url, "");
1503+
ExpectCache(url, "foo");
14641504
}
14651505

14661506
// Even when set_default_cache_html(true), we still don't cache responses
@@ -1474,9 +1514,9 @@ TEST_F(CacheUrlAsyncFetcherTest, NoCacheSetCookie) {
14741514
response_headers.RemoveAll(HttpAttributes::kCacheControl);
14751515
response_headers.Add(HttpAttributes::kSetCookie, "foo=bar");
14761516
const char url[] = "http://www.example.com/foo.html";
1477-
mock_fetcher_.SetResponse(url, response_headers, "");
1517+
mock_fetcher_.SetResponse(url, response_headers, "foo");
14781518

1479-
ExpectNoCache(url, "");
1519+
ExpectNoCache(url, "foo");
14801520
}
14811521

14821522
TEST_F(CacheUrlAsyncFetcherTest, CachePublicSansTtl) {
@@ -1488,9 +1528,9 @@ TEST_F(CacheUrlAsyncFetcherTest, CachePublicSansTtl) {
14881528
response_headers.SetDate(timer_.NowMs());
14891529
response_headers.Replace(HttpAttributes::kCacheControl, "public");
14901530
const char url[] = "http://www.example.com/foo.html";
1491-
mock_fetcher_.SetResponse(url, response_headers, "");
1531+
mock_fetcher_.SetResponse(url, response_headers, "foo");
14921532

1493-
ExpectCache(url, "");
1533+
ExpectCache(url, "foo");
14941534
}
14951535

14961536
TEST_F(CacheUrlAsyncFetcherTest, CacheVaryForNonHtml) {

net/instaweb/http/http_cache.cc

+23-7
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ namespace {
4545
// in this case we could arguably remember it using the original cc-private ttl.
4646
const int kRememberNotCacheableTtlSec = 300;
4747
const int kRememberFetchFailedTtlSec = 300;
48+
const int kRememberEmptyTtlSec = 300;
4849

4950
// We use an extremely low TTL for load-shed resources since we don't
5051
// want this to get in the way of debugging, or letting a page with
@@ -97,6 +98,7 @@ HTTPCache::HTTPCache(CacheInterface* cache, Timer* timer, Hasher* hasher,
9798
remember_not_cacheable_ttl_seconds_ = kRememberNotCacheableTtlSec;
9899
remember_fetch_failed_ttl_seconds_ = kRememberFetchFailedTtlSec;
99100
remember_fetch_dropped_ttl_seconds_ = kRememberFetchDroppedTtlSec;
101+
remember_empty_ttl_seconds_ = kRememberEmptyTtlSec;
100102
max_cacheable_response_content_length_ = kCacheSizeUnlimited;
101103
}
102104

@@ -192,7 +194,8 @@ class HTTPCacheCallback : public CacheInterface::Callback {
192194

193195
if (http_status == HttpStatus::kRememberNotCacheableStatusCode ||
194196
http_status == HttpStatus::kRememberNotCacheableAnd200StatusCode ||
195-
http_status == HttpStatus::kRememberFetchFailedStatusCode) {
197+
http_status == HttpStatus::kRememberFetchFailedStatusCode ||
198+
http_status == HttpStatus::kRememberEmptyStatusCode) {
196199
// If the response was stored as uncacheable and a 200, it may since
197200
// have been added to the override caching group. Hence, we
198201
// consider it invalid if override_cache_ttl_ms > 0.
@@ -209,9 +212,14 @@ class HTTPCacheCallback : public CacheInterface::Callback {
209212
HttpStatus::kRememberNotCacheableAnd200StatusCode) {
210213
status = "not-cacheable";
211214
result = HTTPCache::kRecentFetchNotCacheable;
212-
} else {
215+
} else if (http_status ==
216+
HttpStatus::kRememberFetchFailedStatusCode) {
213217
status = "not-found";
214218
result = HTTPCache::kRecentFetchFailed;
219+
} else {
220+
DCHECK(http_status == HttpStatus::kRememberEmptyStatusCode);
221+
status = "empty";
222+
result = HTTPCache::kRecentFetchEmpty;
215223
}
216224
if (handler_ != NULL) {
217225
handler_->Message(kInfo,
@@ -313,7 +321,7 @@ void HTTPCache::RememberNotCacheable(const GoogleString& key,
313321
const GoogleString& fragment,
314322
bool is_200_status_code,
315323
MessageHandler* handler) {
316-
RememberFetchFailedorNotCacheableHelper(
324+
RememberFetchFailedOrNotCacheableHelper(
317325
key, fragment, handler,
318326
is_200_status_code ? HttpStatus::kRememberNotCacheableAnd200StatusCode :
319327
HttpStatus::kRememberNotCacheableStatusCode,
@@ -323,27 +331,35 @@ void HTTPCache::RememberNotCacheable(const GoogleString& key,
323331
void HTTPCache::RememberFetchFailed(const GoogleString& key,
324332
const GoogleString& fragment,
325333
MessageHandler* handler) {
326-
RememberFetchFailedorNotCacheableHelper(key, fragment, handler,
334+
RememberFetchFailedOrNotCacheableHelper(key, fragment, handler,
327335
HttpStatus::kRememberFetchFailedStatusCode,
328336
remember_fetch_failed_ttl_seconds_);
329337
}
330338

331339
void HTTPCache::RememberFetchDropped(const GoogleString& key,
332340
const GoogleString& fragment,
333-
MessageHandler* handler) {
334-
RememberFetchFailedorNotCacheableHelper(key, fragment, handler,
341+
MessageHandler* handler) {
342+
RememberFetchFailedOrNotCacheableHelper(key, fragment, handler,
335343
HttpStatus::kRememberFetchFailedStatusCode,
336344
remember_fetch_dropped_ttl_seconds_);
337345
}
338346

347+
// Records that the resource identified by (key, fragment) was empty.
// Delegates to the shared remember-helper, passing
// HttpStatus::kRememberEmptyStatusCode as the status code and
// remember_empty_ttl_seconds_ as the TTL. On lookup, HTTPCacheCallback maps
// that status code to HTTPCache::kRecentFetchEmpty.
void HTTPCache::RememberEmpty(const GoogleString& key,
348+
const GoogleString& fragment,
349+
MessageHandler* handler) {
350+
RememberFetchFailedOrNotCacheableHelper(key, fragment, handler,
351+
HttpStatus::kRememberEmptyStatusCode,
352+
remember_empty_ttl_seconds_);
353+
}
354+
339355
// Sets the maximum cacheable response content length. Values below
// kCacheSizeUnlimited are rejected: they trip the DCHECK and are otherwise
// ignored by the explicit guard, leaving the previous limit in place.
void HTTPCache::set_max_cacheable_response_content_length(int64 value) {
340356
DCHECK(value >= kCacheSizeUnlimited);
341357
// Guard repeats the DCHECK condition so an out-of-range value is never stored.
if (value >= kCacheSizeUnlimited) {
342358
max_cacheable_response_content_length_ = value;
343359
}
344360
}
345361

346-
void HTTPCache::RememberFetchFailedorNotCacheableHelper(
362+
void HTTPCache::RememberFetchFailedOrNotCacheableHelper(
347363
const GoogleString& key, const GoogleString& fragment,
348364
MessageHandler* handler, HttpStatus::Code code, int64 ttl_sec) {
349365
ResponseHeaders headers;

net/instaweb/http/http_cache_test.cc

+25-2
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "net/instaweb/http/public/http_cache.h"
2222

2323
#include <cstddef> // for size_t
24+
2425
#include "net/instaweb/http/public/content_type.h"
2526
#include "net/instaweb/http/public/http_value.h"
2627
#include "net/instaweb/http/public/meta_data.h"
@@ -29,6 +30,7 @@
2930
#include "net/instaweb/util/public/google_message_handler.h"
3031
#include "net/instaweb/util/public/gtest.h"
3132
#include "net/instaweb/util/public/lru_cache.h"
33+
#include "net/instaweb/util/public/message_handler.h"
3234
#include "net/instaweb/util/public/mock_hasher.h"
3335
#include "net/instaweb/util/public/mock_timer.h"
3436
#include "net/instaweb/util/public/platform.h"
@@ -53,8 +55,6 @@ const char kFragment2[] = "www.other.com";
5355

5456
namespace net_instaweb {
5557

56-
class MessageHandler;
57-
5858
class HTTPCacheTest : public testing::Test {
5959
protected:
6060
// Helper class for calling Get and Query methods on cache implementations
@@ -484,6 +484,29 @@ TEST_F(HTTPCacheTest, RememberDropped) {
484484
Find(kUrl, kFragment, &value, &meta_data_out, &message_handler_));
485485
}
486486

487+
// Remember empty resources.
488+
// Exercises RememberEmpty() and its TTL in three steps:
//   1. Immediately after RememberEmpty(), Find() reports kRecentFetchEmpty.
//   2. After advancing the mock clock 301 s, Find() reports kNotFound
//      (presumably the fixture leaves the 300 s default TTL in place --
//      confirm against the fixture setup).
//   3. With the TTL raised to 600 s via set_remember_empty_ttl_seconds(), a
//      fresh RememberEmpty() is still reported after a 301 s advance.
TEST_F(HTTPCacheTest, RememberEmpty) {
489+
ResponseHeaders meta_data_out;
490+
http_cache_->RememberEmpty(kUrl, kFragment, &message_handler_);
491+
HTTPValue value;
492+
EXPECT_EQ(HTTPCache::kRecentFetchEmpty,
493+
Find(kUrl, kFragment, &value, &meta_data_out, &message_handler_));
494+
495+
// Now advance time 301 seconds; the cache should allow us to try fetching
496+
// again.
497+
mock_timer_.AdvanceMs(301 * 1000);
498+
EXPECT_EQ(HTTPCache::kNotFound,
499+
Find(kUrl, kFragment, &value, &meta_data_out, &message_handler_));
500+
501+
// Raise the TTL above the 301 s step used below, then re-record the entry.
http_cache_->set_remember_empty_ttl_seconds(600);
502+
http_cache_->RememberEmpty(kUrl, kFragment, &message_handler_);
503+
// Now advance time 301 seconds; the cache should remember that the resource
504+
// is empty.
505+
mock_timer_.AdvanceMs(301 * 1000);
506+
EXPECT_EQ(HTTPCache::kRecentFetchEmpty,
507+
Find(kUrl, kFragment, &value, &meta_data_out, &message_handler_));
508+
}
509+
487510
// Make sure we don't remember 'non-cacheable' once we've put it into
488511
// non-recording of failures mode (but do before that), and that we
489512
// remember successful results even when in SetIgnoreFailurePuts() mode.

net/instaweb/http/public/http_cache.h

+22-1
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ class HTTPCache {
7575
// codes or are not cacheable.
7676
kRecentFetchFailed,
7777
kRecentFetchNotCacheable,
78+
kRecentFetchEmpty, // We do not cache empty resources.
7879
};
7980

8081
virtual void set_hasher(Hasher* hasher) { hasher_ = hasher; }
@@ -287,6 +288,14 @@ class HTTPCache {
287288
const GoogleString& fragment,
288289
MessageHandler* handler);
289290

291+
// Tell the HTTP Cache to remember that a particular URL shouldn't be cached
292+
// because it was an empty resource. We defensively avoid caching empty input
293+
// resources.
294+
// https://github.com/pagespeed/mod_pagespeed/issues/1050
295+
virtual void RememberEmpty(const GoogleString& key,
296+
const GoogleString& fragment,
297+
MessageHandler* handler);
298+
290299
// Indicates if the response is within the cacheable size limit. Clients of
291300
// HTTPCache must check if they will be eventually able to cache their entries
292301
// before buffering them in memory. If the content length header is not found
@@ -353,6 +362,17 @@ class HTTPCache {
353362
}
354363
}
355364

365+
// Returns the TTL, in seconds, that RememberEmpty() passes to the remember
// helper for "empty resource" entries.
int64 remember_empty_ttl_seconds() {
366+
return remember_empty_ttl_seconds_;
367+
}
368+
369+
// Sets the TTL, in seconds, used by RememberEmpty(). Negative values trip the
// DCHECK and are otherwise ignored by the explicit guard, leaving the
// previous TTL unchanged. Virtual; WriteThroughHTTPCache declares an override.
virtual void set_remember_empty_ttl_seconds(int64 value) {
370+
DCHECK_LE(0, value);
371+
// Guard repeats the DCHECK condition so a negative TTL is never stored.
if (value >= 0) {
372+
remember_empty_ttl_seconds_ = value;
373+
}
374+
}
375+
356376
// Returns the configured maximum cacheable response content length.
// NOTE(review): the return type is int, but the backing member
// max_cacheable_response_content_length_ is declared int64 and the setter
// takes int64, so values outside the int range would be narrowed on return.
// Consider returning int64 -- confirm that callers tolerate the wider type.
int max_cacheable_response_content_length() {
357377
return max_cacheable_response_content_length_;
358378
}
@@ -391,7 +411,7 @@ class HTTPCache {
391411
void UpdateStats(const GoogleString& key, const GoogleString& fragment,
392412
CacheInterface::KeyState backend_state, FindResult result,
393413
bool has_fallback, bool is_expired, MessageHandler* handler);
394-
void RememberFetchFailedorNotCacheableHelper(
414+
void RememberFetchFailedOrNotCacheableHelper(
395415
const GoogleString& key, const GoogleString& fragment,
396416
MessageHandler* handler, HttpStatus::Code code, int64 ttl_sec);
397417

@@ -422,6 +442,7 @@ class HTTPCache {
422442
int64 remember_not_cacheable_ttl_seconds_;
423443
int64 remember_fetch_failed_ttl_seconds_;
424444
int64 remember_fetch_dropped_ttl_seconds_;
445+
int64 remember_empty_ttl_seconds_;
425446
int64 max_cacheable_response_content_length_;
426447
AtomicBool ignore_failure_puts_;
427448

net/instaweb/http/public/write_through_http_cache.h

+9-2
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,14 @@
2323

2424
#include "net/instaweb/http/public/http_cache.h"
2525
#include "net/instaweb/util/public/basictypes.h"
26+
#include "net/instaweb/util/public/cache_interface.h"
2627
#include "net/instaweb/util/public/scoped_ptr.h"
2728
#include "net/instaweb/util/public/string.h"
2829
#include "pagespeed/kernel/base/string_util.h"
2930

3031
namespace net_instaweb {
3132

32-
class CacheInterface;
3333
class Hasher;
34-
class HTTPValue;
3534
class MessageHandler;
3635
class Statistics;
3736
class Timer;
@@ -80,6 +79,9 @@ class WriteThroughHTTPCache : public HTTPCache {
8079
// Implements HTTPCache::set_remember_fetch_dropped_ttl_seconds();
8180
virtual void set_remember_fetch_dropped_ttl_seconds(int64 value);
8281

82+
// Implements HTTPCache::set_remember_empty_ttl_seconds();
83+
virtual void set_remember_empty_ttl_seconds(int64 value);
84+
8385
// Implements HTTPCache::set_max_cacheable_response_content_length().
8486
virtual void set_max_cacheable_response_content_length(int64 value);
8587

@@ -99,6 +101,11 @@ class WriteThroughHTTPCache : public HTTPCache {
99101
const GoogleString& fragment,
100102
MessageHandler * handler);
101103

104+
// Implements HTTPCache::RememberEmpty().
105+
virtual void RememberEmpty(const GoogleString& key,
106+
const GoogleString& fragment,
107+
MessageHandler * handler);
108+
102109
// By default, all data goes into both cache1 and cache2. But
103110
// if you only want to put small items in cache1, you can set the
104111
// size limit. Note that both the key and value will count

net/instaweb/http/write_through_http_cache.cc

+13
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,12 @@ void WriteThroughHTTPCache::set_remember_fetch_dropped_ttl_seconds(
273273
cache2_->set_remember_fetch_dropped_ttl_seconds(value);
274274
}
275275

276+
// Applies the "remember empty" TTL to this object's own HTTPCache state and
// forwards the same value to both underlying caches (cache1_ and cache2_).
void WriteThroughHTTPCache::set_remember_empty_ttl_seconds(int64 value) {
277+
HTTPCache::set_remember_empty_ttl_seconds(value);
278+
cache1_->set_remember_empty_ttl_seconds(value);
279+
cache2_->set_remember_empty_ttl_seconds(value);
280+
}
281+
276282
void WriteThroughHTTPCache::set_max_cacheable_response_content_length(
277283
int64 value) {
278284
HTTPCache::set_max_cacheable_response_content_length(value);
@@ -302,4 +308,11 @@ void WriteThroughHTTPCache::RememberFetchDropped(const GoogleString& key,
302308
cache2_->RememberFetchDropped(key, fragment, handler);
303309
}
304310

311+
// Records the "empty resource" marker for (key, fragment) in both underlying
// caches. The base-class HTTPCache::RememberEmpty() is not invoked here; the
// work is done entirely by cache1_ and cache2_.
void WriteThroughHTTPCache::RememberEmpty(const GoogleString& key,
312+
const GoogleString& fragment,
313+
MessageHandler * handler) {
314+
cache1_->RememberEmpty(key, fragment, handler);
315+
cache2_->RememberEmpty(key, fragment, handler);
316+
}
317+
305318
} // namespace net_instaweb

0 commit comments

Comments
 (0)