// Copyright 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//    * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//    * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//    * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "chrome/browser/safe_browsing/protocol_manager.h"

#include "base/histogram.h"
#include "base/logging.h"
#include "base/message_loop.h"
#include "base/string_util.h"
#include "base/task.h"
#include "base/timer.h"
#include "chrome/browser/profile.h"
#include "chrome/browser/safe_browsing/protocol_parser.h"
#include "chrome/browser/safe_browsing/safe_browsing_database.h"
#include "chrome/browser/safe_browsing/safe_browsing_service.h"
#include "chrome/common/env_util.h"
#include "chrome/common/env_vars.h"
#include "chrome/common/rand_util.h"
#include "chrome/common/stl_util-inl.h"
#include "net/base/base64.h"
#include "net/base/load_flags.h"


// Maximum time, in seconds, from start up before we must issue an update query.
static const int kSbTimerStartIntervalSec = 300;

// Update URL for querying about the latest set of chunk updates.
static const char* const kSbUpdateUrl =
    "http://safebrowsing.clients.google.com/safebrowsing/downloads?client=%s&appver=%d.%d&pver=2.1";

// GetHash request URL for retrieving full hashes.
static const char* const kSbGetHashUrl =
    "http://safebrowsing.clients.google.com/safebrowsing/gethash?client=%s&appver=%d.%d&pver=2.1";

// New MAC client key requests URL.
static const char* const kSbNewKeyUrl =
    "https://sb-ssl.google.com/safebrowsing/newkey?client=%s&appver=%d.%d&pver=2.1";

// TODO(paulg): Change these values when we get close to launch.
static const char* const kSbClientName = "googleclient";
static const int kSbClientMajorVersion = 1;
static const int kSbClientMinorVersion = 0;
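// Note: with the client name and version above, kSbUpdateUrl, for example,
// expands (via StringPrintf) to:
//   http://safebrowsing.clients.google.com/safebrowsing/downloads?client=googleclient&appver=1.0&pver=2.1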

// Maximum back off multiplier.
static const int kSbMaxBackOff = 8;


// Periodic update task --------------------------------------------------------
class SafeBrowsingProtocolUpdateTask : public Task {
 public:
  explicit SafeBrowsingProtocolUpdateTask(SafeBrowsingProtocolManager* manager)
      : manager_(manager) {
  }

  void Run() {
    manager_->GetNextUpdate();
  }

 private:
  SafeBrowsingProtocolManager* manager_;
};


// SafeBrowsingProtocolManager implementation ----------------------------------

SafeBrowsingProtocolManager::SafeBrowsingProtocolManager(
    SafeBrowsingService* sb_service,
    MessageLoop* notify_loop,
    const std::string& client_key,
    const std::string& wrapped_key)
    : sb_service_(sb_service),
      request_type_(NO_REQUEST),
      update_error_count_(0),
      gethash_error_count_(0),
      update_back_off_mult_(1),
      gethash_back_off_mult_(1),
      next_update_sec_(-1),
      update_state_(FIRST_REQUEST),
      initial_request_(true),
      chunk_pending_to_write_(false),
      notify_loop_(notify_loop),
      client_key_(client_key),
      wrapped_key_(wrapped_key) {
  // Set the backoff multiplier fuzz to a random value between 0 and 1.
  back_off_fuzz_ = static_cast<float>(rand_util::RandInt(1, INT_MAX)) / INT_MAX;

  // The first update must happen between 1-5 minutes of start up.
  next_update_sec_ = rand_util::RandInt(60, kSbTimerStartIntervalSec);
}

SafeBrowsingProtocolManager::~SafeBrowsingProtocolManager() {
  if (update_timer_.get())
    MessageLoop::current()->timer_manager()->StopTimer(update_timer_.get());

  // Delete in-progress SafeBrowsing requests.
  STLDeleteContainerPairFirstPointers(hash_requests_.begin(),
                                      hash_requests_.end());
  hash_requests_.clear();
}

// Public API used by the SafeBrowsingService ----------------------------------

// We can only have one update or chunk request outstanding, but there may be
// multiple GetHash requests pending since we don't want to serialize them and
// slow down the user.
void SafeBrowsingProtocolManager::GetFullHash(
    SafeBrowsingService::SafeBrowsingCheck* check,
    const std::vector<SBPrefix>& prefixes) {
  // If we are in GetHash backoff, we need to check if we're past the next
  // allowed time. If we are, we can proceed with the request. If not, we are
  // required to return empty results (i.e. treat the page as safe).
  if (gethash_error_count_ && Time::Now() <= next_gethash_time_) {
    std::vector<SBFullHashResult> full_hashes;
    sb_service_->HandleGetHashResults(check, full_hashes);
    return;
  }

  std::string url = StringPrintf(kSbGetHashUrl,
                                 kSbClientName,
                                 kSbClientMajorVersion,
                                 kSbClientMinorVersion);
  if (!client_key_.empty()) {
    url.append("&wrkey=");
    url.append(wrapped_key_);
  }

  GURL gethash_url(url);
  URLFetcher* fetcher = new URLFetcher(gethash_url, URLFetcher::POST, this);
  hash_requests_[fetcher] = check;

  std::string get_hash;
  SafeBrowsingProtocolParser parser;
  parser.FormatGetHash(prefixes, &get_hash);

  fetcher->set_load_flags(net::LOAD_DISABLE_CACHE);
  fetcher->set_request_context(Profile::GetDefaultRequestContext());
  fetcher->set_upload_data("text/plain", get_hash.data());
  fetcher->Start();
}

void SafeBrowsingProtocolManager::GetNextUpdate() {
  if (initial_request_) {
    if (client_key_.empty() || wrapped_key_.empty()) {
      IssueKeyRequest();
      return;
    } else {
      initial_request_ = false;
    }
  }

  if (!request_.get())
    IssueUpdateRequest();
}

// URLFetcher::Delegate implementation -----------------------------------------

// All SafeBrowsing request responses are handled here.
// TODO(paulg): Clarify with the SafeBrowsing team whether a failed parse of a
//              chunk should retry the download and parse of that chunk (and
//              what back off / how many times to try), and if that affects the
//              update back off. For now, a failed parse of the chunk means we
//              drop it. This isn't so bad because the next UPDATE_REQUEST we
//              do will report all the chunks we have. If that chunk is still
//              required, the SafeBrowsing servers will tell us to get it again.
void SafeBrowsingProtocolManager::OnURLFetchComplete(
    const URLFetcher* source,
    const GURL& url,
    const URLRequestStatus& status,
    int response_code,
    const ResponseCookies& cookies,
    const std::string& data) {
  scoped_ptr<const URLFetcher> fetcher;
  bool parsed_ok = true;
  bool must_back_off = false;  // Reduce SafeBrowsing service query frequency.

  HashRequests::iterator it = hash_requests_.find(source);
  if (it != hash_requests_.end()) {
    // GetHash response.
    fetcher.reset(it->first);
    SafeBrowsingService::SafeBrowsingCheck* check = it->second;
    std::vector<SBFullHashResult> full_hashes;
    if (response_code == 200 || response_code == 204) {
      gethash_error_count_ = 0;
      gethash_back_off_mult_ = 1;
      bool re_key = false;
      SafeBrowsingProtocolParser parser;
      parsed_ok = parser.ParseGetHash(data.data(),
                                      static_cast<int>(data.length()),
                                      client_key_,
                                      &re_key,
                                      &full_hashes);
      if (!parsed_ok) {
        // If we fail to parse it, we must still inform the SafeBrowsingService
        // so that it doesn't hold up the user's request indefinitely. Not sure
        // what to do at that point though!
        full_hashes.clear();
      } else {
        if (re_key)
          HandleReKey();
      }
    } else if (response_code >= 300) {
      HandleGetHashError();
      SB_DLOG(INFO) << "SafeBrowsing GetHash request for: " << source->url()
                    << ", failed with error: " << response_code;
    }

    // Call back the SafeBrowsingService with full_hashes, even if there was a
    // parse error or an error response code (in which case full_hashes will be
    // empty). We can't block the user regardless of the error status.
    sb_service_->HandleGetHashResults(check, full_hashes);

    hash_requests_.erase(it);
  } else {
    // Update, chunk or key response.
    DCHECK(source == request_.get());
    fetcher.reset(request_.release());

    if (response_code == 200) {
      // We have data from the SafeBrowsing service.
      parsed_ok = HandleServiceResponse(source->url(),
                                        data.data(),
                                        static_cast<int>(data.length()));
      if (!parsed_ok) {
        SB_DLOG(INFO) << "SafeBrowsing request for: " << source->url()
                      << " failed parse.";
      }

      if (request_type_ == CHUNK_REQUEST) {
        if (parsed_ok) {
          chunk_request_urls_.pop_front();
        } else {
          chunk_request_urls_.clear();
        }
      } else if (request_type_ == GETKEY_REQUEST && initial_request_) {
        // This is the first request we've made this session. Now that we have
        // the keys, do the regular update request.
        initial_request_ = false;
        GetNextUpdate();
        return;
      }
    } else if (response_code >= 300) {
      // The SafeBrowsing service returned an error: back off.
      must_back_off = true;
      if (request_type_ == CHUNK_REQUEST)
        chunk_request_urls_.clear();
      SB_DLOG(INFO) << "SafeBrowsing request for: " << source->url()
                    << ", failed with error: " << response_code;
    }
  }

  // Schedule a new update request if we've finished retrieving all the chunks
  // from the previous update. We treat the update request and the chunk URLs it
  // contains as an atomic unit as far as back off is concerned.
  if (chunk_request_urls_.empty() &&
      (request_type_ == CHUNK_REQUEST || request_type_ == UPDATE_REQUEST))
    ScheduleNextUpdate(must_back_off);

  // Get the next chunk if available.
  IssueChunkRequest();
}

bool SafeBrowsingProtocolManager::HandleServiceResponse(const GURL& url,
                                                        const char* data,
                                                        int length) {
  SafeBrowsingProtocolParser parser;

  switch (request_type_) {
    case UPDATE_REQUEST: {
      int next_update_sec = -1;
      bool re_key = false;
      bool reset = false;
      std::vector<SBChunkDelete>* chunk_deletes =
          new std::vector<SBChunkDelete>;
      std::vector<ChunkUrl> chunk_urls;
      if (!parser.ParseUpdate(data, length, client_key_,
                              &next_update_sec, &re_key,
                              &reset, chunk_deletes, &chunk_urls)) {
        delete chunk_deletes;
        return false;
      }

      last_update_ = Time::Now();

      if (update_state_ == FIRST_REQUEST)
        update_state_ = SECOND_REQUEST;
      else if (update_state_ == SECOND_REQUEST)
        update_state_ = NORMAL_REQUEST;

      // New time for the next update, if the server provided one. Otherwise,
      // on the second request of the session, pick a random 15-45 minute
      // interval.
      if (next_update_sec > 0) {
        next_update_sec_ = next_update_sec;
      } else if (update_state_ == SECOND_REQUEST) {
        next_update_sec_ = rand_util::RandInt(15, 45) * 60;
      }

      // We need to request a new set of keys for MAC.
      if (re_key)
        HandleReKey();

      // New chunks to download.
      if (!chunk_urls.empty()) {
        for (size_t i = 0; i < chunk_urls.size(); ++i)
          chunk_request_urls_.push_back(chunk_urls[i]);
      }

      // Handle the case where the SafeBrowsing service tells us to dump our
      // database.
      if (reset) {
        sb_service_->ResetDatabase();
        return true;
      }

      // Chunks to delete from our storage.
      if (!chunk_deletes->empty())
        sb_service_->HandleChunkDelete(chunk_deletes);

      break;
    }
    case CHUNK_REQUEST: {
      // Find list name from url.
      std::string url_path = url.ExtractFileName();
      if (url_path.empty())
        return false;

      std::string::size_type pos = url_path.find_first_of('_');
      if (pos == std::string::npos)
        return false;

      const ChunkUrl chunk_url = chunk_request_urls_.front();
      DCHECK(url.spec().find(chunk_url.url) != std::string::npos);

      bool re_key = false;
      std::deque<SBChunk>* chunks = new std::deque<SBChunk>;
      if (!parser.ParseChunk(data, length,
                             client_key_, chunk_url.mac,
                             &re_key, chunks)) {
#ifndef NDEBUG
        std::string data_str;
        data_str.assign(data, length);
        std::string encoded_chunk;
        net::Base64Encode(data_str, &encoded_chunk);
        SB_DLOG(INFO) << "ParseChunk error for chunk: " << chunk_url.url
                      << ", client_key: " << client_key_
                      << ", wrapped_key: " << wrapped_key_
                      << ", mac: " << chunk_url.mac
                      << ", Base64Encode(data): " << encoded_chunk
                      << ", length: " << length;
#endif
        safe_browsing_util::FreeChunks(chunks);
        delete chunks;
        return false;
      }

      if (re_key)
        HandleReKey();

      if (chunks->empty()) {
        delete chunks;
      } else {
        chunk_pending_to_write_ = true;
        std::string list_name(url_path, 0, pos);
        sb_service_->HandleChunk(list_name, chunks);
      }

      break;
    }
    case GETKEY_REQUEST: {
      std::string client_key, wrapped_key;
      if (!parser.ParseNewKey(data, length, &client_key, &wrapped_key))
        return false;

      client_key_ = client_key;
      wrapped_key_ = wrapped_key;
      notify_loop_->PostTask(FROM_HERE, NewRunnableMethod(
          sb_service_, &SafeBrowsingService::OnNewMacKeys, client_key_,
          wrapped_key_));
      break;
    }

    default:
      return false;
  }

  return true;
}

void SafeBrowsingProtocolManager::Initialize() {
  // Don't want to hit the safe browsing servers on build/chrome bots.
  if (env_util::HasEnvironmentVariable(env_vars::kHeadless))
    return;

  ScheduleNextUpdate(false /* no back off */);
}

void SafeBrowsingProtocolManager::ScheduleNextUpdate(bool back_off) {
  DCHECK(next_update_sec_ > 0);

  if (!update_task_.get())
    update_task_.reset(new SafeBrowsingProtocolUpdateTask(this));

  // Unschedule any current timer & task.
  TimerManager* tm = MessageLoop::current()->timer_manager();
  if (update_timer_.get())
    tm->StopTimer(update_timer_.get());

  // Reschedule with the new update.
  const int next_update = GetNextUpdateTime(back_off);
  update_timer_.reset(tm->StartTimer(next_update, update_task_.get(), false));
}

// According to section 5 of the SafeBrowsing protocol specification, we must
// back off after a certain number of errors. We only change 'next_update_sec_'
// when we receive a response from the SafeBrowsing service.
int SafeBrowsingProtocolManager::GetNextUpdateTime(bool back_off) {
  int next = next_update_sec_;
  if (back_off) {
    next = GetNextBackOffTime(&update_error_count_, &update_back_off_mult_);
  } else {
    // Successful response means error reset.
    update_error_count_ = 0;
    update_back_off_mult_ = 1;
  }
  return next * 1000;  // milliseconds
}

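// Back off schedule, derived from the logic below (errors are counted since
// the last successful response):
//   1st error:      retry after 1 minute.
//   2nd-5th errors: retry after multiplier * (1 + back_off_fuzz_) * 30 minutes,
//                   with the multiplier doubling each time up to kSbMaxBackOff.
//   6th+ errors:    retry after 8 hours.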
int SafeBrowsingProtocolManager::GetNextBackOffTime(int* error_count,
                                                    int* multiplier) {
  DCHECK(multiplier && error_count);
  (*error_count)++;
  if (*error_count > 1 && *error_count < 6) {
    int next = static_cast<int>(*multiplier * (1 + back_off_fuzz_) * 30 * 60);
    *multiplier *= 2;
    if (*multiplier > kSbMaxBackOff)
      *multiplier = kSbMaxBackOff;
    return next;
  }

  if (*error_count >= 6)
    return 60 * 60 * 8;  // 8 hours

  return 60;  // 1 minute
}

// This request requires getting a list of all the chunks for each list from the
// database asynchronously. The request will be issued when we're called back in
// OnGetChunksComplete.
// TODO(paulg): We should get this at start up and maintain a ChunkRange cache
//              to avoid hitting the database with each update request. On the
//              other hand, this request only occurs every ~20-30 minutes, so
//              there isn't that much overhead. Measure!
void SafeBrowsingProtocolManager::IssueUpdateRequest() {
  request_type_ = UPDATE_REQUEST;
  sb_service_->GetAllChunks();
}

void SafeBrowsingProtocolManager::IssueChunkRequest() {
  // We are only allowed to have one request outstanding at any time. Also,
  // don't get the next url until the previous one has been written to disk so
  // that we don't use too much memory.
  if (request_.get() || chunk_request_urls_.empty() || chunk_pending_to_write_)
    return;

  ChunkUrl next_chunk = chunk_request_urls_.front();
  DCHECK(!next_chunk.url.empty());
  if (!StartsWithASCII(next_chunk.url, "http://", false) &&
      !StartsWithASCII(next_chunk.url, "https://", false))
    next_chunk.url = "http://" + next_chunk.url;
  GURL chunk_url(next_chunk.url);
  request_type_ = CHUNK_REQUEST;
  request_.reset(new URLFetcher(chunk_url, URLFetcher::GET, this));
  request_->set_load_flags(net::LOAD_DISABLE_CACHE);
  request_->set_request_context(Profile::GetDefaultRequestContext());
  request_->Start();
}

void SafeBrowsingProtocolManager::IssueKeyRequest() {
  GURL key_url(StringPrintf(kSbNewKeyUrl,
                            kSbClientName,
                            kSbClientMajorVersion,
                            kSbClientMinorVersion));
  request_type_ = GETKEY_REQUEST;
  request_.reset(new URLFetcher(key_url, URLFetcher::GET, this));
  request_->set_load_flags(net::LOAD_DISABLE_CACHE);
  request_->set_request_context(Profile::GetDefaultRequestContext());
  request_->Start();
}

void SafeBrowsingProtocolManager::OnGetChunksComplete(
    const std::vector<SBListChunkRanges>& lists, bool database_error) {
  DCHECK(request_type_ == UPDATE_REQUEST);

  if (database_error) {
    ScheduleNextUpdate(false);
    return;
  }

  const bool use_mac = !client_key_.empty();

  // Format our stored chunks:
  std::string list_data;
  bool found_malware = false;
  bool found_phishing = false;
  for (size_t i = 0; i < lists.size(); ++i) {
    list_data.append(FormatList(lists[i], use_mac));
    if (lists[i].name == "goog-phish-shavar")
      found_phishing = true;

    if (lists[i].name == "goog-malware-shavar")
      found_malware = true;
  }

  // If we have an empty database, let the server know we want data for these
  // lists.
  if (!found_phishing)
    list_data.append(FormatList(SBListChunkRanges("goog-phish-shavar"),
                                use_mac));

  if (!found_malware)
    list_data.append(FormatList(SBListChunkRanges("goog-malware-shavar"),
                                use_mac));

  std::string url = StringPrintf(kSbUpdateUrl,
                                 kSbClientName,
                                 kSbClientMajorVersion,
                                 kSbClientMinorVersion);
  if (use_mac) {
    url.append("&wrkey=");
    url.append(wrapped_key_);
  }

  GURL update_url(url);
  request_.reset(new URLFetcher(update_url, URLFetcher::POST, this));
  request_->set_load_flags(net::LOAD_DISABLE_CACHE);
  request_->set_request_context(Profile::GetDefaultRequestContext());
  request_->set_upload_data("text/plain", list_data);
  request_->Start();
}

void SafeBrowsingProtocolManager::OnChunkInserted() {
  chunk_pending_to_write_ = false;

  if (chunk_request_urls_.empty()) {
    UMA_HISTOGRAM_LONG_TIMES(L"SB.Update", Time::Now() - last_update_);
  } else {
    IssueChunkRequest();
  }
}

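// Formats one list entry for the update request body. Examples of the output
// produced by the logic below:
//   "goog-phish-shavar;a:1-5,10:s:12:mac\n"   (adds, subs, MAC in use)
//   "goog-malware-shavar;mac\n"               (no stored chunks, MAC in use)
//   "goog-phish-shavar;a:1-5\n"               (adds only, no MAC)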
// static
std::string SafeBrowsingProtocolManager::FormatList(
    const SBListChunkRanges& list, bool use_mac) {
  std::string formatted_results;
  formatted_results.append(list.name);
  formatted_results.append(";");
  if (!list.adds.empty()) {
    formatted_results.append("a:" + list.adds);
    if (!list.subs.empty() || use_mac)
      formatted_results.append(":");
  }
  if (!list.subs.empty()) {
    formatted_results.append("s:" + list.subs);
    if (use_mac)
      formatted_results.append(":");
  }
  if (use_mac)
    formatted_results.append("mac");
  formatted_results.append("\n");

  return formatted_results;
}

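// The server has asked us to re-key: discard the current MAC keys and request
// a new pair. Requests issued before the new keys arrive are sent without a
// wrapped key (and therefore without MAC verification).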
void SafeBrowsingProtocolManager::HandleReKey() {
  client_key_.clear();
  wrapped_key_.clear();
  IssueKeyRequest();
}

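// Records a GetHash failure and computes the earliest time at which the next
// GetHash request may be issued; until then, GetFullHash() returns empty
// results instead of hitting the network.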
void SafeBrowsingProtocolManager::HandleGetHashError() {
  int next = GetNextBackOffTime(&gethash_error_count_, &gethash_back_off_mult_);
  next_gethash_time_ = Time::Now() + TimeDelta::FromSeconds(next);
}