blob: 0f9aa5f7ba9621a8b15cc8b657546ba1f1eb4e5b [file] [log] [blame]
// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
initial.commit09911bf2008-07-26 23:55:294
5#include "chrome/browser/safe_browsing/protocol_manager.h"
6
7#include "base/histogram.h"
8#include "base/logging.h"
9#include "base/message_loop.h"
10#include "base/string_util.h"
11#include "base/task.h"
12#include "base/timer.h"
13#include "chrome/browser/profile.h"
14#include "chrome/browser/safe_browsing/protocol_parser.h"
15#include "chrome/browser/safe_browsing/safe_browsing_database.h"
16#include "chrome/browser/safe_browsing/safe_browsing_service.h"
17#include "chrome/common/env_util.h"
18#include "chrome/common/env_vars.h"
19#include "chrome/common/rand_util.h"
20#include "chrome/common/stl_util-inl.h"
21#include "net/base/base64.h"
22#include "net/base/load_flags.h"
23
24
// Maximum time, in seconds, from start up before we must issue an update query.
static const int kSbTimerStartIntervalSec = 300;

// Update URL for querying about the latest set of chunk updates.
// %s: client name, %d.%d: client major/minor version.
static const char* const kSbUpdateUrl =
    "http://safebrowsing.clients.google.com/safebrowsing/downloads?client=%s&appver=%d.%d&pver=2.1";

// GetHash request URL for retrieving full hashes.
static const char* const kSbGetHashUrl =
    "http://safebrowsing.clients.google.com/safebrowsing/gethash?client=%s&appver=%d.%d&pver=2.1";

// New MAC client key requests URL. Note this is the only https:// endpoint,
// since it establishes the shared secret used to verify later responses.
static const char* const kSbNewKeyUrl =
    "https://sb-ssl.google.com/safebrowsing/newkey?client=%s&appver=%d.%d&pver=2.1";

// Client name reported in the 'client' URL parameter above.
#if defined(GOOGLE_CHROME_BUILD)
static const char* const kSbClientName = "googlechrome";
#else
static const char* const kSbClientName = "chromium";
#endif

// Client version reported in the 'appver' URL parameter above.
static const int kSbClientMajorVersion = 1;
static const int kSbClientMinorVersion = 0;

// Maximum back off multiplier.
static const int kSbMaxBackOff = 8;
50
51
initial.commit09911bf2008-07-26 23:55:2952// SafeBrowsingProtocolManager implementation ----------------------------------
53
// Builds a protocol manager for |sb_service|. |client_key| and |wrapped_key|
// are the persisted MAC keys (may be empty, in which case the first
// GetNextUpdate() issues a key request). |notify_loop| is the loop used to
// post new-key notifications back to the service.
SafeBrowsingProtocolManager::SafeBrowsingProtocolManager(
    SafeBrowsingService* sb_service,
    MessageLoop* notify_loop,
    const std::string& client_key,
    const std::string& wrapped_key)
    : sb_service_(sb_service),
      request_type_(NO_REQUEST),
      update_error_count_(0),
      gethash_error_count_(0),
      update_back_off_mult_(1),
      gethash_back_off_mult_(1),
      next_update_sec_(-1),
      update_state_(FIRST_REQUEST),
      initial_request_(true),
      chunk_pending_to_write_(false),
      notify_loop_(notify_loop),
      client_key_(client_key),
      wrapped_key_(wrapped_key) {
  // Set the backoff multiplier fuzz to a random value between 0 and 1, so
  // clients that fail together don't retry in lockstep.
  back_off_fuzz_ = static_cast<float>(rand_util::RandInt(1, INT_MAX)) / INT_MAX;

  // The first update must happen between 1-5 minutes of start up.
  next_update_sec_ = rand_util::RandInt(60, kSbTimerStartIntervalSec);
}
78
79SafeBrowsingProtocolManager::~SafeBrowsingProtocolManager() {
initial.commit09911bf2008-07-26 23:55:2980 // Delete in-progress SafeBrowsing requests.
81 STLDeleteContainerPairFirstPointers(hash_requests_.begin(),
82 hash_requests_.end());
83 hash_requests_.clear();
84}
85
86// Public API used by the SafeBrowsingService ----------------------------------
87
88// We can only have one update or chunk request outstanding, but there may be
89// multiple GetHash requests pending since we don't want to serialize them and
90// slow down the user.
91void SafeBrowsingProtocolManager::GetFullHash(
92 SafeBrowsingService::SafeBrowsingCheck* check,
93 const std::vector<SBPrefix>& prefixes) {
94 // If we are in GetHash backoff, we need to check if we're past the next
95 // allowed time. If we are, we can proceed with the request. If not, we are
96 // required to return empty results (i.e. treat the page as safe).
97 if (gethash_error_count_ && Time::Now() <= next_gethash_time_) {
98 std::vector<SBFullHashResult> full_hashes;
[email protected]200abc32008-09-05 01:44:3399 sb_service_->HandleGetHashResults(check, full_hashes, false);
initial.commit09911bf2008-07-26 23:55:29100 return;
101 }
102
103 std::string url = StringPrintf(kSbGetHashUrl,
104 kSbClientName,
105 kSbClientMajorVersion,
106 kSbClientMinorVersion);
107 if (!client_key_.empty()) {
108 url.append("&wrkey=");
109 url.append(wrapped_key_);
110 }
111
112 GURL gethash_url(url);
113 URLFetcher* fetcher = new URLFetcher(gethash_url, URLFetcher::POST, this);
114 hash_requests_[fetcher] = check;
115
116 std::string get_hash;
117 SafeBrowsingProtocolParser parser;
118 parser.FormatGetHash(prefixes, &get_hash);
119
120 fetcher->set_load_flags(net::LOAD_DISABLE_CACHE);
121 fetcher->set_request_context(Profile::GetDefaultRequestContext());
[email protected]d36e3c8e2008-08-29 23:42:20122 fetcher->set_upload_data("text/plain", get_hash);
initial.commit09911bf2008-07-26 23:55:29123 fetcher->Start();
124}
125
126void SafeBrowsingProtocolManager::GetNextUpdate() {
127 if (initial_request_) {
128 if (client_key_.empty() || wrapped_key_.empty()) {
129 IssueKeyRequest();
130 return;
131 } else {
132 initial_request_ = false;
133 }
134 }
135
136 if (!request_.get())
137 IssueUpdateRequest();
138}
139
140// URLFetcher::Delegate implementation -----------------------------------------
141
// All SafeBrowsing request responses are handled here.
// TODO(paulg): Clarify with the SafeBrowsing team whether a failed parse of a
//              chunk should retry the download and parse of that chunk (and
//              what back off / how many times to try), and if that effects the
//              update back off. For now, a failed parse of the chunk means we
//              drop it. This isn't so bad because the next UPDATE_REQUEST we
//              do will report all the chunks we have. If that chunk is still
//              required, the SafeBrowsing servers will tell us to get it again.
void SafeBrowsingProtocolManager::OnURLFetchComplete(
    const URLFetcher* source,
    const GURL& url,
    const URLRequestStatus& status,
    int response_code,
    const ResponseCookies& cookies,
    const std::string& data) {
  // Takes ownership of 'source' (either a GetHash fetcher from hash_requests_
  // or the single outstanding request_) so it is freed on every return path.
  scoped_ptr<const URLFetcher> fetcher;
  bool parsed_ok = true;
  bool must_back_off = false;  // Reduce SafeBrowsing service query frequency.

  HashRequests::iterator it = hash_requests_.find(source);
  if (it != hash_requests_.end()) {
    // GetHash response.
    fetcher.reset(it->first);
    SafeBrowsingService::SafeBrowsingCheck* check = it->second;
    std::vector<SBFullHashResult> full_hashes;
    bool can_cache = false;
    // Both 200 and 204 count as success (204 presumably means no full hashes
    // matched -- confirm against the protocol spec), so reset back off state.
    if (response_code == 200 || response_code == 204) {
      can_cache = true;
      gethash_error_count_ = 0;
      gethash_back_off_mult_ = 1;
      bool re_key = false;
      SafeBrowsingProtocolParser parser;
      parsed_ok = parser.ParseGetHash(data.data(),
                                      static_cast<int>(data.length()),
                                      client_key_,
                                      &re_key,
                                      &full_hashes);
      if (!parsed_ok) {
        // If we fail to parse it, we must still inform the SafeBrowsingService
        // so that it doesn't hold up the user's request indefinitely. Not sure
        // what to do at that point though!
        full_hashes.clear();
      } else {
        if (re_key)
          HandleReKey();
      }
    } else if (response_code >= 300) {
      // Server error: enter (or extend) GetHash back off.
      HandleGetHashError();
      SB_DLOG(INFO) << "SafeBrowsing GetHash request for: " << source->url()
                    << ", failed with error: " << response_code;
    }

    // Call back the SafeBrowsingService with full_hashes, even if there was a
    // parse error or an error response code (in which case full_hashes will be
    // empty). We can't block the user regardless of the error status.
    sb_service_->HandleGetHashResults(check, full_hashes, can_cache);

    hash_requests_.erase(it);
  } else {
    // Update, chunk or key response.
    DCHECK(source == request_.get());
    fetcher.reset(request_.release());

    if (response_code == 200) {
      // We have data from the SafeBrowsing service.
      parsed_ok = HandleServiceResponse(source->url(),
                                        data.data(),
                                        static_cast<int>(data.length()));
      if (!parsed_ok) {
        SB_DLOG(INFO) << "SafeBrowsing request for: " << source->url()
                      << "failed parse.";
      }

      if (request_type_ == CHUNK_REQUEST) {
        if (parsed_ok) {
          chunk_request_urls_.pop_front();
        } else {
          // Per the TODO above: drop the failed chunk along with the rest of
          // this update's chunk list.
          chunk_request_urls_.clear();
        }
      } else if (request_type_ == GETKEY_REQUEST && initial_request_) {
        // This is the first request we've made this session. Now that we have
        // the keys, do the regular update request.
        initial_request_ = false;
        GetNextUpdate();
        return;
      }
    } else if (response_code >= 300) {
      // The SafeBrowsing service error: back off.
      must_back_off = true;
      if (request_type_ == CHUNK_REQUEST)
        chunk_request_urls_.clear();
      SB_DLOG(INFO) << "SafeBrowsing request for: " << source->url()
                    << ", failed with error: " << response_code;
    }
  }

  // Schedule a new update request if we've finished retrieving all the chunks
  // from the previous update. We treat the update request and the chunk URLs it
  // contains as an atomic unit as far as back off is concerned.
  if (chunk_request_urls_.empty() &&
      (request_type_ == CHUNK_REQUEST || request_type_ == UPDATE_REQUEST))
    ScheduleNextUpdate(must_back_off);

  // Get the next chunk if available.
  IssueChunkRequest();
}
248
249bool SafeBrowsingProtocolManager::HandleServiceResponse(const GURL& url,
250 const char* data,
251 int length) {
252 SafeBrowsingProtocolParser parser;
253
254 switch (request_type_) {
255 case UPDATE_REQUEST: {
256 int next_update_sec = -1;
257 bool re_key = false;
258 bool reset = false;
259 std::vector<SBChunkDelete>* chunk_deletes =
260 new std::vector<SBChunkDelete>;
261 std::vector<ChunkUrl> chunk_urls;
262 if (!parser.ParseUpdate(data, length, client_key_,
263 &next_update_sec, &re_key,
264 &reset, chunk_deletes, &chunk_urls)) {
265 delete chunk_deletes;
266 return false;
267 }
268
269 last_update_ = Time::Now();
270
271 if (update_state_ == FIRST_REQUEST)
272 update_state_ = SECOND_REQUEST;
273 else if (update_state_ == SECOND_REQUEST)
274 update_state_ = NORMAL_REQUEST;
275
276 // New time for the next update.
277 if (next_update_sec > 0) {
278 next_update_sec_ = next_update_sec;
279 } else if (update_state_ == SECOND_REQUEST) {
280 next_update_sec_ = rand_util::RandInt(15, 45) * 60;
281 }
282
283 // We need to request a new set of keys for MAC.
284 if (re_key)
285 HandleReKey();
286
287 // New chunks to download.
288 if (!chunk_urls.empty()) {
289 for (size_t i = 0; i < chunk_urls.size(); ++i)
290 chunk_request_urls_.push_back(chunk_urls[i]);
291 }
292
293 // Handle the case were the SafeBrowsing service tells us to dump our
294 // database.
295 if (reset) {
296 sb_service_->ResetDatabase();
297 return true;
298 }
299
300 // Chunks to delete from our storage.
301 if (!chunk_deletes->empty())
302 sb_service_->HandleChunkDelete(chunk_deletes);
303
304 break;
305 }
306 case CHUNK_REQUEST: {
307 // Find list name from url.
308 std::string url_path = url.ExtractFileName();
309 if (url_path.empty())
310 return false;
311
312 std::string::size_type pos = url_path.find_first_of('_');
313 if (pos == std::string::npos)
314 return false;
315
316 const ChunkUrl chunk_url = chunk_request_urls_.front();
317 DCHECK(url.spec().find(chunk_url.url) != std::string::npos);
318
319 bool re_key = false;
320 std::deque<SBChunk>* chunks = new std::deque<SBChunk>;
321 if (!parser.ParseChunk(data, length,
322 client_key_, chunk_url.mac,
323 &re_key, chunks)) {
324#ifndef NDEBUG
325 std::string data_str;
326 data_str.assign(data, length);
327 std::string encoded_chunk;
[email protected]a9bb6f692008-07-30 16:40:10328 net::Base64Encode(data, &encoded_chunk);
initial.commit09911bf2008-07-26 23:55:29329 SB_DLOG(INFO) << "ParseChunk error for chunk: " << chunk_url.url
330 << ", client_key: " << client_key_
331 << ", wrapped_key: " << wrapped_key_
332 << ", mac: " << chunk_url.mac
333 << ", Base64Encode(data): " << encoded_chunk
334 << ", length: " << length;
335#endif
336 safe_browsing_util::FreeChunks(chunks);
337 delete chunks;
338 return false;
339 }
340
341 if (re_key)
342 HandleReKey();
343
344 if (chunks->empty()) {
345 delete chunks;
346 } else {
347 chunk_pending_to_write_ = true;
348 std::string list_name(url_path, 0, pos);
349 sb_service_->HandleChunk(list_name, chunks);
350 }
351
352 break;
353 }
354 case GETKEY_REQUEST: {
355 std::string client_key, wrapped_key;
356 if (!parser.ParseNewKey(data, length, &client_key, &wrapped_key))
357 return false;
358
359 client_key_ = client_key;
360 wrapped_key_ = wrapped_key;
361 notify_loop_->PostTask(FROM_HERE, NewRunnableMethod(
362 sb_service_, &SafeBrowsingService::OnNewMacKeys, client_key_,
363 wrapped_key_));
364 break;
365 }
366
367 default:
368 return false;
369 }
370
371 return true;
372}
373
374void SafeBrowsingProtocolManager::Initialize() {
375 // Don't want to hit the safe browsing servers on build/chrome bots.
376 if (env_util::HasEnvironmentVariable(env_vars::kHeadless))
377 return;
378
379 ScheduleNextUpdate(false /* no back off */);
380}
381
382void SafeBrowsingProtocolManager::ScheduleNextUpdate(bool back_off) {
383 DCHECK(next_update_sec_ > 0);
384
[email protected]2d316662008-09-03 18:18:14385 // Unschedule any current timer.
386 update_timer_.Stop();
initial.commit09911bf2008-07-26 23:55:29387
388 // Reschedule with the new update.
389 const int next_update = GetNextUpdateTime(back_off);
[email protected]2d316662008-09-03 18:18:14390 update_timer_.Start(TimeDelta::FromMilliseconds(next_update), this,
391 &SafeBrowsingProtocolManager::GetNextUpdate);
initial.commit09911bf2008-07-26 23:55:29392}
393
394// According to section 5 of the SafeBrowsing protocol specification, we must
395// back off after a certain number of errors. We only change 'next_update_sec_'
396// when we receive a response from the SafeBrowsing service.
397int SafeBrowsingProtocolManager::GetNextUpdateTime(bool back_off) {
398 int next = next_update_sec_;
399 if (back_off) {
400 next = GetNextBackOffTime(&update_error_count_, &update_back_off_mult_);
401 } else {
402 // Successful response means error reset.
403 update_error_count_ = 0;
404 update_back_off_mult_ = 1;
405 }
406 return next * 1000; // milliseconds
407}
408
409int SafeBrowsingProtocolManager::GetNextBackOffTime(int* error_count,
410 int* multiplier) {
411 DCHECK(multiplier && error_count);
412 (*error_count)++;
413 if (*error_count > 1 && *error_count < 6) {
414 int next = static_cast<int>(*multiplier * (1 + back_off_fuzz_) * 30 * 60);
415 *multiplier *= 2;
416 if (*multiplier > kSbMaxBackOff)
417 *multiplier = kSbMaxBackOff;
418 return next;
419 }
420
421 if (*error_count >= 6)
422 return 60 * 60 * 8; // 8 hours
423
424 return 60; // 1 minute
425}
426
// This request requires getting a list of all the chunks for each list from the
// database asynchronously. The request will be issued when we're called back in
// OnGetChunksComplete.
// TODO(paulg): We should get this at start up and maintain a ChunkRange cache
//              to avoid hitting the database with each update request. On the
//              otherhand, this request will only occur ~20-30 minutes so there
//              isn't that much overhead. Measure!
void SafeBrowsingProtocolManager::IssueUpdateRequest() {
  request_type_ = UPDATE_REQUEST;
  // The actual fetch is built and started in OnGetChunksComplete().
  sb_service_->GetAllChunks();
}
438
439void SafeBrowsingProtocolManager::IssueChunkRequest() {
440 // We are only allowed to have one request outstanding at any time. Also,
441 // don't get the next url until the previous one has been written to disk so
442 // that we don't use too much memory.
443 if (request_.get() || chunk_request_urls_.empty() || chunk_pending_to_write_)
444 return;
445
446 ChunkUrl next_chunk = chunk_request_urls_.front();
447 DCHECK(!next_chunk.url.empty());
448 if (!StartsWithASCII(next_chunk.url, "http://", false) &&
449 !StartsWithASCII(next_chunk.url, "https://", false))
450 next_chunk.url = "http://" + next_chunk.url;
451 GURL chunk_url(next_chunk.url);
452 request_type_ = CHUNK_REQUEST;
453 request_.reset(new URLFetcher(chunk_url, URLFetcher::GET, this));
454 request_->set_load_flags(net::LOAD_DISABLE_CACHE);
455 request_->set_request_context(Profile::GetDefaultRequestContext());
456 request_->Start();
457}
458
459void SafeBrowsingProtocolManager::IssueKeyRequest() {
460 GURL key_url(StringPrintf(kSbNewKeyUrl,
461 kSbClientName,
462 kSbClientMajorVersion,
463 kSbClientMinorVersion));
464 request_type_ = GETKEY_REQUEST;
465 request_.reset(new URLFetcher(key_url, URLFetcher::GET, this));
466 request_->set_load_flags(net::LOAD_DISABLE_CACHE);
467 request_->set_request_context(Profile::GetDefaultRequestContext());
468 request_->Start();
469}
470
471void SafeBrowsingProtocolManager::OnGetChunksComplete(
472 const std::vector<SBListChunkRanges>& lists, bool database_error) {
473 DCHECK(request_type_ == UPDATE_REQUEST);
474
475 if (database_error) {
476 ScheduleNextUpdate(false);
477 return;
478 }
479
480 const bool use_mac = !client_key_.empty();
481
482 // Format our stored chunks:
483 std::string list_data;
484 bool found_malware = false;
485 bool found_phishing = false;
486 for (size_t i = 0; i < lists.size(); ++i) {
487 list_data.append(FormatList(lists[i], use_mac));
488 if (lists[i].name == "goog-phish-shavar")
489 found_phishing = true;
490
491 if (lists[i].name == "goog-malware-shavar")
492 found_malware = true;
493 }
494
495 // If we have an empty database, let the server know we want data for these
496 // lists.
497 if (!found_phishing)
498 list_data.append(FormatList(SBListChunkRanges("goog-phish-shavar"),
499 use_mac));
500
501 if (!found_malware)
502 list_data.append(FormatList(SBListChunkRanges("goog-malware-shavar"),
503 use_mac));
504
505 std::string url = StringPrintf(kSbUpdateUrl,
506 kSbClientName,
507 kSbClientMajorVersion,
508 kSbClientMinorVersion);
509 if (use_mac) {
510 url.append("&wrkey=");
511 url.append(wrapped_key_);
512 }
513
514 GURL update_url(url);
515 request_.reset(new URLFetcher(update_url, URLFetcher::POST, this));
516 request_->set_load_flags(net::LOAD_DISABLE_CACHE);
517 request_->set_request_context(Profile::GetDefaultRequestContext());
518 request_->set_upload_data("text/plain", list_data);
519 request_->Start();
520}
521
522void SafeBrowsingProtocolManager::OnChunkInserted() {
523 chunk_pending_to_write_ = false;
524
525 if (chunk_request_urls_.empty()) {
526 UMA_HISTOGRAM_LONG_TIMES(L"SB.Update", Time::Now() - last_update_);
527 } else {
528 IssueChunkRequest();
529 }
530}
531
532// static
533std::string SafeBrowsingProtocolManager::FormatList(
534 const SBListChunkRanges& list, bool use_mac) {
535 std::string formatted_results;
536 formatted_results.append(list.name);
537 formatted_results.append(";");
538 if (!list.adds.empty()) {
539 formatted_results.append("a:" + list.adds);
540 if (!list.subs.empty() || use_mac)
541 formatted_results.append(":");
542 }
543 if (!list.subs.empty()) {
544 formatted_results.append("s:" + list.subs);
545 if (use_mac)
546 formatted_results.append(":");
547 }
548 if (use_mac)
549 formatted_results.append("mac");
550 formatted_results.append("\n");
551
552 return formatted_results;
553}
554
void SafeBrowsingProtocolManager::HandleReKey() {
  // The server asked us to re-key: drop the stale MAC key pair and request a
  // fresh one.
  client_key_.clear();
  wrapped_key_.clear();
  IssueKeyRequest();
}
560
void SafeBrowsingProtocolManager::HandleGetHashError() {
  // Advance the GetHash back off state and compute the earliest time the next
  // GetHash request may be issued.
  int next = GetNextBackOffTime(&gethash_error_count_, &gethash_back_off_mult_);
  next_gethash_time_ = Time::Now() + TimeDelta::FromSeconds(next);
}
license.botbf09a502008-08-24 00:55:55565