// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/browser/safe_browsing/protocol_manager.h"

#include "base/histogram.h"
#include "base/logging.h"
#include "base/message_loop.h"
#include "base/string_util.h"
#include "base/task.h"
#include "base/timer.h"
#include "chrome/browser/profile.h"
#include "chrome/browser/safe_browsing/protocol_parser.h"
#include "chrome/browser/safe_browsing/safe_browsing_database.h"
#include "chrome/browser/safe_browsing/safe_browsing_service.h"
#include "chrome/common/env_util.h"
#include "chrome/common/env_vars.h"
#include "chrome/common/rand_util.h"
#include "chrome/common/stl_util-inl.h"
#include "net/base/base64.h"
#include "net/base/load_flags.h"


// Maximum time, in seconds, from start up before we must issue an update query.
static const int kSbTimerStartIntervalSec = 300;

// Update URL for querying about the latest set of chunk updates.
static const char* const kSbUpdateUrl =
    "http://safebrowsing.clients.google.com/safebrowsing/downloads?client=%s&appver=%d.%d&pver=2.1";

// GetHash request URL for retrieving full hashes.
static const char* const kSbGetHashUrl =
    "http://safebrowsing.clients.google.com/safebrowsing/gethash?client=%s&appver=%d.%d&pver=2.1";

// New MAC client key requests URL.
static const char* const kSbNewKeyUrl =
    "https://sb-ssl.google.com/safebrowsing/newkey?client=%s&appver=%d.%d&pver=2.1";

// Client identification substituted into the URL templates above.
// TODO(paulg): Change these values when we get close to launch.
static const char* const kSbClientName = "googleclient";
static const int kSbClientMajorVersion = 1;
static const int kSbClientMinorVersion = 0;

// Maximum back off multiplier.
static const int kSbMaxBackOff = 8;

49// Periodic update task --------------------------------------------------------
50class SafeBrowsingProtocolUpdateTask : public Task {
51 public:
52 explicit SafeBrowsingProtocolUpdateTask(SafeBrowsingProtocolManager* manager)
53 : manager_(manager) {
54 }
55
56 void Run() {
57 manager_->GetNextUpdate();
58 }
59
60 private:
61 SafeBrowsingProtocolManager* manager_;
62};
63

// SafeBrowsingProtocolManager implementation ----------------------------------

// Constructs the manager with previously persisted MAC keys (either may be
// empty, in which case the first update will request new keys first) and the
// message loop used to notify the service of key changes.
SafeBrowsingProtocolManager::SafeBrowsingProtocolManager(
    SafeBrowsingService* sb_service,
    MessageLoop* notify_loop,
    const std::string& client_key,
    const std::string& wrapped_key)
    : sb_service_(sb_service),
      request_type_(NO_REQUEST),
      update_error_count_(0),
      gethash_error_count_(0),
      update_back_off_mult_(1),
      gethash_back_off_mult_(1),
      next_update_sec_(-1),
      update_state_(FIRST_REQUEST),
      initial_request_(true),
      chunk_pending_to_write_(false),
      notify_loop_(notify_loop),
      client_key_(client_key),
      wrapped_key_(wrapped_key) {
  // Set the backoff multiplier fuzz to a random value between 0 and 1.
  back_off_fuzz_ = static_cast<float>(rand_util::RandInt(1, INT_MAX)) / INT_MAX;

  // The first update must happen between 1-5 minutes of start up
  // (RandInt's range below is 60-300 seconds).
  next_update_sec_ = rand_util::RandInt(60, kSbTimerStartIntervalSec);
}

92SafeBrowsingProtocolManager::~SafeBrowsingProtocolManager() {
93 if (update_timer_.get())
94 MessageLoop::current()->timer_manager()->StopTimer(update_timer_.get());
95
96 // Delete in-progress SafeBrowsing requests.
97 STLDeleteContainerPairFirstPointers(hash_requests_.begin(),
98 hash_requests_.end());
99 hash_requests_.clear();
100}
101
102// Public API used by the SafeBrowsingService ----------------------------------
103
104// We can only have one update or chunk request outstanding, but there may be
105// multiple GetHash requests pending since we don't want to serialize them and
106// slow down the user.
107void SafeBrowsingProtocolManager::GetFullHash(
108 SafeBrowsingService::SafeBrowsingCheck* check,
109 const std::vector<SBPrefix>& prefixes) {
110 // If we are in GetHash backoff, we need to check if we're past the next
111 // allowed time. If we are, we can proceed with the request. If not, we are
112 // required to return empty results (i.e. treat the page as safe).
113 if (gethash_error_count_ && Time::Now() <= next_gethash_time_) {
114 std::vector<SBFullHashResult> full_hashes;
115 sb_service_->HandleGetHashResults(check, full_hashes);
116 return;
117 }
118
119 std::string url = StringPrintf(kSbGetHashUrl,
120 kSbClientName,
121 kSbClientMajorVersion,
122 kSbClientMinorVersion);
123 if (!client_key_.empty()) {
124 url.append("&wrkey=");
125 url.append(wrapped_key_);
126 }
127
128 GURL gethash_url(url);
129 URLFetcher* fetcher = new URLFetcher(gethash_url, URLFetcher::POST, this);
130 hash_requests_[fetcher] = check;
131
132 std::string get_hash;
133 SafeBrowsingProtocolParser parser;
134 parser.FormatGetHash(prefixes, &get_hash);
135
136 fetcher->set_load_flags(net::LOAD_DISABLE_CACHE);
137 fetcher->set_request_context(Profile::GetDefaultRequestContext());
[email protected]d36e3c8e2008-08-29 23:42:20138 fetcher->set_upload_data("text/plain", get_hash);
initial.commit09911bf2008-07-26 23:55:29139 fetcher->Start();
140}
141
142void SafeBrowsingProtocolManager::GetNextUpdate() {
143 if (initial_request_) {
144 if (client_key_.empty() || wrapped_key_.empty()) {
145 IssueKeyRequest();
146 return;
147 } else {
148 initial_request_ = false;
149 }
150 }
151
152 if (!request_.get())
153 IssueUpdateRequest();
154}
155
// URLFetcher::Delegate implementation -----------------------------------------

// All SafeBrowsing request responses are handled here.
// TODO(paulg): Clarify with the SafeBrowsing team whether a failed parse of a
//              chunk should retry the download and parse of that chunk (and
//              what back off / how many times to try), and if that effects the
//              update back off. For now, a failed parse of the chunk means we
//              drop it. This isn't so bad because the next UPDATE_REQUEST we
//              do will report all the chunks we have. If that chunk is still
//              required, the SafeBrowsing servers will tell us to get it again.
void SafeBrowsingProtocolManager::OnURLFetchComplete(
    const URLFetcher* source,
    const GURL& url,
    const URLRequestStatus& status,
    int response_code,
    const ResponseCookies& cookies,
    const std::string& data) {
  // Takes ownership of |source| (either a GetHash fetcher from
  // |hash_requests_| or the single outstanding |request_|) so it is deleted
  // on every exit path.
  scoped_ptr<const URLFetcher> fetcher;
  bool parsed_ok = true;
  bool must_back_off = false;  // Reduce SafeBrowsing service query frequency.

  HashRequests::iterator it = hash_requests_.find(source);
  if (it != hash_requests_.end()) {
    // GetHash response.
    fetcher.reset(it->first);
    SafeBrowsingService::SafeBrowsingCheck* check = it->second;
    std::vector<SBFullHashResult> full_hashes;
    if (response_code == 200 || response_code == 204) {
      // 204 (no content) is also a success; either way, reset back off state.
      gethash_error_count_ = 0;
      gethash_back_off_mult_ = 1;
      bool re_key = false;
      SafeBrowsingProtocolParser parser;
      parsed_ok = parser.ParseGetHash(data.data(),
                                      static_cast<int>(data.length()),
                                      client_key_,
                                      &re_key,
                                      &full_hashes);
      if (!parsed_ok) {
        // If we fail to parse it, we must still inform the SafeBrowsingService
        // so that it doesn't hold up the user's request indefinitely. Not sure
        // what to do at that point though!
        full_hashes.clear();
      } else {
        if (re_key)
          HandleReKey();
      }
    } else if (response_code >= 300) {
      // Server error: enter/extend GetHash back off.
      HandleGetHashError();
      SB_DLOG(INFO) << "SafeBrowsing GetHash request for: " << source->url()
                    << ", failed with error: " << response_code;
    }

    // Call back the SafeBrowsingService with full_hashes, even if there was a
    // parse error or an error response code (in which case full_hashes will be
    // empty). We can't block the user regardless of the error status.
    sb_service_->HandleGetHashResults(check, full_hashes);

    hash_requests_.erase(it);
  } else {
    // Update, chunk or key response.
    DCHECK(source == request_.get());
    fetcher.reset(request_.release());

    if (response_code == 200) {
      // We have data from the SafeBrowsing service.
      parsed_ok = HandleServiceResponse(source->url(),
                                        data.data(),
                                        static_cast<int>(data.length()));
      if (!parsed_ok) {
        SB_DLOG(INFO) << "SafeBrowsing request for: " << source->url()
                      << "failed parse.";
      }

      if (request_type_ == CHUNK_REQUEST) {
        if (parsed_ok) {
          // Chunk consumed; move on to the next queued URL.
          chunk_request_urls_.pop_front();
        } else {
          // Drop the rest of this update's chunks (see TODO above).
          chunk_request_urls_.clear();
        }
      } else if (request_type_ == GETKEY_REQUEST && initial_request_) {
        // This is the first request we've made this session. Now that we have
        // the keys, do the regular update request.
        initial_request_ = false;
        GetNextUpdate();
        return;
      }
    } else if (response_code >= 300) {
      // The SafeBrowsing service error: back off.
      must_back_off = true;
      if (request_type_ == CHUNK_REQUEST)
        chunk_request_urls_.clear();
      SB_DLOG(INFO) << "SafeBrowsing request for: " << source->url()
                    << ", failed with error: " << response_code;
    }
  }

  // Schedule a new update request if we've finished retrieving all the chunks
  // from the previous update. We treat the update request and the chunk URLs it
  // contains as an atomic unit as far as back off is concerned.
  if (chunk_request_urls_.empty() &&
      (request_type_ == CHUNK_REQUEST || request_type_ == UPDATE_REQUEST))
    ScheduleNextUpdate(must_back_off);

  // Get the next chunk if available.
  IssueChunkRequest();
}

263bool SafeBrowsingProtocolManager::HandleServiceResponse(const GURL& url,
264 const char* data,
265 int length) {
266 SafeBrowsingProtocolParser parser;
267
268 switch (request_type_) {
269 case UPDATE_REQUEST: {
270 int next_update_sec = -1;
271 bool re_key = false;
272 bool reset = false;
273 std::vector<SBChunkDelete>* chunk_deletes =
274 new std::vector<SBChunkDelete>;
275 std::vector<ChunkUrl> chunk_urls;
276 if (!parser.ParseUpdate(data, length, client_key_,
277 &next_update_sec, &re_key,
278 &reset, chunk_deletes, &chunk_urls)) {
279 delete chunk_deletes;
280 return false;
281 }
282
283 last_update_ = Time::Now();
284
285 if (update_state_ == FIRST_REQUEST)
286 update_state_ = SECOND_REQUEST;
287 else if (update_state_ == SECOND_REQUEST)
288 update_state_ = NORMAL_REQUEST;
289
290 // New time for the next update.
291 if (next_update_sec > 0) {
292 next_update_sec_ = next_update_sec;
293 } else if (update_state_ == SECOND_REQUEST) {
294 next_update_sec_ = rand_util::RandInt(15, 45) * 60;
295 }
296
297 // We need to request a new set of keys for MAC.
298 if (re_key)
299 HandleReKey();
300
301 // New chunks to download.
302 if (!chunk_urls.empty()) {
303 for (size_t i = 0; i < chunk_urls.size(); ++i)
304 chunk_request_urls_.push_back(chunk_urls[i]);
305 }
306
307 // Handle the case were the SafeBrowsing service tells us to dump our
308 // database.
309 if (reset) {
310 sb_service_->ResetDatabase();
311 return true;
312 }
313
314 // Chunks to delete from our storage.
315 if (!chunk_deletes->empty())
316 sb_service_->HandleChunkDelete(chunk_deletes);
317
318 break;
319 }
320 case CHUNK_REQUEST: {
321 // Find list name from url.
322 std::string url_path = url.ExtractFileName();
323 if (url_path.empty())
324 return false;
325
326 std::string::size_type pos = url_path.find_first_of('_');
327 if (pos == std::string::npos)
328 return false;
329
330 const ChunkUrl chunk_url = chunk_request_urls_.front();
331 DCHECK(url.spec().find(chunk_url.url) != std::string::npos);
332
333 bool re_key = false;
334 std::deque<SBChunk>* chunks = new std::deque<SBChunk>;
335 if (!parser.ParseChunk(data, length,
336 client_key_, chunk_url.mac,
337 &re_key, chunks)) {
338#ifndef NDEBUG
339 std::string data_str;
340 data_str.assign(data, length);
341 std::string encoded_chunk;
[email protected]a9bb6f692008-07-30 16:40:10342 net::Base64Encode(data, &encoded_chunk);
initial.commit09911bf2008-07-26 23:55:29343 SB_DLOG(INFO) << "ParseChunk error for chunk: " << chunk_url.url
344 << ", client_key: " << client_key_
345 << ", wrapped_key: " << wrapped_key_
346 << ", mac: " << chunk_url.mac
347 << ", Base64Encode(data): " << encoded_chunk
348 << ", length: " << length;
349#endif
350 safe_browsing_util::FreeChunks(chunks);
351 delete chunks;
352 return false;
353 }
354
355 if (re_key)
356 HandleReKey();
357
358 if (chunks->empty()) {
359 delete chunks;
360 } else {
361 chunk_pending_to_write_ = true;
362 std::string list_name(url_path, 0, pos);
363 sb_service_->HandleChunk(list_name, chunks);
364 }
365
366 break;
367 }
368 case GETKEY_REQUEST: {
369 std::string client_key, wrapped_key;
370 if (!parser.ParseNewKey(data, length, &client_key, &wrapped_key))
371 return false;
372
373 client_key_ = client_key;
374 wrapped_key_ = wrapped_key;
375 notify_loop_->PostTask(FROM_HERE, NewRunnableMethod(
376 sb_service_, &SafeBrowsingService::OnNewMacKeys, client_key_,
377 wrapped_key_));
378 break;
379 }
380
381 default:
382 return false;
383 }
384
385 return true;
386}
387
388void SafeBrowsingProtocolManager::Initialize() {
389 // Don't want to hit the safe browsing servers on build/chrome bots.
390 if (env_util::HasEnvironmentVariable(env_vars::kHeadless))
391 return;
392
393 ScheduleNextUpdate(false /* no back off */);
394}
395
396void SafeBrowsingProtocolManager::ScheduleNextUpdate(bool back_off) {
397 DCHECK(next_update_sec_ > 0);
398
399 if (!update_task_.get())
400 update_task_.reset(new SafeBrowsingProtocolUpdateTask(this));
401
402 // Unschedule any current timer & task.
403 TimerManager* tm = MessageLoop::current()->timer_manager();
404 if (update_timer_.get())
405 tm->StopTimer(update_timer_.get());
406
407 // Reschedule with the new update.
408 const int next_update = GetNextUpdateTime(back_off);
409 update_timer_.reset(tm->StartTimer(next_update, update_task_.get(), false));
410}
411
412// According to section 5 of the SafeBrowsing protocol specification, we must
413// back off after a certain number of errors. We only change 'next_update_sec_'
414// when we receive a response from the SafeBrowsing service.
415int SafeBrowsingProtocolManager::GetNextUpdateTime(bool back_off) {
416 int next = next_update_sec_;
417 if (back_off) {
418 next = GetNextBackOffTime(&update_error_count_, &update_back_off_mult_);
419 } else {
420 // Successful response means error reset.
421 update_error_count_ = 0;
422 update_back_off_mult_ = 1;
423 }
424 return next * 1000; // milliseconds
425}
426
427int SafeBrowsingProtocolManager::GetNextBackOffTime(int* error_count,
428 int* multiplier) {
429 DCHECK(multiplier && error_count);
430 (*error_count)++;
431 if (*error_count > 1 && *error_count < 6) {
432 int next = static_cast<int>(*multiplier * (1 + back_off_fuzz_) * 30 * 60);
433 *multiplier *= 2;
434 if (*multiplier > kSbMaxBackOff)
435 *multiplier = kSbMaxBackOff;
436 return next;
437 }
438
439 if (*error_count >= 6)
440 return 60 * 60 * 8; // 8 hours
441
442 return 60; // 1 minute
443}
444
// This request requires getting a list of all the chunks for each list from the
// database asynchronously. The request will be issued when we're called back in
// OnGetChunksComplete.
// TODO(paulg): We should get this at start up and maintain a ChunkRange cache
//              to avoid hitting the database with each update request. On the
//              otherhand, this request will only occur ~20-30 minutes so there
//              isn't that much overhead. Measure!
void SafeBrowsingProtocolManager::IssueUpdateRequest() {
  request_type_ = UPDATE_REQUEST;
  // Asynchronous; the service calls back OnGetChunksComplete() with the
  // stored chunk ranges, where the actual fetch is started.
  sb_service_->GetAllChunks();
}

457void SafeBrowsingProtocolManager::IssueChunkRequest() {
458 // We are only allowed to have one request outstanding at any time. Also,
459 // don't get the next url until the previous one has been written to disk so
460 // that we don't use too much memory.
461 if (request_.get() || chunk_request_urls_.empty() || chunk_pending_to_write_)
462 return;
463
464 ChunkUrl next_chunk = chunk_request_urls_.front();
465 DCHECK(!next_chunk.url.empty());
466 if (!StartsWithASCII(next_chunk.url, "http://", false) &&
467 !StartsWithASCII(next_chunk.url, "https://", false))
468 next_chunk.url = "http://" + next_chunk.url;
469 GURL chunk_url(next_chunk.url);
470 request_type_ = CHUNK_REQUEST;
471 request_.reset(new URLFetcher(chunk_url, URLFetcher::GET, this));
472 request_->set_load_flags(net::LOAD_DISABLE_CACHE);
473 request_->set_request_context(Profile::GetDefaultRequestContext());
474 request_->Start();
475}
476
477void SafeBrowsingProtocolManager::IssueKeyRequest() {
478 GURL key_url(StringPrintf(kSbNewKeyUrl,
479 kSbClientName,
480 kSbClientMajorVersion,
481 kSbClientMinorVersion));
482 request_type_ = GETKEY_REQUEST;
483 request_.reset(new URLFetcher(key_url, URLFetcher::GET, this));
484 request_->set_load_flags(net::LOAD_DISABLE_CACHE);
485 request_->set_request_context(Profile::GetDefaultRequestContext());
486 request_->Start();
487}
488
489void SafeBrowsingProtocolManager::OnGetChunksComplete(
490 const std::vector<SBListChunkRanges>& lists, bool database_error) {
491 DCHECK(request_type_ == UPDATE_REQUEST);
492
493 if (database_error) {
494 ScheduleNextUpdate(false);
495 return;
496 }
497
498 const bool use_mac = !client_key_.empty();
499
500 // Format our stored chunks:
501 std::string list_data;
502 bool found_malware = false;
503 bool found_phishing = false;
504 for (size_t i = 0; i < lists.size(); ++i) {
505 list_data.append(FormatList(lists[i], use_mac));
506 if (lists[i].name == "goog-phish-shavar")
507 found_phishing = true;
508
509 if (lists[i].name == "goog-malware-shavar")
510 found_malware = true;
511 }
512
513 // If we have an empty database, let the server know we want data for these
514 // lists.
515 if (!found_phishing)
516 list_data.append(FormatList(SBListChunkRanges("goog-phish-shavar"),
517 use_mac));
518
519 if (!found_malware)
520 list_data.append(FormatList(SBListChunkRanges("goog-malware-shavar"),
521 use_mac));
522
523 std::string url = StringPrintf(kSbUpdateUrl,
524 kSbClientName,
525 kSbClientMajorVersion,
526 kSbClientMinorVersion);
527 if (use_mac) {
528 url.append("&wrkey=");
529 url.append(wrapped_key_);
530 }
531
532 GURL update_url(url);
533 request_.reset(new URLFetcher(update_url, URLFetcher::POST, this));
534 request_->set_load_flags(net::LOAD_DISABLE_CACHE);
535 request_->set_request_context(Profile::GetDefaultRequestContext());
536 request_->set_upload_data("text/plain", list_data);
537 request_->Start();
538}
539
540void SafeBrowsingProtocolManager::OnChunkInserted() {
541 chunk_pending_to_write_ = false;
542
543 if (chunk_request_urls_.empty()) {
544 UMA_HISTOGRAM_LONG_TIMES(L"SB.Update", Time::Now() - last_update_);
545 } else {
546 IssueChunkRequest();
547 }
548}
549
550// static
551std::string SafeBrowsingProtocolManager::FormatList(
552 const SBListChunkRanges& list, bool use_mac) {
553 std::string formatted_results;
554 formatted_results.append(list.name);
555 formatted_results.append(";");
556 if (!list.adds.empty()) {
557 formatted_results.append("a:" + list.adds);
558 if (!list.subs.empty() || use_mac)
559 formatted_results.append(":");
560 }
561 if (!list.subs.empty()) {
562 formatted_results.append("s:" + list.subs);
563 if (use_mac)
564 formatted_results.append(":");
565 }
566 if (use_mac)
567 formatted_results.append("mac");
568 formatted_results.append("\n");
569
570 return formatted_results;
571}
572
// Discards the current MAC key pair and requests a fresh one; called whenever
// a parsed response carries the re-key flag.
void SafeBrowsingProtocolManager::HandleReKey() {
  client_key_.clear();
  wrapped_key_.clear();
  IssueKeyRequest();
}

// Records a GetHash failure and computes the earliest time at which the next
// GetHash request may be issued (exponential back off).
void SafeBrowsingProtocolManager::HandleGetHashError() {
  int next = GetNextBackOffTime(&gethash_error_count_, &gethash_back_off_mult_);
  next_gethash_time_ = Time::Now() + TimeDelta::FromSeconds(next);
}