blob: 20a4a00a701055e9a4cdbdf544ec7da113a021dc [file] [log] [blame]
license.botbf09a502008-08-24 00:55:551// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
initial.commit586acc5fe2008-07-26 22:42:524
5// Portions of this code based on Mozilla:
6// (netwerk/cookie/src/nsCookieService.cpp)
7/* ***** BEGIN LICENSE BLOCK *****
8 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
9 *
10 * The contents of this file are subject to the Mozilla Public License Version
11 * 1.1 (the "License"); you may not use this file except in compliance with
12 * the License. You may obtain a copy of the License at
13 * http://www.mozilla.org/MPL/
14 *
15 * Software distributed under the License is distributed on an "AS IS" basis,
16 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
17 * for the specific language governing rights and limitations under the
18 * License.
19 *
20 * The Original Code is mozilla.org code.
21 *
22 * The Initial Developer of the Original Code is
23 * Netscape Communications Corporation.
24 * Portions created by the Initial Developer are Copyright (C) 2003
25 * the Initial Developer. All Rights Reserved.
26 *
27 * Contributor(s):
28 * Daniel Witte ([email protected])
29 * Michiel van Leeuwen ([email protected])
30 *
31 * Alternatively, the contents of this file may be used under the terms of
32 * either the GNU General Public License Version 2 or later (the "GPL"), or
33 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
34 * in which case the provisions of the GPL or the LGPL are applicable instead
35 * of those above. If you wish to allow use of your version of this file only
36 * under the terms of either the GPL or the LGPL, and not to allow others to
37 * use your version of this file under the terms of the MPL, indicate your
38 * decision by deleting the provisions above and replace them with the notice
39 * and other provisions required by the GPL or the LGPL. If you do not delete
40 * the provisions above, a recipient may use your version of this file under
41 * the terms of any one of the MPL, the GPL or the LGPL.
42 *
43 * ***** END LICENSE BLOCK ***** */
44
45#include "net/base/cookie_monster.h"
46
47#include <algorithm>
48
49#include "base/basictypes.h"
50#include "base/logging.h"
51#include "base/scoped_ptr.h"
52#include "base/string_tokenizer.h"
53#include "base/string_util.h"
54#include "googleurl/src/gurl.h"
55#include "googleurl/src/url_canon.h"
56#include "net/base/net_util.h"
57#include "net/base/registry_controlled_domain.h"
58
59// #define COOKIE_LOGGING_ENABLED
60#ifdef COOKIE_LOGGING_ENABLED
61#define COOKIE_DLOG(severity) DLOG_IF(INFO, 1)
62#else
63#define COOKIE_DLOG(severity) DLOG_IF(INFO, 0)
64#endif
65
[email protected]8ac1a752008-07-31 19:40:3766namespace net {
67
68// static
69bool CookieMonster::enable_file_scheme_ = false;
initial.commit586acc5fe2008-07-26 22:42:5270
71// static
72void CookieMonster::EnableFileScheme() {
73 enable_file_scheme_ = true;
74}
75
76CookieMonster::CookieMonster()
77 : initialized_(false),
78 store_(NULL) {
79}
80
81CookieMonster::CookieMonster(PersistentCookieStore* store)
82 : initialized_(false),
83 store_(store) {
84}
85
86CookieMonster::~CookieMonster() {
87 DeleteAll(false);
88}
89
90void CookieMonster::InitStore() {
91 DCHECK(store_) << "Store must exist to initialize";
92
93 // Initialize the store and sync in any saved persistent cookies. We don't
94 // care if it's expired, insert it so it can be garbage collected, removed,
95 // and sync'd.
96 std::vector<KeyedCanonicalCookie> cookies;
97 store_->Load(&cookies);
98 for (std::vector<KeyedCanonicalCookie>::const_iterator it = cookies.begin();
99 it != cookies.end(); ++it) {
100 InternalInsertCookie(it->first, it->second, false);
101 }
102}
103
104// The system resolution is not high enough, so we can have multiple
105// set cookies that result in the same system time. When this happens, we
106// increment by one Time unit. Let's hope computers don't get too fast.
107Time CookieMonster::CurrentTime() {
108 return std::max(Time::Now(),
109 Time::FromInternalValue(last_time_seen_.ToInternalValue() + 1));
110}
111
112// Parse a cookie expiration time. We try to be lenient, but we need to
113// assume some order to distinguish the fields. The basic rules:
114// - The month name must be present and prefix the first 3 letters of the
115// full month name (jan for January, jun for June).
116// - If the year is <= 2 digits, it must occur after the day of month.
117// - The time must be of the format hh:mm:ss.
118// An average cookie expiration will look something like this:
119// Sat, 15-Apr-17 21:01:22 GMT
120Time CookieMonster::ParseCookieTime(const std::string& time_string) {
121 static const char* kMonths[] = { "jan", "feb", "mar", "apr", "may", "jun",
122 "jul", "aug", "sep", "oct", "nov", "dec" };
123 static const int kMonthsLen = arraysize(kMonths);
124 // We want to be pretty liberal, and support most non-ascii and non-digit
125 // characters as a delimiter. We can't treat : as a delimiter, because it
126 // is the delimiter for hh:mm:ss, and we want to keep this field together.
127 // We make sure to include - and +, since they could prefix numbers.
128 // If the cookie attribute came in in quotes (ex expires="XXX"), the quotes
129 // will be preserved, and we will get them here. So we make sure to include
130 // quote characters, and also \ for anything that was internally escaped.
131 static const char* kDelimiters = "\t !\"#$%&'()*+,-./;<=>?@[\\]^_`{|}~";
132
133 Time::Exploded exploded = {0};
134
135 StringTokenizer tokenizer(time_string, kDelimiters);
136
137 bool found_day_of_month = false;
138 bool found_month = false;
139 bool found_time = false;
140 bool found_year = false;
141
142 while (tokenizer.GetNext()) {
143 const std::string token = tokenizer.token();
144 DCHECK(!token.empty());
145 bool numerical = IsAsciiDigit(token[0]);
146
147 // String field
148 if (!numerical) {
149 if (!found_month) {
150 for (int i = 0; i < kMonthsLen; ++i) {
151 // Match prefix, so we could match January, etc
152 if (StrNCaseCmp(token.c_str(), kMonths[i], 3) == 0) {
153 exploded.month = i + 1;
154 found_month = true;
155 break;
156 }
157 }
158 } else {
159 // If we've gotten here, it means we've already found and parsed our
160 // month, and we have another string, which we would expect to be the
161 // the time zone name. According to the RFC and my experiments with
162 // how sites format their expirations, we don't have much of a reason
163 // to support timezones. We don't want to ever barf on user input,
164 // but this DCHECK should pass for well-formed data.
165 // DCHECK(token == "GMT");
166 }
167 // Numeric field w/ a colon
168 } else if (token.find(':') != std::string::npos) {
169 if (!found_time &&
[email protected]d862fd92008-08-21 18:15:35170#ifdef COMPILER_MSVC
171 sscanf_s(
172#else
173 sscanf(
174#endif
175 token.c_str(), "%2u:%2u:%2u", &exploded.hour,
176 &exploded.minute, &exploded.second) == 3) {
initial.commit586acc5fe2008-07-26 22:42:52177 found_time = true;
178 } else {
179 // We should only ever encounter one time-like thing. If we're here,
180 // it means we've found a second, which shouldn't happen. We keep
181 // the first. This check should be ok for well-formed input:
182 // NOTREACHED();
183 }
184 // Numeric field
185 } else {
186 // Overflow with atoi() is unspecified, so we enforce a max length.
187 if (!found_day_of_month && token.length() <= 2) {
188 exploded.day_of_month = atoi(token.c_str());
189 found_day_of_month = true;
190 } else if (!found_year && token.length() <= 5) {
191 exploded.year = atoi(token.c_str());
192 found_year = true;
193 } else {
194 // If we're here, it means we've either found an extra numeric field,
195 // or a numeric field which was too long. For well-formed input, the
196 // following check would be reasonable:
197 // NOTREACHED();
198 }
199 }
200 }
201
202 if (!found_day_of_month || !found_month || !found_time || !found_year) {
203 // We didn't find all of the fields we need. For well-formed input, the
204 // following check would be reasonable:
205 // NOTREACHED() << "Cookie parse expiration failed: " << time_string;
206 return Time();
207 }
208
209 // Normalize the year to expand abbreviated years to the full year.
210 if (exploded.year >= 69 && exploded.year <= 99)
211 exploded.year += 1900;
212 if (exploded.year >= 0 && exploded.year <= 68)
213 exploded.year += 2000;
214
215 // If our values are within their correct ranges, we got our time.
216 if (exploded.day_of_month >= 1 && exploded.day_of_month <= 31 &&
217 exploded.month >= 1 && exploded.month <= 12 &&
218 exploded.year >= 1601 && exploded.year <= 30827 &&
219 exploded.hour <= 23 && exploded.minute <= 59 && exploded.second <= 59) {
220 return Time::FromUTCExploded(exploded);
221 }
222
223 // One of our values was out of expected range. For well-formed input,
224 // the following check would be reasonable:
225 // NOTREACHED() << "Cookie exploded expiration failed: " << time_string;
226
227 return Time();
228}
229
230// Determine the cookie domain key to use for setting the specified cookie.
231// On success returns true, and sets cookie_domain_key to either a
232// -host cookie key (ex: "google.com")
233// -domain cookie key (ex: ".google.com")
234static bool GetCookieDomainKey(const GURL& url,
235 const CookieMonster::ParsedCookie& pc,
236 std::string* cookie_domain_key) {
237 const std::string url_host(url.host());
238 if (!pc.HasDomain() || pc.Domain().empty()) {
239 // No domain was specified in cookie -- default to host cookie.
240 *cookie_domain_key = url_host;
241 DCHECK((*cookie_domain_key)[0] != '.');
242 return true;
243 }
244
245 // Get the normalized domain specified in cookie line.
246 // Note: The RFC says we can reject a cookie if the domain
247 // attribute does not start with a dot. IE/FF/Safari however, allow a cookie
248 // of the form domain=my.domain.com, treating it the same as
249 // domain=.my.domain.com -- for compatibility we do the same here. Firefox
250 // also treats domain=.....my.domain.com like domain=.my.domain.com, but
251 // neither IE nor Safari do this, and we don't either.
[email protected]8ac1a752008-07-31 19:40:37252 std::string cookie_domain(net::CanonicalizeHost(pc.Domain(), NULL));
initial.commit586acc5fe2008-07-26 22:42:52253 if (cookie_domain.empty())
254 return false;
255 if (cookie_domain[0] != '.')
256 cookie_domain = "." + cookie_domain;
257
258 // Ensure |url| and |cookie_domain| have the same domain+registry.
259 const std::string url_domain_and_registry(
260 RegistryControlledDomainService::GetDomainAndRegistry(url));
261 if (url_domain_and_registry.empty())
262 return false; // IP addresses/intranet hosts can't set domain cookies.
263 const std::string cookie_domain_and_registry(
264 RegistryControlledDomainService::GetDomainAndRegistry(cookie_domain));
265 if (url_domain_and_registry != cookie_domain_and_registry)
266 return false; // Can't set a cookie on a different domain + registry.
267
268 // Ensure |url_host| is |cookie_domain| or one of its subdomains. Given that
269 // we know the domain+registry are the same from the above checks, this is
270 // basically a simple string suffix check.
271 if ((url_host.length() < cookie_domain.length()) ?
272 (cookie_domain != ("." + url_host)) :
273 url_host.compare(url_host.length() - cookie_domain.length(),
274 cookie_domain.length(), cookie_domain))
275 return false;
276
277
278 *cookie_domain_key = cookie_domain;
279 return true;
280}
281
282static std::string CanonPath(const GURL& url,
283 const CookieMonster::ParsedCookie& pc) {
284 // The RFC says the path should be a prefix of the current URL path.
285 // However, Mozilla allows you to set any path for compatibility with
286 // broken websites. We unfortunately will mimic this behavior. We try
287 // to be generous and accept cookies with an invalid path attribute, and
288 // default the path to something reasonable.
289
290 // The path was supplied in the cookie, we'll take it.
291 if (pc.HasPath() && !pc.Path().empty() && pc.Path()[0] == '/')
292 return pc.Path();
293
294 // The path was not supplied in the cookie or invalid, we will default
295 // to the current URL path.
296 // """Defaults to the path of the request URL that generated the
297 // Set-Cookie response, up to, but not including, the
298 // right-most /."""
299 // How would this work for a cookie on /? We will include it then.
300 const std::string& url_path = url.path();
301
302 std::string::size_type idx = url_path.find_last_of('/');
303
304 // The cookie path was invalid or a single '/'.
305 if (idx == 0 || idx == std::string::npos)
306 return std::string("/");
307
308 // Return up to the rightmost '/'.
309 return url_path.substr(0, idx);
310}
311
312static Time CanonExpiration(const CookieMonster::ParsedCookie& pc,
313 const Time& current) {
314 // First, try the Max-Age attribute.
315 uint64 max_age = 0;
316 if (pc.HasMaxAge() &&
[email protected]d862fd92008-08-21 18:15:35317#if defined(COMPILER_MSVC)
initial.commit586acc5fe2008-07-26 22:42:52318 sscanf_s(pc.MaxAge().c_str(), " %I64u", &max_age) == 1) {
[email protected]d862fd92008-08-21 18:15:35319
320#else
321 sscanf(pc.MaxAge().c_str(), " %llu", &max_age) == 1) {
322#endif
initial.commit586acc5fe2008-07-26 22:42:52323 return current + TimeDelta::FromSeconds(max_age);
324 }
325
326 // Try the Expires attribute.
327 if (pc.HasExpires())
328 return CookieMonster::ParseCookieTime(pc.Expires());
329
330 // Invalid or no expiration, persistent cookie.
331 return Time();
332}
333
334static bool HasCookieableScheme(const GURL& url) {
335 static const char* kCookieableSchemes[] = { "http", "https", "file" };
336 static const int kCookieableSchemesLen = arraysize(kCookieableSchemes);
337 static const int kCookieableSchemesFileIndex = 2;
338
339 // Make sure the request is on a cookie-able url scheme.
340 for (int i = 0; i < kCookieableSchemesLen; ++i) {
341 // We matched a scheme.
342 if (url.SchemeIs(kCookieableSchemes[i])) {
343 // This is file:// scheme
344 if (i == kCookieableSchemesFileIndex)
345 return CookieMonster::enable_file_scheme_;
346 // We've matched a supported scheme.
347 return true;
348 }
349 }
350
351 // The scheme didn't match any in our whitelist.
352 COOKIE_DLOG(WARNING) << "Unsupported cookie scheme: " << url.scheme();
353 return false;
354}
355
356bool CookieMonster::SetCookie(const GURL& url,
357 const std::string& cookie_line) {
358 Time creation_date = CurrentTime();
359 last_time_seen_ = creation_date;
360 return SetCookieWithCreationTime(url, cookie_line, creation_date);
361}
362
363bool CookieMonster::SetCookieWithCreationTime(const GURL& url,
364 const std::string& cookie_line,
365 const Time& creation_time) {
366 DCHECK(!creation_time.is_null());
367
368 if (!HasCookieableScheme(url)) {
369 DLOG(WARNING) << "Unsupported cookie scheme: " << url.scheme();
370 return false;
371 }
372
373 AutoLock autolock(lock_);
374 InitIfNecessary();
375
376 COOKIE_DLOG(INFO) << "SetCookie() line: " << cookie_line;
377
378 // Parse the cookie.
379 ParsedCookie pc(cookie_line);
380
381 if (!pc.IsValid()) {
382 COOKIE_DLOG(WARNING) << "Couldn't parse cookie";
383 return false;
384 }
385
386 std::string cookie_domain;
387 if (!GetCookieDomainKey(url, pc, &cookie_domain)) {
388 return false;
389 }
390
391 std::string cookie_path = CanonPath(url, pc);
392
393 scoped_ptr<CanonicalCookie> cc;
394 Time cookie_expires = CanonExpiration(pc, creation_time);
395
396 cc.reset(new CanonicalCookie(pc.Name(), pc.Value(), cookie_path,
397 pc.IsSecure(), pc.IsHttpOnly(),
398 creation_time, !cookie_expires.is_null(),
399 cookie_expires));
400
401 if (!cc.get()) {
402 COOKIE_DLOG(WARNING) << "Failed to allocate CanonicalCookie";
403 return false;
404 }
405
406 // We should have only purged at most one matching cookie.
407 int num_deleted = DeleteEquivalentCookies(cookie_domain, *cc);
[email protected]d862fd92008-08-21 18:15:35408 DCHECK(num_deleted <= 1);
initial.commit586acc5fe2008-07-26 22:42:52409
410 COOKIE_DLOG(INFO) << "SetCookie() cc: " << cc->DebugString();
411
412 // Realize that we might be setting an expired cookie, and the only point
413 // was to delete the cookie which we've already done.
414 if (!cc->IsExpired(creation_time))
415 InternalInsertCookie(cookie_domain, cc.release(), true);
416
417 // We assume that hopefully setting a cookie will be less common than
418 // querying a cookie. Since setting a cookie can put us over our limits,
419 // make sure that we garbage collect... We can also make the assumption that
420 // if a cookie was set, in the common case it will be used soon after,
421 // and we will purge the expired cookies in GetCookies().
422 GarbageCollect(creation_time, cookie_domain);
423
424 return true;
425}
426
427void CookieMonster::SetCookies(const GURL& url,
428 const std::vector<std::string>& cookies) {
429 for (std::vector<std::string>::const_iterator iter = cookies.begin();
430 iter != cookies.end(); ++iter)
431 SetCookie(url, *iter);
432}
433
434void CookieMonster::InternalInsertCookie(const std::string& key,
435 CanonicalCookie* cc,
436 bool sync_to_store) {
437 if (cc->IsPersistent() && store_ && sync_to_store)
438 store_->AddCookie(key, *cc);
439 cookies_.insert(CookieMap::value_type(key, cc));
440}
441
442void CookieMonster::InternalDeleteCookie(CookieMap::iterator it,
443 bool sync_to_store) {
444 CanonicalCookie* cc = it->second;
445 COOKIE_DLOG(INFO) << "InternalDeleteCookie() cc: " << cc->DebugString();
446 if (cc->IsPersistent() && store_ && sync_to_store)
447 store_->DeleteCookie(*cc);
448 cookies_.erase(it);
449 delete cc;
450}
451
452int CookieMonster::DeleteEquivalentCookies(const std::string& key,
453 const CanonicalCookie& ecc) {
454 int num_deleted = 0;
455 for (CookieMapItPair its = cookies_.equal_range(key);
456 its.first != its.second; ) {
457 CookieMap::iterator curit = its.first;
458 CanonicalCookie* cc = curit->second;
459 ++its.first;
460
461 // TODO while we're here, we might as well purge expired cookies too.
462
463 if (ecc.IsEquivalent(*cc)) {
464 InternalDeleteCookie(curit, true);
465 ++num_deleted;
466#ifdef NDEBUG
467 // We should only ever find a single equivalent cookie
468 break;
469#endif
470 }
471 }
472
473 // Our internal state should be consistent, we should never have more
474 // than one equivalent cookie, since they should overwrite each other.
475 DCHECK(num_deleted <= 1);
476
477 return num_deleted;
478}
479
480// TODO we should be sorting by last access time, however, right now
481// we're not saving an access time, so we're sorting by creation time.
482static bool OldestCookieSorter(const CookieMonster::CookieMap::iterator& it1,
483 const CookieMonster::CookieMap::iterator& it2) {
484 return it1->second->CreationDate() < it2->second->CreationDate();
485}
486
487// is vector::size_type always going to be size_t?
488int CookieMonster::GarbageCollectRange(const Time& current,
489 const CookieMapItPair& itpair,
490 size_t num_max, size_t num_purge) {
491 int num_deleted = 0;
492
493 // First, walk through and delete anything that's expired.
494 // Save a list of iterators to the ones that weren't expired
495 std::vector<CookieMap::iterator> cookie_its;
496 for (CookieMap::iterator it = itpair.first, end = itpair.second; it != end;) {
497 CookieMap::iterator curit = it;
498 CanonicalCookie* cc = curit->second;
499 ++it;
500
501 if (cc->IsExpired(current)) {
502 InternalDeleteCookie(curit, true);
503 ++num_deleted;
504 } else {
505 cookie_its.push_back(curit);
506 }
507 }
508
509 if (cookie_its.size() > num_max) {
510 COOKIE_DLOG(INFO) << "GarbageCollectRange() Deep Garbage Collect.";
511 num_purge += cookie_its.size() - num_max;
512 // Sort the top N we want to purge.
513 std::partial_sort(cookie_its.begin(), cookie_its.begin() + num_purge,
514 cookie_its.end(), OldestCookieSorter);
515
516 // TODO should probably use an iterator and not an index.
517 for (size_t i = 0; i < num_purge; ++i) {
518 InternalDeleteCookie(cookie_its[i], true);
519 ++num_deleted;
520 }
521 }
522
523 return num_deleted;
524}
525
526// TODO Whenever we delete, check last_cur_utc_...
527int CookieMonster::GarbageCollect(const Time& current,
528 const std::string& key) {
529 // Based off of the Mozilla defaults
530 // It might seem scary to have a high purge value, but really it's not. You
531 // just make sure that you increase the max to cover the increase in purge,
532 // and we would have been purging the same amount of cookies. We're just
533 // going through the garbage collection process less often.
534 static const size_t kNumCookiesPerHost = 70; // ~50 cookies
535 static const size_t kNumCookiesPerHostPurge = 20;
536 static const size_t kNumCookiesTotal = 1100; // ~1000 cookies
537 static const size_t kNumCookiesTotalPurge = 100;
538
539 int num_deleted = 0;
540
541 // Collect garbage for this key.
542 if (cookies_.count(key) > kNumCookiesPerHost) {
543 COOKIE_DLOG(INFO) << "GarbageCollect() key: " << key;
544 num_deleted += GarbageCollectRange(current, cookies_.equal_range(key),
545 kNumCookiesPerHost,
546 kNumCookiesPerHostPurge);
547 }
548
549 // Collect garbage for everything.
550 if (cookies_.size() > kNumCookiesTotal) {
551 COOKIE_DLOG(INFO) << "GarbageCollect() everything";
552 num_deleted += GarbageCollectRange(current,
553 CookieMapItPair(cookies_.begin(),
554 cookies_.end()),
555 kNumCookiesTotal, kNumCookiesTotalPurge);
556 }
557
558 return num_deleted;
559}
560
561int CookieMonster::DeleteAll(bool sync_to_store) {
562 AutoLock autolock(lock_);
563 InitIfNecessary();
564
565 int num_deleted = 0;
566 for (CookieMap::iterator it = cookies_.begin(); it != cookies_.end();) {
567 CookieMap::iterator curit = it;
568 ++it;
569 InternalDeleteCookie(curit, sync_to_store);
570 ++num_deleted;
571 }
572
573 return num_deleted;
574}
575
576int CookieMonster::DeleteAllCreatedBetween(const Time& delete_begin,
577 const Time& delete_end,
578 bool sync_to_store) {
579 AutoLock autolock(lock_);
580 InitIfNecessary();
581
582 int num_deleted = 0;
583 for (CookieMap::iterator it = cookies_.begin(); it != cookies_.end();) {
584 CookieMap::iterator curit = it;
585 CanonicalCookie* cc = curit->second;
586 ++it;
587
588 if (cc->CreationDate() >= delete_begin &&
589 (delete_end.is_null() || cc->CreationDate() < delete_end)) {
590 InternalDeleteCookie(curit, sync_to_store);
591 ++num_deleted;
592 }
593 }
594
595 return num_deleted;
596}
597
598int CookieMonster::DeleteAllCreatedAfter(const Time& delete_begin,
599 bool sync_to_store) {
600 return DeleteAllCreatedBetween(delete_begin, Time(), sync_to_store);
601}
602
603bool CookieMonster::DeleteCookie(const std::string& domain,
604 const CanonicalCookie& cookie,
605 bool sync_to_store) {
606 AutoLock autolock(lock_);
607 InitIfNecessary();
608
609 for (CookieMapItPair its = cookies_.equal_range(domain);
610 its.first != its.second; ++its.first) {
611 // The creation date acts as our unique index...
612 if (its.first->second->CreationDate() == cookie.CreationDate()) {
613 InternalDeleteCookie(its.first, sync_to_store);
614 return true;
615 }
616 }
617 return false;
618}
619
620// Mozilla sorts on the path length (longest first), and then it
621// sorts by creation time (oldest first).
622// The RFC says the sort order for the domain attribute is undefined.
623static bool CookieSorter(CookieMonster::CanonicalCookie* cc1,
624 CookieMonster::CanonicalCookie* cc2) {
625 if (cc1->Path().length() == cc2->Path().length())
626 return cc1->CreationDate() < cc2->CreationDate();
627 return cc1->Path().length() > cc2->Path().length();
628}
629
630std::string CookieMonster::GetCookies(const GURL& url) {
631 return GetCookiesWithOptions(url, NORMAL);
632}
633
634// Currently our cookie datastructure is based on Mozilla's approach. We have a
635// hash keyed on the cookie's domain, and for any query we walk down the domain
636// components and probe for cookies until we reach the TLD, where we stop.
637// For example, a.b.blah.com, we would probe
638// - a.b.blah.com
639// - .a.b.blah.com (TODO should we check this first or second?)
640// - .b.blah.com
641// - .blah.com
642// There are some alternative datastructures we could try, like a
643// search/prefix trie, where we reverse the hostname and query for all
644// keys that are a prefix of our hostname. I think the hash probing
645// should be fast and simple enough for now.
646std::string CookieMonster::GetCookiesWithOptions(const GURL& url,
647 CookieOptions options) {
648 if (!HasCookieableScheme(url)) {
649 DLOG(WARNING) << "Unsupported cookie scheme: " << url.scheme();
650 return std::string();
651 }
652
653 // Get the cookies for this host and its domain(s).
654 std::vector<CanonicalCookie*> cookies;
655 FindCookiesForHostAndDomain(url, options, &cookies);
656 std::sort(cookies.begin(), cookies.end(), CookieSorter);
657
658 std::string cookie_line;
659 for (std::vector<CanonicalCookie*>::const_iterator it = cookies.begin();
660 it != cookies.end(); ++it) {
661 if (it != cookies.begin())
662 cookie_line += "; ";
663 // In Mozilla if you set a cookie like AAAA, it will have an empty token
664 // and a value of AAAA. When it sends the cookie back, it will send AAAA,
665 // so we need to avoid sending =AAAA for a blank token value.
666 if (!(*it)->Name().empty())
667 cookie_line += (*it)->Name() + "=";
668 cookie_line += (*it)->Value();
669 }
670
671 COOKIE_DLOG(INFO) << "GetCookies() result: " << cookie_line;
672
673 return cookie_line;
674}
675
676// TODO(deanm): We could have expired cookies that haven't been purged yet,
677// and exporting these would be inaccurate, for example in the cookie manager
678// it might show cookies that are actually expired already. We should do
679// a full garbage collection before ... There actually isn't a way to do
680// this right now (a forceful full GC), so we'll have to live with the
681// possibility of showing the user expired cookies. This shouldn't be very
682// common since most persistent cookies have a long lifetime.
683CookieMonster::CookieList CookieMonster::GetAllCookies() {
684 AutoLock autolock(lock_);
685 InitIfNecessary();
686
687 CookieList cookie_list;
688
689 for (CookieMap::iterator it = cookies_.begin(); it != cookies_.end(); ++it) {
690 cookie_list.push_back(CookieListPair(it->first, *it->second));
691 }
692
693 return cookie_list;
694}
695
696void CookieMonster::FindCookiesForHostAndDomain(
697 const GURL& url,
698 CookieOptions options,
699 std::vector<CanonicalCookie*>* cookies) {
700 AutoLock autolock(lock_);
701 InitIfNecessary();
702
703 const Time current_time(CurrentTime());
704
705 // Query for the full host, For example: 'a.c.blah.com'.
706 std::string key(url.host());
707 FindCookiesForKey(key, url, options, current_time, cookies);
708
709 // See if we can search for domain cookies, i.e. if the host has a TLD + 1.
710 const std::string domain(
711 RegistryControlledDomainService::GetDomainAndRegistry(key));
712 if (domain.empty())
713 return;
714 DCHECK_LE(domain.length(), key.length());
715 DCHECK_EQ(0, key.compare(key.length() - domain.length(), domain.length(),
716 domain));
717
718 // Walk through the string and query at the dot points (GURL should have
719 // canonicalized the dots, so this should be safe). Stop once we reach the
720 // domain + registry; we can't write cookies past this point, and with some
721 // registrars other domains can, in which case we don't want to read their
722 // cookies.
723 for (key = "." + key; key.length() > domain.length(); ) {
724 FindCookiesForKey(key, url, options, current_time, cookies);
725 const size_t next_dot = key.find('.', 1); // Skip over leading dot.
726 key.erase(0, next_dot);
727 }
728}
729
730void CookieMonster::FindCookiesForKey(
731 const std::string& key,
732 const GURL& url,
733 CookieOptions options,
734 const Time& current,
735 std::vector<CanonicalCookie*>* cookies) {
736 bool secure = url.SchemeIsSecure();
737
738 for (CookieMapItPair its = cookies_.equal_range(key);
739 its.first != its.second; ) {
740 CookieMap::iterator curit = its.first;
741 CanonicalCookie* cc = curit->second;
742 ++its.first;
743
744 // If the cookie is expired, delete it.
745 if (cc->IsExpired(current)) {
746 InternalDeleteCookie(curit, true);
747 continue;
748 }
749
750 // Filter out HttpOnly cookies unless they where explicitly requested.
751 if ((options & INCLUDE_HTTPONLY) == 0 && cc->IsHttpOnly())
752 continue;
753
754 // Filter out secure cookies unless we're https.
755 if (!secure && cc->IsSecure())
756 continue;
757
758 if (!cc->IsOnPath(url.path()))
759 continue;
760
761 // Congratulations Charlie, you passed the test!
762 cookies->push_back(cc);
763 }
764}
765
766
767CookieMonster::ParsedCookie::ParsedCookie(const std::string& cookie_line)
768 : is_valid_(false),
769 path_index_(0),
770 domain_index_(0),
771 expires_index_(0),
772 maxage_index_(0),
773 secure_index_(0),
774 httponly_index_(0) {
775
776 if (cookie_line.size() > kMaxCookieSize) {
777 LOG(INFO) << "Not parsing cookie, too large: " << cookie_line.size();
778 return;
779 }
780
781 ParseTokenValuePairs(cookie_line);
782 if (pairs_.size() > 0) {
783 is_valid_ = true;
784 SetupAttributes();
785 }
786}
787
// Returns true if |c| is one of the characters in the NUL-terminated set
// |chars|. The NUL character itself is never considered a member: strchr()
// treats the terminator as part of the string, so without the explicit check
// CharIsA('\0', ...) would be true for every set.
// TODO maybe make this take an iterator, could check for end also?
static inline bool CharIsA(const char c, const char* chars) {
  return c != '\0' && strchr(chars, c) != NULL;
}
793// Seek the iterator to the first occurrence of a character in |chars|.
794// Returns true if it hit the end, false otherwise.
795static inline bool SeekTo(std::string::const_iterator* it,
796 const std::string::const_iterator& end,
797 const char* chars) {
798 for (; *it != end && !CharIsA(**it, chars); ++(*it));
799 return *it == end;
800}
801// Seek the iterator to the first occurrence of a character not in |chars|.
802// Returns true if it hit the end, false otherwise.
803static inline bool SeekPast(std::string::const_iterator* it,
804 const std::string::const_iterator& end,
805 const char* chars) {
806 for (; *it != end && CharIsA(**it, chars); ++(*it));
807 return *it == end;
808}
809static inline bool SeekBackPast(std::string::const_iterator* it,
810 const std::string::const_iterator& end,
811 const char* chars) {
812 for (; *it != end && CharIsA(**it, chars); --(*it));
813 return *it == end;
814}
815
816// Parse all token/value pairs and populate pairs_.
817void CookieMonster::ParsedCookie::ParseTokenValuePairs(
818 const std::string& cookie_line) {
819 static const char kTerminator[] = "\n\r\0";
820 static const int kTerminatorLen = sizeof(kTerminator) - 1;
821 static const char kWhitespace[] = " \t";
822 static const char kQuoteTerminator[] = "\"";
823 static const char kValueSeparator[] = ";";
824 static const char kTokenSeparator[] = ";=";
825
826 pairs_.clear();
827
828 // Ok, here we go. We should be expecting to be starting somewhere
829 // before the cookie line, not including any header name...
830 std::string::const_iterator start = cookie_line.begin();
831 std::string::const_iterator end = cookie_line.end();
832 std::string::const_iterator it = start;
833
834 // TODO Make sure we're stripping \r\n in the network code. Then we
835 // can log any unexpected terminators.
836 std::string::size_type term_pos = cookie_line.find_first_of(
837 std::string(kTerminator, kTerminatorLen));
838 if (term_pos != std::string::npos) {
839 // We found a character we should treat as an end of string.
840 end = start + term_pos;
841 }
842
843 for (int pair_num = 0; pair_num < kMaxPairs && it != end; ++pair_num) {
844 TokenValuePair pair;
845 std::string::const_iterator token_start, token_real_end, token_end;
846
847 // Seek past any whitespace before the "token" (the name).
848 // token_start should point at the first character in the token
849 if (SeekPast(&it, end, kWhitespace))
850 break; // No token, whitespace or empty.
851 token_start = it;
852
853 // Seek over the token, to the token separator.
854 // token_real_end should point at the token separator, i.e. '='.
855 // If it == end after the seek, we probably have a token-value.
856 SeekTo(&it, end, kTokenSeparator);
857 token_real_end = it;
858
859 // Ignore any whitespace between the token and the token separator.
860 // token_end should point after the last interesting token character,
861 // pointing at either whitespace, or at '=' (and equal to token_real_end).
862 if (it != token_start) { // We could have an empty token name.
863 --it; // Go back before the token separator.
864 // Skip over any whitespace to the first non-whitespace character.
865 SeekBackPast(&it, token_start, kWhitespace);
866 // Point after it.
867 ++it;
868 }
869 token_end = it;
870
871 // Seek us back to the end of the token.
872 it = token_real_end;
873
874 if (it == end || *it != '=') {
875 // We have a token-value, we didn't have any token name.
876 if (pair_num == 0) {
877 // For the first time around, we want to treat single values
878 // as a value with an empty name. (Mozilla bug 169091).
879 // IE seems to also have this behavior, ex "AAA", and "AAA=10" will
880 // set 2 different cookies, and setting "BBB" will then replace "AAA".
881 pair.first = "";
882 // Rewind to the beginning of what we thought was the token name,
883 // and let it get parsed as a value.
884 it = token_start;
885 } else {
886 // Any not-first attribute we want to treat a value as a
887 // name with an empty value... This is so something like
888 // "secure;" will get parsed as a Token name, and not a value.
889 pair.first = std::string(token_start, token_end);
890 }
891 } else {
892 // We have a TOKEN=VALUE.
893 pair.first = std::string(token_start, token_end);
894 ++it; // Skip past the '='.
895 }
896
897 // OK, now try to parse a value.
898 std::string::const_iterator value_start, value_end;
899
900 // Seek past any whitespace that might in-between the token and value.
901 SeekPast(&it, end, kWhitespace);
902 // value_start should point at the first character of the value.
903 value_start = it;
904
905 // The value is double quoted, process <quoted-string>.
906 if (it != end && *it == '"') {
907 // Skip over the first double quote, and parse until
908 // a terminating double quote or the end.
909 for (++it; it != end && !CharIsA(*it, kQuoteTerminator); ++it) {
910 // Allow an escaped \" in a double quoted string.
911 if (*it == '\\') {
912 ++it;
913 if (it == end)
914 break;
915 }
916 }
917
918 SeekTo(&it, end, kValueSeparator);
919 // We could seek to the end, that's ok.
920 value_end = it;
921 } else {
922 // The value is non-quoted, process <token-value>.
923 // Just look for ';' to terminate ('=' allowed).
924 // We can hit the end, maybe they didn't terminate.
925 SeekTo(&it, end, kValueSeparator);
926
927 // Ignore any whitespace between the value and the value separator
928 if (it != value_start) { // Could have an empty value
929 --it;
930 SeekBackPast(&it, value_start, kWhitespace);
931 ++it;
932 }
933
934 value_end = it;
935 }
936
937 // OK, we're finished with a Token/Value.
938 pair.second = std::string(value_start, value_end);
939 // From RFC2109: "Attributes (names) (attr) are case-insensitive."
940 if (pair_num != 0)
941 StringToLowerASCII(&pair.first);
942 pairs_.push_back(pair);
943
944 // We've processed a token/value pair, we're either at the end of
945 // the string or a ValueSeparator like ';', which we want to skip.
946 if (it != end)
947 ++it;
948 }
949}
950
951void CookieMonster::ParsedCookie::SetupAttributes() {
952 static const char kPathTokenName[] = "path";
953 static const char kDomainTokenName[] = "domain";
954 static const char kExpiresTokenName[] = "expires";
955 static const char kMaxAgeTokenName[] = "max-age";
956 static const char kSecureTokenName[] = "secure";
957 static const char kHttpOnlyTokenName[] = "httponly";
958
959 // We skip over the first token/value, the user supplied one.
960 for (size_t i = 1; i < pairs_.size(); ++i) {
961 if (pairs_[i].first == kPathTokenName)
962 path_index_ = i;
963 else if (pairs_[i].first == kDomainTokenName)
964 domain_index_ = i;
965 else if (pairs_[i].first == kExpiresTokenName)
966 expires_index_ = i;
967 else if (pairs_[i].first == kMaxAgeTokenName)
968 maxage_index_ = i;
969 else if (pairs_[i].first == kSecureTokenName)
970 secure_index_ = i;
971 else if (pairs_[i].first == kHttpOnlyTokenName)
972 httponly_index_ = i;
973 else { /* some attribute we don't know or don't care about. */ }
974 }
975}
976
977// Create a cookie-line for the cookie. For debugging only!
978// If we want to use this for something more than debugging, we
979// should rewrite it better...
980std::string CookieMonster::ParsedCookie::DebugString() const {
981 std::string out;
982 for (PairList::const_iterator it = pairs_.begin();
983 it != pairs_.end(); ++it) {
984 out.append(it->first);
985 out.append("=");
986 out.append(it->second);
987 out.append("; ");
988 }
989 return out;
990}
991
992bool CookieMonster::CanonicalCookie::IsOnPath(
993 const std::string& url_path) const {
994
995 // A zero length would be unsafe for our trailing '/' checks, and
996 // would also make no sense for our prefix match. The code that
997 // creates a CanonicalCookie should make sure the path is never zero length,
998 // but we double check anyway.
999 if (path_.empty())
1000 return false;
1001
1002 // The Mozilla code broke it into 3 cases, if it's strings lengths
1003 // are less than, equal, or greater. I think this is simpler:
1004
1005 // Make sure the cookie path is a prefix of the url path. If the
1006 // url path is shorter than the cookie path, then the cookie path
1007 // can't be a prefix.
1008 if (url_path.find(path_) != 0)
1009 return false;
1010
1011 // Now we know that url_path is >= cookie_path, and that cookie_path
1012 // is a prefix of url_path. If they are the are the same length then
1013 // they are identical, otherwise we need an additional check:
1014
1015 // In order to avoid in correctly matching a cookie path of /blah
1016 // with a request path of '/blahblah/', we need to make sure that either
1017 // the cookie path ends in a trailing '/', or that we prefix up to a '/'
1018 // in the url path. Since we know that the url path length is greater
1019 // than the cookie path length, it's safe to index one byte past.
1020 if (path_.length() != url_path.length() &&
1021 path_[path_.length() - 1] != '/' &&
1022 url_path[path_.length()] != '/')
1023 return false;
1024
1025 return true;
1026}
1027
1028std::string CookieMonster::CanonicalCookie::DebugString() const {
1029 return StringPrintf("name: %s value: %s path: %s creation: %llu",
1030 name_.c_str(), value_.c_str(), path_.c_str(),
1031 creation_date_.ToTimeT());
1032}
[email protected]8ac1a752008-07-31 19:40:371033
1034} // namespace
license.botbf09a502008-08-24 00:55:551035