Blame - net/base/cookie_monster.cc - chromium/src.git

blob: 20a4a00a701055e9a4cdbdf544ec7da113a021dc [file] [log] [blame]

license.bot	bf09a50	2008-08-24 00:55:55	[diff] [blame^]	1	// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style license that can be
				3	// found in the LICENSE file.
initial.commit	586acc5fe	2008-07-26 22:42:52	[diff] [blame]	4
				5	// Portions of this code based on Mozilla:
				6	// (netwerk/cookie/src/nsCookieService.cpp)
				7	/* *** BEGIN LICENSE BLOCK ***
				8	* Version: MPL 1.1/GPL 2.0/LGPL 2.1
				9	*
				10	* The contents of this file are subject to the Mozilla Public License Version
				11	* 1.1 (the "License"); you may not use this file except in compliance with
				12	* the License. You may obtain a copy of the License at
				13	* http://www.mozilla.org/MPL/
				14	*
				15	* Software distributed under the License is distributed on an "AS IS" basis,
				16	* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
				17	* for the specific language governing rights and limitations under the
				18	* License.
				19	*
				20	* The Original Code is mozilla.org code.
				21	*
				22	* The Initial Developer of the Original Code is
				23	* Netscape Communications Corporation.
				24	* Portions created by the Initial Developer are Copyright (C) 2003
				25	* the Initial Developer. All Rights Reserved.
				26	*
				27	* Contributor(s):
				28	* Daniel Witte ([email protected])
				29	* Michiel van Leeuwen ([email protected])
				30	*
				31	* Alternatively, the contents of this file may be used under the terms of
				32	* either the GNU General Public License Version 2 or later (the "GPL"), or
				33	* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
				34	* in which case the provisions of the GPL or the LGPL are applicable instead
				35	* of those above. If you wish to allow use of your version of this file only
				36	* under the terms of either the GPL or the LGPL, and not to allow others to
				37	* use your version of this file under the terms of the MPL, indicate your
				38	* decision by deleting the provisions above and replace them with the notice
				39	* and other provisions required by the GPL or the LGPL. If you do not delete
				40	* the provisions above, a recipient may use your version of this file under
				41	* the terms of any one of the MPL, the GPL or the LGPL.
				42	*
				43	* *** END LICENSE BLOCK *** */
				44
				45	#include "net/base/cookie_monster.h"
				46
				47	#include <algorithm>
				48
				49	#include "base/basictypes.h"
				50	#include "base/logging.h"
				51	#include "base/scoped_ptr.h"
				52	#include "base/string_tokenizer.h"
				53	#include "base/string_util.h"
				54	#include "googleurl/src/gurl.h"
				55	#include "googleurl/src/url_canon.h"
				56	#include "net/base/net_util.h"
				57	#include "net/base/registry_controlled_domain.h"
				58
				59	// #define COOKIE_LOGGING_ENABLED
				60	#ifdef COOKIE_LOGGING_ENABLED
				61	#define COOKIE_DLOG(severity) DLOG_IF(INFO, 1)
				62	#else
				63	#define COOKIE_DLOG(severity) DLOG_IF(INFO, 0)
				64	#endif
				65
[email protected]	8ac1a75	2008-07-31 19:40:37	[diff] [blame]	66	namespace net {
				67
				68	// static
				69	bool CookieMonster::enable_file_scheme_ = false;
initial.commit	586acc5fe	2008-07-26 22:42:52	[diff] [blame]	70
				71	// static
				72	void CookieMonster::EnableFileScheme() {
				73	enable_file_scheme_ = true;
				74	}
				75
				76	CookieMonster::CookieMonster()
				77	: initialized_(false),
				78	store_(NULL) {
				79	}
				80
				81	CookieMonster::CookieMonster(PersistentCookieStore* store)
				82	: initialized_(false),
				83	store_(store) {
				84	}
				85
				86	CookieMonster::~CookieMonster() {
				87	DeleteAll(false);
				88	}
				89
				90	void CookieMonster::InitStore() {
				91	DCHECK(store_) << "Store must exist to initialize";
				92
				93	// Initialize the store and sync in any saved persistent cookies. We don't
				94	// care if it's expired, insert it so it can be garbage collected, removed,
				95	// and sync'd.
				96	std::vector<KeyedCanonicalCookie> cookies;
				97	store_->Load(&cookies);
				98	for (std::vector<KeyedCanonicalCookie>::const_iterator it = cookies.begin();
				99	it != cookies.end(); ++it) {
				100	InternalInsertCookie(it->first, it->second, false);
				101	}
				102	}
				103
				104	// The system resolution is not high enough, so we can have multiple
				105	// set cookies that result in the same system time. When this happens, we
				106	// increment by one Time unit. Let's hope computers don't get too fast.
				107	Time CookieMonster::CurrentTime() {
				108	return std::max(Time::Now(),
				109	Time::FromInternalValue(last_time_seen_.ToInternalValue() + 1));
				110	}
				111
				112	// Parse a cookie expiration time. We try to be lenient, but we need to
				113	// assume some order to distinguish the fields. The basic rules:
				114	// - The month name must be present and prefix the first 3 letters of the
				115	// full month name (jan for January, jun for June).
				116	// - If the year is <= 2 digits, it must occur after the day of month.
				117	// - The time must be of the format hh:mm:ss.
				118	// An average cookie expiration will look something like this:
				119	// Sat, 15-Apr-17 21:01:22 GMT
				120	Time CookieMonster::ParseCookieTime(const std::string& time_string) {
				121	static const char* kMonths[] = { "jan", "feb", "mar", "apr", "may", "jun",
				122	"jul", "aug", "sep", "oct", "nov", "dec" };
				123	static const int kMonthsLen = arraysize(kMonths);
				124	// We want to be pretty liberal, and support most non-ascii and non-digit
				125	// characters as a delimiter. We can't treat : as a delimiter, because it
				126	// is the delimiter for hh:mm:ss, and we want to keep this field together.
				127	// We make sure to include - and +, since they could prefix numbers.
				128	// If the cookie attribute came in in quotes (ex expires="XXX"), the quotes
				129	// will be preserved, and we will get them here. So we make sure to include
				130	// quote characters, and also \ for anything that was internally escaped.
				131	static const char* kDelimiters = "\t !\"#$%&'()*+,-./;<=>?@[\\]^_`{\|}~";
				132
				133	Time::Exploded exploded = {0};
				134
				135	StringTokenizer tokenizer(time_string, kDelimiters);
				136
				137	bool found_day_of_month = false;
				138	bool found_month = false;
				139	bool found_time = false;
				140	bool found_year = false;
				141
				142	while (tokenizer.GetNext()) {
				143	const std::string token = tokenizer.token();
				144	DCHECK(!token.empty());
				145	bool numerical = IsAsciiDigit(token[0]);
				146
				147	// String field
				148	if (!numerical) {
				149	if (!found_month) {
				150	for (int i = 0; i < kMonthsLen; ++i) {
				151	// Match prefix, so we could match January, etc
				152	if (StrNCaseCmp(token.c_str(), kMonths[i], 3) == 0) {
				153	exploded.month = i + 1;
				154	found_month = true;
				155	break;
				156	}
				157	}
				158	} else {
				159	// If we've gotten here, it means we've already found and parsed our
				160	// month, and we have another string, which we would expect to be the
				161	// the time zone name. According to the RFC and my experiments with
				162	// how sites format their expirations, we don't have much of a reason
				163	// to support timezones. We don't want to ever barf on user input,
				164	// but this DCHECK should pass for well-formed data.
				165	// DCHECK(token == "GMT");
				166	}
				167	// Numeric field w/ a colon
				168	} else if (token.find(':') != std::string::npos) {
				169	if (!found_time &&
[email protected]	d862fd9	2008-08-21 18:15:35	[diff] [blame]	170	#ifdef COMPILER_MSVC
				171	sscanf_s(
				172	#else
				173	sscanf(
				174	#endif
				175	token.c_str(), "%2u:%2u:%2u", &exploded.hour,
				176	&exploded.minute, &exploded.second) == 3) {
initial.commit	586acc5fe	2008-07-26 22:42:52	[diff] [blame]	177	found_time = true;
				178	} else {
				179	// We should only ever encounter one time-like thing. If we're here,
				180	// it means we've found a second, which shouldn't happen. We keep
				181	// the first. This check should be ok for well-formed input:
				182	// NOTREACHED();
				183	}
				184	// Numeric field
				185	} else {
				186	// Overflow with atoi() is unspecified, so we enforce a max length.
				187	if (!found_day_of_month && token.length() <= 2) {
				188	exploded.day_of_month = atoi(token.c_str());
				189	found_day_of_month = true;
				190	} else if (!found_year && token.length() <= 5) {
				191	exploded.year = atoi(token.c_str());
				192	found_year = true;
				193	} else {
				194	// If we're here, it means we've either found an extra numeric field,
				195	// or a numeric field which was too long. For well-formed input, the
				196	// following check would be reasonable:
				197	// NOTREACHED();
				198	}
				199	}
				200	}
				201
				202	if (!found_day_of_month \|\| !found_month \|\| !found_time \|\| !found_year) {
				203	// We didn't find all of the fields we need. For well-formed input, the
				204	// following check would be reasonable:
				205	// NOTREACHED() << "Cookie parse expiration failed: " << time_string;
				206	return Time();
				207	}
				208
				209	// Normalize the year to expand abbreviated years to the full year.
				210	if (exploded.year >= 69 && exploded.year <= 99)
				211	exploded.year += 1900;
				212	if (exploded.year >= 0 && exploded.year <= 68)
				213	exploded.year += 2000;
				214
				215	// If our values are within their correct ranges, we got our time.
				216	if (exploded.day_of_month >= 1 && exploded.day_of_month <= 31 &&
				217	exploded.month >= 1 && exploded.month <= 12 &&
				218	exploded.year >= 1601 && exploded.year <= 30827 &&
				219	exploded.hour <= 23 && exploded.minute <= 59 && exploded.second <= 59) {
				220	return Time::FromUTCExploded(exploded);
				221	}
				222
				223	// One of our values was out of expected range. For well-formed input,
				224	// the following check would be reasonable:
				225	// NOTREACHED() << "Cookie exploded expiration failed: " << time_string;
				226
				227	return Time();
				228	}
				229
				230	// Determine the cookie domain key to use for setting the specified cookie.
				231	// On success returns true, and sets cookie_domain_key to either a
				232	// -host cookie key (ex: "google.com")
				233	// -domain cookie key (ex: ".google.com")
				234	static bool GetCookieDomainKey(const GURL& url,
				235	const CookieMonster::ParsedCookie& pc,
				236	std::string* cookie_domain_key) {
				237	const std::string url_host(url.host());
				238	if (!pc.HasDomain() \|\| pc.Domain().empty()) {
				239	// No domain was specified in cookie -- default to host cookie.
				240	*cookie_domain_key = url_host;
				241	DCHECK((*cookie_domain_key)[0] != '.');
				242	return true;
				243	}
				244
				245	// Get the normalized domain specified in cookie line.
				246	// Note: The RFC says we can reject a cookie if the domain
				247	// attribute does not start with a dot. IE/FF/Safari however, allow a cookie
				248	// of the form domain=my.domain.com, treating it the same as
				249	// domain=.my.domain.com -- for compatibility we do the same here. Firefox
				250	// also treats domain=.....my.domain.com like domain=.my.domain.com, but
				251	// neither IE nor Safari do this, and we don't either.
[email protected]	8ac1a75	2008-07-31 19:40:37	[diff] [blame]	252	std::string cookie_domain(net::CanonicalizeHost(pc.Domain(), NULL));
initial.commit	586acc5fe	2008-07-26 22:42:52	[diff] [blame]	253	if (cookie_domain.empty())
				254	return false;
				255	if (cookie_domain[0] != '.')
				256	cookie_domain = "." + cookie_domain;
				257
				258	// Ensure \|url\| and \|cookie_domain\| have the same domain+registry.
				259	const std::string url_domain_and_registry(
				260	RegistryControlledDomainService::GetDomainAndRegistry(url));
				261	if (url_domain_and_registry.empty())
				262	return false; // IP addresses/intranet hosts can't set domain cookies.
				263	const std::string cookie_domain_and_registry(
				264	RegistryControlledDomainService::GetDomainAndRegistry(cookie_domain));
				265	if (url_domain_and_registry != cookie_domain_and_registry)
				266	return false; // Can't set a cookie on a different domain + registry.
				267
				268	// Ensure \|url_host\| is \|cookie_domain\| or one of its subdomains. Given that
				269	// we know the domain+registry are the same from the above checks, this is
				270	// basically a simple string suffix check.
				271	if ((url_host.length() < cookie_domain.length()) ?
				272	(cookie_domain != ("." + url_host)) :
				273	url_host.compare(url_host.length() - cookie_domain.length(),
				274	cookie_domain.length(), cookie_domain))
				275	return false;
				276
				277
				278	*cookie_domain_key = cookie_domain;
				279	return true;
				280	}
				281
				282	static std::string CanonPath(const GURL& url,
				283	const CookieMonster::ParsedCookie& pc) {
				284	// The RFC says the path should be a prefix of the current URL path.
				285	// However, Mozilla allows you to set any path for compatibility with
				286	// broken websites. We unfortunately will mimic this behavior. We try
				287	// to be generous and accept cookies with an invalid path attribute, and
				288	// default the path to something reasonable.
				289
				290	// The path was supplied in the cookie, we'll take it.
				291	if (pc.HasPath() && !pc.Path().empty() && pc.Path()[0] == '/')
				292	return pc.Path();
				293
				294	// The path was not supplied in the cookie or invalid, we will default
				295	// to the current URL path.
				296	// """Defaults to the path of the request URL that generated the
				297	// Set-Cookie response, up to, but not including, the
				298	// right-most /."""
				299	// How would this work for a cookie on /? We will include it then.
				300	const std::string& url_path = url.path();
				301
				302	std::string::size_type idx = url_path.find_last_of('/');
				303
				304	// The cookie path was invalid or a single '/'.
				305	if (idx == 0 \|\| idx == std::string::npos)
				306	return std::string("/");
				307
				308	// Return up to the rightmost '/'.
				309	return url_path.substr(0, idx);
				310	}
				311
				312	static Time CanonExpiration(const CookieMonster::ParsedCookie& pc,
				313	const Time& current) {
				314	// First, try the Max-Age attribute.
				315	uint64 max_age = 0;
				316	if (pc.HasMaxAge() &&
[email protected]	d862fd9	2008-08-21 18:15:35	[diff] [blame]	317	#if defined(COMPILER_MSVC)
initial.commit	586acc5fe	2008-07-26 22:42:52	[diff] [blame]	318	sscanf_s(pc.MaxAge().c_str(), " %I64u", &max_age) == 1) {
[email protected]	d862fd9	2008-08-21 18:15:35	[diff] [blame]	319
				320	#else
				321	sscanf(pc.MaxAge().c_str(), " %llu", &max_age) == 1) {
				322	#endif
initial.commit	586acc5fe	2008-07-26 22:42:52	[diff] [blame]	323	return current + TimeDelta::FromSeconds(max_age);
				324	}
				325
				326	// Try the Expires attribute.
				327	if (pc.HasExpires())
				328	return CookieMonster::ParseCookieTime(pc.Expires());
				329
				330	// Invalid or no expiration, persistent cookie.
				331	return Time();
				332	}
				333
				334	static bool HasCookieableScheme(const GURL& url) {
				335	static const char* kCookieableSchemes[] = { "http", "https", "file" };
				336	static const int kCookieableSchemesLen = arraysize(kCookieableSchemes);
				337	static const int kCookieableSchemesFileIndex = 2;
				338
				339	// Make sure the request is on a cookie-able url scheme.
				340	for (int i = 0; i < kCookieableSchemesLen; ++i) {
				341	// We matched a scheme.
				342	if (url.SchemeIs(kCookieableSchemes[i])) {
				343	// This is file:// scheme
				344	if (i == kCookieableSchemesFileIndex)
				345	return CookieMonster::enable_file_scheme_;
				346	// We've matched a supported scheme.
				347	return true;
				348	}
				349	}
				350
				351	// The scheme didn't match any in our whitelist.
				352	COOKIE_DLOG(WARNING) << "Unsupported cookie scheme: " << url.scheme();
				353	return false;
				354	}
				355
				356	bool CookieMonster::SetCookie(const GURL& url,
				357	const std::string& cookie_line) {
				358	Time creation_date = CurrentTime();
				359	last_time_seen_ = creation_date;
				360	return SetCookieWithCreationTime(url, cookie_line, creation_date);
				361	}
				362
				363	bool CookieMonster::SetCookieWithCreationTime(const GURL& url,
				364	const std::string& cookie_line,
				365	const Time& creation_time) {
				366	DCHECK(!creation_time.is_null());
				367
				368	if (!HasCookieableScheme(url)) {
				369	DLOG(WARNING) << "Unsupported cookie scheme: " << url.scheme();
				370	return false;
				371	}
				372
				373	AutoLock autolock(lock_);
				374	InitIfNecessary();
				375
				376	COOKIE_DLOG(INFO) << "SetCookie() line: " << cookie_line;
				377
				378	// Parse the cookie.
				379	ParsedCookie pc(cookie_line);
				380
				381	if (!pc.IsValid()) {
				382	COOKIE_DLOG(WARNING) << "Couldn't parse cookie";
				383	return false;
				384	}
				385
				386	std::string cookie_domain;
				387	if (!GetCookieDomainKey(url, pc, &cookie_domain)) {
				388	return false;
				389	}
				390
				391	std::string cookie_path = CanonPath(url, pc);
				392
				393	scoped_ptr<CanonicalCookie> cc;
				394	Time cookie_expires = CanonExpiration(pc, creation_time);
				395
				396	cc.reset(new CanonicalCookie(pc.Name(), pc.Value(), cookie_path,
				397	pc.IsSecure(), pc.IsHttpOnly(),
				398	creation_time, !cookie_expires.is_null(),
				399	cookie_expires));
				400
				401	if (!cc.get()) {
				402	COOKIE_DLOG(WARNING) << "Failed to allocate CanonicalCookie";
				403	return false;
				404	}
				405
				406	// We should have only purged at most one matching cookie.
				407	int num_deleted = DeleteEquivalentCookies(cookie_domain, *cc);
[email protected]	d862fd9	2008-08-21 18:15:35	[diff] [blame]	408	DCHECK(num_deleted <= 1);
initial.commit	586acc5fe	2008-07-26 22:42:52	[diff] [blame]	409
				410	COOKIE_DLOG(INFO) << "SetCookie() cc: " << cc->DebugString();
				411
				412	// Realize that we might be setting an expired cookie, and the only point
				413	// was to delete the cookie which we've already done.
				414	if (!cc->IsExpired(creation_time))
				415	InternalInsertCookie(cookie_domain, cc.release(), true);
				416
				417	// We assume that hopefully setting a cookie will be less common than
				418	// querying a cookie. Since setting a cookie can put us over our limits,
				419	// make sure that we garbage collect... We can also make the assumption that
				420	// if a cookie was set, in the common case it will be used soon after,
				421	// and we will purge the expired cookies in GetCookies().
				422	GarbageCollect(creation_time, cookie_domain);
				423
				424	return true;
				425	}
				426
				427	void CookieMonster::SetCookies(const GURL& url,
				428	const std::vector<std::string>& cookies) {
				429	for (std::vector<std::string>::const_iterator iter = cookies.begin();
				430	iter != cookies.end(); ++iter)
				431	SetCookie(url, *iter);
				432	}
				433
				434	void CookieMonster::InternalInsertCookie(const std::string& key,
				435	CanonicalCookie* cc,
				436	bool sync_to_store) {
				437	if (cc->IsPersistent() && store_ && sync_to_store)
				438	store_->AddCookie(key, *cc);
				439	cookies_.insert(CookieMap::value_type(key, cc));
				440	}
				441
				442	void CookieMonster::InternalDeleteCookie(CookieMap::iterator it,
				443	bool sync_to_store) {
				444	CanonicalCookie* cc = it->second;
				445	COOKIE_DLOG(INFO) << "InternalDeleteCookie() cc: " << cc->DebugString();
				446	if (cc->IsPersistent() && store_ && sync_to_store)
				447	store_->DeleteCookie(*cc);
				448	cookies_.erase(it);
				449	delete cc;
				450	}
				451
				452	int CookieMonster::DeleteEquivalentCookies(const std::string& key,
				453	const CanonicalCookie& ecc) {
				454	int num_deleted = 0;
				455	for (CookieMapItPair its = cookies_.equal_range(key);
				456	its.first != its.second; ) {
				457	CookieMap::iterator curit = its.first;
				458	CanonicalCookie* cc = curit->second;
				459	++its.first;
				460
				461	// TODO while we're here, we might as well purge expired cookies too.
				462
				463	if (ecc.IsEquivalent(*cc)) {
				464	InternalDeleteCookie(curit, true);
				465	++num_deleted;
				466	#ifdef NDEBUG
				467	// We should only ever find a single equivalent cookie
				468	break;
				469	#endif
				470	}
				471	}
				472
				473	// Our internal state should be consistent, we should never have more
				474	// than one equivalent cookie, since they should overwrite each other.
				475	DCHECK(num_deleted <= 1);
				476
				477	return num_deleted;
				478	}
				479
				480	// TODO we should be sorting by last access time, however, right now
				481	// we're not saving an access time, so we're sorting by creation time.
				482	static bool OldestCookieSorter(const CookieMonster::CookieMap::iterator& it1,
				483	const CookieMonster::CookieMap::iterator& it2) {
				484	return it1->second->CreationDate() < it2->second->CreationDate();
				485	}
				486
				487	// is vector::size_type always going to be size_t?
				488	int CookieMonster::GarbageCollectRange(const Time& current,
				489	const CookieMapItPair& itpair,
				490	size_t num_max, size_t num_purge) {
				491	int num_deleted = 0;
				492
				493	// First, walk through and delete anything that's expired.
				494	// Save a list of iterators to the ones that weren't expired
				495	std::vector<CookieMap::iterator> cookie_its;
				496	for (CookieMap::iterator it = itpair.first, end = itpair.second; it != end;) {
				497	CookieMap::iterator curit = it;
				498	CanonicalCookie* cc = curit->second;
				499	++it;
				500
				501	if (cc->IsExpired(current)) {
				502	InternalDeleteCookie(curit, true);
				503	++num_deleted;
				504	} else {
				505	cookie_its.push_back(curit);
				506	}
				507	}
				508
				509	if (cookie_its.size() > num_max) {
				510	COOKIE_DLOG(INFO) << "GarbageCollectRange() Deep Garbage Collect.";
				511	num_purge += cookie_its.size() - num_max;
				512	// Sort the top N we want to purge.
				513	std::partial_sort(cookie_its.begin(), cookie_its.begin() + num_purge,
				514	cookie_its.end(), OldestCookieSorter);
				515
				516	// TODO should probably use an iterator and not an index.
				517	for (size_t i = 0; i < num_purge; ++i) {
				518	InternalDeleteCookie(cookie_its[i], true);
				519	++num_deleted;
				520	}
				521	}
				522
				523	return num_deleted;
				524	}
				525
				526	// TODO Whenever we delete, check last_cur_utc_...
				527	int CookieMonster::GarbageCollect(const Time& current,
				528	const std::string& key) {
				529	// Based off of the Mozilla defaults
				530	// It might seem scary to have a high purge value, but really it's not. You
				531	// just make sure that you increase the max to cover the increase in purge,
				532	// and we would have been purging the same amount of cookies. We're just
				533	// going through the garbage collection process less often.
				534	static const size_t kNumCookiesPerHost = 70; // ~50 cookies
				535	static const size_t kNumCookiesPerHostPurge = 20;
				536	static const size_t kNumCookiesTotal = 1100; // ~1000 cookies
				537	static const size_t kNumCookiesTotalPurge = 100;
				538
				539	int num_deleted = 0;
				540
				541	// Collect garbage for this key.
				542	if (cookies_.count(key) > kNumCookiesPerHost) {
				543	COOKIE_DLOG(INFO) << "GarbageCollect() key: " << key;
				544	num_deleted += GarbageCollectRange(current, cookies_.equal_range(key),
				545	kNumCookiesPerHost,
				546	kNumCookiesPerHostPurge);
				547	}
				548
				549	// Collect garbage for everything.
				550	if (cookies_.size() > kNumCookiesTotal) {
				551	COOKIE_DLOG(INFO) << "GarbageCollect() everything";
				552	num_deleted += GarbageCollectRange(current,
				553	CookieMapItPair(cookies_.begin(),
				554	cookies_.end()),
				555	kNumCookiesTotal, kNumCookiesTotalPurge);
				556	}
				557
				558	return num_deleted;
				559	}
				560
				561	int CookieMonster::DeleteAll(bool sync_to_store) {
				562	AutoLock autolock(lock_);
				563	InitIfNecessary();
				564
				565	int num_deleted = 0;
				566	for (CookieMap::iterator it = cookies_.begin(); it != cookies_.end();) {
				567	CookieMap::iterator curit = it;
				568	++it;
				569	InternalDeleteCookie(curit, sync_to_store);
				570	++num_deleted;
				571	}
				572
				573	return num_deleted;
				574	}
				575
				576	int CookieMonster::DeleteAllCreatedBetween(const Time& delete_begin,
				577	const Time& delete_end,
				578	bool sync_to_store) {
				579	AutoLock autolock(lock_);
				580	InitIfNecessary();
				581
				582	int num_deleted = 0;
				583	for (CookieMap::iterator it = cookies_.begin(); it != cookies_.end();) {
				584	CookieMap::iterator curit = it;
				585	CanonicalCookie* cc = curit->second;
				586	++it;
				587
				588	if (cc->CreationDate() >= delete_begin &&
				589	(delete_end.is_null() \|\| cc->CreationDate() < delete_end)) {
				590	InternalDeleteCookie(curit, sync_to_store);
				591	++num_deleted;
				592	}
				593	}
				594
				595	return num_deleted;
				596	}
				597
				598	int CookieMonster::DeleteAllCreatedAfter(const Time& delete_begin,
				599	bool sync_to_store) {
				600	return DeleteAllCreatedBetween(delete_begin, Time(), sync_to_store);
				601	}
				602
				603	bool CookieMonster::DeleteCookie(const std::string& domain,
				604	const CanonicalCookie& cookie,
				605	bool sync_to_store) {
				606	AutoLock autolock(lock_);
				607	InitIfNecessary();
				608
				609	for (CookieMapItPair its = cookies_.equal_range(domain);
				610	its.first != its.second; ++its.first) {
				611	// The creation date acts as our unique index...
				612	if (its.first->second->CreationDate() == cookie.CreationDate()) {
				613	InternalDeleteCookie(its.first, sync_to_store);
				614	return true;
				615	}
				616	}
				617	return false;
				618	}
				619
				620	// Mozilla sorts on the path length (longest first), and then it
				621	// sorts by creation time (oldest first).
				622	// The RFC says the sort order for the domain attribute is undefined.
				623	static bool CookieSorter(CookieMonster::CanonicalCookie* cc1,
				624	CookieMonster::CanonicalCookie* cc2) {
				625	if (cc1->Path().length() == cc2->Path().length())
				626	return cc1->CreationDate() < cc2->CreationDate();
				627	return cc1->Path().length() > cc2->Path().length();
				628	}
				629
				630	std::string CookieMonster::GetCookies(const GURL& url) {
				631	return GetCookiesWithOptions(url, NORMAL);
				632	}
				633
				634	// Currently our cookie datastructure is based on Mozilla's approach. We have a
				635	// hash keyed on the cookie's domain, and for any query we walk down the domain
				636	// components and probe for cookies until we reach the TLD, where we stop.
				637	// For example, a.b.blah.com, we would probe
				638	// - a.b.blah.com
				639	// - .a.b.blah.com (TODO should we check this first or second?)
				640	// - .b.blah.com
				641	// - .blah.com
				642	// There are some alternative datastructures we could try, like a
				643	// search/prefix trie, where we reverse the hostname and query for all
				644	// keys that are a prefix of our hostname. I think the hash probing
				645	// should be fast and simple enough for now.
				646	std::string CookieMonster::GetCookiesWithOptions(const GURL& url,
				647	CookieOptions options) {
				648	if (!HasCookieableScheme(url)) {
				649	DLOG(WARNING) << "Unsupported cookie scheme: " << url.scheme();
				650	return std::string();
				651	}
				652
				653	// Get the cookies for this host and its domain(s).
				654	std::vector<CanonicalCookie*> cookies;
				655	FindCookiesForHostAndDomain(url, options, &cookies);
				656	std::sort(cookies.begin(), cookies.end(), CookieSorter);
				657
				658	std::string cookie_line;
				659	for (std::vector<CanonicalCookie*>::const_iterator it = cookies.begin();
				660	it != cookies.end(); ++it) {
				661	if (it != cookies.begin())
				662	cookie_line += "; ";
				663	// In Mozilla if you set a cookie like AAAA, it will have an empty token
				664	// and a value of AAAA. When it sends the cookie back, it will send AAAA,
				665	// so we need to avoid sending =AAAA for a blank token value.
				666	if (!(*it)->Name().empty())
				667	cookie_line += (*it)->Name() + "=";
				668	cookie_line += (*it)->Value();
				669	}
				670
				671	COOKIE_DLOG(INFO) << "GetCookies() result: " << cookie_line;
				672
				673	return cookie_line;
				674	}
				675
				676	// TODO(deanm): We could have expired cookies that haven't been purged yet,
				677	// and exporting these would be inaccurate, for example in the cookie manager
				678	// it might show cookies that are actually expired already. We should do
				679	// a full garbage collection before ... There actually isn't a way to do
				680	// this right now (a forceful full GC), so we'll have to live with the
				681	// possibility of showing the user expired cookies. This shouldn't be very
				682	// common since most persistent cookies have a long lifetime.
				683	CookieMonster::CookieList CookieMonster::GetAllCookies() {
				684	AutoLock autolock(lock_);
				685	InitIfNecessary();
				686
				687	CookieList cookie_list;
				688
				689	for (CookieMap::iterator it = cookies_.begin(); it != cookies_.end(); ++it) {
				690	cookie_list.push_back(CookieListPair(it->first, *it->second));
				691	}
				692
				693	return cookie_list;
				694	}
				695
				696	void CookieMonster::FindCookiesForHostAndDomain(
				697	const GURL& url,
				698	CookieOptions options,
				699	std::vector<CanonicalCookie> cookies) {
				700	AutoLock autolock(lock_);
				701	InitIfNecessary();
				702
				703	const Time current_time(CurrentTime());
				704
				705	// Query for the full host, For example: 'a.c.blah.com'.
				706	std::string key(url.host());
				707	FindCookiesForKey(key, url, options, current_time, cookies);
				708
				709	// See if we can search for domain cookies, i.e. if the host has a TLD + 1.
				710	const std::string domain(
				711	RegistryControlledDomainService::GetDomainAndRegistry(key));
				712	if (domain.empty())
				713	return;
				714	DCHECK_LE(domain.length(), key.length());
				715	DCHECK_EQ(0, key.compare(key.length() - domain.length(), domain.length(),
				716	domain));
				717
				718	// Walk through the string and query at the dot points (GURL should have
				719	// canonicalized the dots, so this should be safe). Stop once we reach the
				720	// domain + registry; we can't write cookies past this point, and with some
				721	// registrars other domains can, in which case we don't want to read their
				722	// cookies.
				723	for (key = "." + key; key.length() > domain.length(); ) {
				724	FindCookiesForKey(key, url, options, current_time, cookies);
				725	const size_t next_dot = key.find('.', 1); // Skip over leading dot.
				726	key.erase(0, next_dot);
				727	}
				728	}
				729
				730	void CookieMonster::FindCookiesForKey(
				731	const std::string& key,
				732	const GURL& url,
				733	CookieOptions options,
				734	const Time& current,
				735	std::vector<CanonicalCookie> cookies) {
				736	bool secure = url.SchemeIsSecure();
				737
				738	for (CookieMapItPair its = cookies_.equal_range(key);
				739	its.first != its.second; ) {
				740	CookieMap::iterator curit = its.first;
				741	CanonicalCookie* cc = curit->second;
				742	++its.first;
				743
				744	// If the cookie is expired, delete it.
				745	if (cc->IsExpired(current)) {
				746	InternalDeleteCookie(curit, true);
				747	continue;
				748	}
				749
				750	// Filter out HttpOnly cookies unless they where explicitly requested.
				751	if ((options & INCLUDE_HTTPONLY) == 0 && cc->IsHttpOnly())
				752	continue;
				753
				754	// Filter out secure cookies unless we're https.
				755	if (!secure && cc->IsSecure())
				756	continue;
				757
				758	if (!cc->IsOnPath(url.path()))
				759	continue;
				760
				761	// Congratulations Charlie, you passed the test!
				762	cookies->push_back(cc);
				763	}
				764	}
				765
				766
				767	CookieMonster::ParsedCookie::ParsedCookie(const std::string& cookie_line)
				768	: is_valid_(false),
				769	path_index_(0),
				770	domain_index_(0),
				771	expires_index_(0),
				772	maxage_index_(0),
				773	secure_index_(0),
				774	httponly_index_(0) {
				775
				776	if (cookie_line.size() > kMaxCookieSize) {
				777	LOG(INFO) << "Not parsing cookie, too large: " << cookie_line.size();
				778	return;
				779	}
				780
				781	ParseTokenValuePairs(cookie_line);
				782	if (pairs_.size() > 0) {
				783	is_valid_ = true;
				784	SetupAttributes();
				785	}
				786	}
				787
				788	// Returns true if \|c\| occurs in \|chars\|
				789	// TODO maybe make this take an iterator, could check for end also?
				790	static inline bool CharIsA(const char c, const char* chars) {
				791	return strchr(chars, c) != NULL;
				792	}
				793	// Seek the iterator to the first occurrence of a character in \|chars\|.
				794	// Returns true if it hit the end, false otherwise.
				795	static inline bool SeekTo(std::string::const_iterator* it,
				796	const std::string::const_iterator& end,
				797	const char* chars) {
				798	for (; it != end && !CharIsA(it, chars); ++(it));
				799	return *it == end;
				800	}
				801	// Seek the iterator to the first occurrence of a character not in \|chars\|.
				802	// Returns true if it hit the end, false otherwise.
				803	static inline bool SeekPast(std::string::const_iterator* it,
				804	const std::string::const_iterator& end,
				805	const char* chars) {
				806	for (; it != end && CharIsA(it, chars); ++(it));
				807	return *it == end;
				808	}
				809	static inline bool SeekBackPast(std::string::const_iterator* it,
				810	const std::string::const_iterator& end,
				811	const char* chars) {
				812	for (; it != end && CharIsA(it, chars); --(it));
				813	return *it == end;
				814	}
				815
				816	// Parse all token/value pairs and populate pairs_.
				817	void CookieMonster::ParsedCookie::ParseTokenValuePairs(
				818	const std::string& cookie_line) {
				819	static const char kTerminator[] = "\n\r\0";
				820	static const int kTerminatorLen = sizeof(kTerminator) - 1;
				821	static const char kWhitespace[] = " \t";
				822	static const char kQuoteTerminator[] = "\"";
				823	static const char kValueSeparator[] = ";";
				824	static const char kTokenSeparator[] = ";=";
				825
				826	pairs_.clear();
				827
				828	// Ok, here we go. We should be expecting to be starting somewhere
				829	// before the cookie line, not including any header name...
				830	std::string::const_iterator start = cookie_line.begin();
				831	std::string::const_iterator end = cookie_line.end();
				832	std::string::const_iterator it = start;
				833
				834	// TODO Make sure we're stripping \r\n in the network code. Then we
				835	// can log any unexpected terminators.
				836	std::string::size_type term_pos = cookie_line.find_first_of(
				837	std::string(kTerminator, kTerminatorLen));
				838	if (term_pos != std::string::npos) {
				839	// We found a character we should treat as an end of string.
				840	end = start + term_pos;
				841	}
				842
				843	for (int pair_num = 0; pair_num < kMaxPairs && it != end; ++pair_num) {
				844	TokenValuePair pair;
				845	std::string::const_iterator token_start, token_real_end, token_end;
				846
				847	// Seek past any whitespace before the "token" (the name).
				848	// token_start should point at the first character in the token
				849	if (SeekPast(&it, end, kWhitespace))
				850	break; // No token, whitespace or empty.
				851	token_start = it;
				852
				853	// Seek over the token, to the token separator.
				854	// token_real_end should point at the token separator, i.e. '='.
				855	// If it == end after the seek, we probably have a token-value.
				856	SeekTo(&it, end, kTokenSeparator);
				857	token_real_end = it;
				858
				859	// Ignore any whitespace between the token and the token separator.
				860	// token_end should point after the last interesting token character,
				861	// pointing at either whitespace, or at '=' (and equal to token_real_end).
				862	if (it != token_start) { // We could have an empty token name.
				863	--it; // Go back before the token separator.
				864	// Skip over any whitespace to the first non-whitespace character.
				865	SeekBackPast(&it, token_start, kWhitespace);
				866	// Point after it.
				867	++it;
				868	}
				869	token_end = it;
				870
				871	// Seek us back to the end of the token.
				872	it = token_real_end;
				873
				874	if (it == end \|\| *it != '=') {
				875	// We have a token-value, we didn't have any token name.
				876	if (pair_num == 0) {
				877	// For the first time around, we want to treat single values
				878	// as a value with an empty name. (Mozilla bug 169091).
				879	// IE seems to also have this behavior, ex "AAA", and "AAA=10" will
				880	// set 2 different cookies, and setting "BBB" will then replace "AAA".
				881	pair.first = "";
				882	// Rewind to the beginning of what we thought was the token name,
				883	// and let it get parsed as a value.
				884	it = token_start;
				885	} else {
				886	// Any not-first attribute we want to treat a value as a
				887	// name with an empty value... This is so something like
				888	// "secure;" will get parsed as a Token name, and not a value.
				889	pair.first = std::string(token_start, token_end);
				890	}
				891	} else {
				892	// We have a TOKEN=VALUE.
				893	pair.first = std::string(token_start, token_end);
				894	++it; // Skip past the '='.
				895	}
				896
				897	// OK, now try to parse a value.
				898	std::string::const_iterator value_start, value_end;
				899
				900	// Seek past any whitespace that might in-between the token and value.
				901	SeekPast(&it, end, kWhitespace);
				902	// value_start should point at the first character of the value.
				903	value_start = it;
				904
				905	// The value is double quoted, process <quoted-string>.
				906	if (it != end && *it == '"') {
				907	// Skip over the first double quote, and parse until
				908	// a terminating double quote or the end.
				909	for (++it; it != end && !CharIsA(*it, kQuoteTerminator); ++it) {
				910	// Allow an escaped \" in a double quoted string.
				911	if (*it == '\\') {
				912	++it;
				913	if (it == end)
				914	break;
				915	}
				916	}
				917
				918	SeekTo(&it, end, kValueSeparator);
				919	// We could seek to the end, that's ok.
				920	value_end = it;
				921	} else {
				922	// The value is non-quoted, process <token-value>.
				923	// Just look for ';' to terminate ('=' allowed).
				924	// We can hit the end, maybe they didn't terminate.
				925	SeekTo(&it, end, kValueSeparator);
				926
				927	// Ignore any whitespace between the value and the value separator
				928	if (it != value_start) { // Could have an empty value
				929	--it;
				930	SeekBackPast(&it, value_start, kWhitespace);
				931	++it;
				932	}
				933
				934	value_end = it;
				935	}
				936
				937	// OK, we're finished with a Token/Value.
				938	pair.second = std::string(value_start, value_end);
				939	// From RFC2109: "Attributes (names) (attr) are case-insensitive."
				940	if (pair_num != 0)
				941	StringToLowerASCII(&pair.first);
				942	pairs_.push_back(pair);
				943
				944	// We've processed a token/value pair, we're either at the end of
				945	// the string or a ValueSeparator like ';', which we want to skip.
				946	if (it != end)
				947	++it;
				948	}
				949	}
				950
				951	void CookieMonster::ParsedCookie::SetupAttributes() {
				952	static const char kPathTokenName[] = "path";
				953	static const char kDomainTokenName[] = "domain";
				954	static const char kExpiresTokenName[] = "expires";
				955	static const char kMaxAgeTokenName[] = "max-age";
				956	static const char kSecureTokenName[] = "secure";
				957	static const char kHttpOnlyTokenName[] = "httponly";
				958
				959	// We skip over the first token/value, the user supplied one.
				960	for (size_t i = 1; i < pairs_.size(); ++i) {
				961	if (pairs_[i].first == kPathTokenName)
				962	path_index_ = i;
				963	else if (pairs_[i].first == kDomainTokenName)
				964	domain_index_ = i;
				965	else if (pairs_[i].first == kExpiresTokenName)
				966	expires_index_ = i;
				967	else if (pairs_[i].first == kMaxAgeTokenName)
				968	maxage_index_ = i;
				969	else if (pairs_[i].first == kSecureTokenName)
				970	secure_index_ = i;
				971	else if (pairs_[i].first == kHttpOnlyTokenName)
				972	httponly_index_ = i;
				973	else { /* some attribute we don't know or don't care about. */ }
				974	}
				975	}
				976
				977	// Create a cookie-line for the cookie. For debugging only!
				978	// If we want to use this for something more than debugging, we
				979	// should rewrite it better...
				980	std::string CookieMonster::ParsedCookie::DebugString() const {
				981	std::string out;
				982	for (PairList::const_iterator it = pairs_.begin();
				983	it != pairs_.end(); ++it) {
				984	out.append(it->first);
				985	out.append("=");
				986	out.append(it->second);
				987	out.append("; ");
				988	}
				989	return out;
				990	}
				991
				992	bool CookieMonster::CanonicalCookie::IsOnPath(
				993	const std::string& url_path) const {
				994
				995	// A zero length would be unsafe for our trailing '/' checks, and
				996	// would also make no sense for our prefix match. The code that
				997	// creates a CanonicalCookie should make sure the path is never zero length,
				998	// but we double check anyway.
				999	if (path_.empty())
				1000	return false;
				1001
				1002	// The Mozilla code broke it into 3 cases, if it's strings lengths
				1003	// are less than, equal, or greater. I think this is simpler:
				1004
				1005	// Make sure the cookie path is a prefix of the url path. If the
				1006	// url path is shorter than the cookie path, then the cookie path
				1007	// can't be a prefix.
				1008	if (url_path.find(path_) != 0)
				1009	return false;
				1010
				1011	// Now we know that url_path is >= cookie_path, and that cookie_path
				1012	// is a prefix of url_path. If they are the are the same length then
				1013	// they are identical, otherwise we need an additional check:
				1014
				1015	// In order to avoid in correctly matching a cookie path of /blah
				1016	// with a request path of '/blahblah/', we need to make sure that either
				1017	// the cookie path ends in a trailing '/', or that we prefix up to a '/'
				1018	// in the url path. Since we know that the url path length is greater
				1019	// than the cookie path length, it's safe to index one byte past.
				1020	if (path_.length() != url_path.length() &&
				1021	path_[path_.length() - 1] != '/' &&
				1022	url_path[path_.length()] != '/')
				1023	return false;
				1024
				1025	return true;
				1026	}
				1027
				1028	std::string CookieMonster::CanonicalCookie::DebugString() const {
				1029	return StringPrintf("name: %s value: %s path: %s creation: %llu",
				1030	name_.c_str(), value_.c_str(), path_.c_str(),
				1031	creation_date_.ToTimeT());
				1032	}
[email protected]	8ac1a75	2008-07-31 19:40:37	[diff] [blame]	1033
				1034	} // namespace
license.bot	bf09a50	2008-08-24 00:55:55	[diff] [blame^]	1035