author     initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98>  2008-07-26 22:42:52 +0000
committer  initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98>  2008-07-26 22:42:52 +0000
commit     586acc5fe142f498261f52c66862fa417c3d52d2 (patch)
tree       c98b3417a883f2477029c8cd5888f4078681e24e /net/base/cookie_monster.cc
parent     a814a8d55429605fe6d7045045cd25b6bf624580 (diff)
Add net to the repository.
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@14 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net/base/cookie_monster.cc')
-rw-r--r--  net/base/cookie_monster.cc  1043
1 files changed, 1043 insertions, 0 deletions
diff --git a/net/base/cookie_monster.cc b/net/base/cookie_monster.cc
new file mode 100644
index 0000000..0483acb
--- /dev/null
+++ b/net/base/cookie_monster.cc
@@ -0,0 +1,1043 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Portions of this code based on Mozilla:
+// (netwerk/cookie/src/nsCookieService.cpp)
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is mozilla.org code.
+ *
+ * The Initial Developer of the Original Code is
+ * Netscape Communications Corporation.
+ * Portions created by the Initial Developer are Copyright (C) 2003
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ * Daniel Witte (dwitte@stanford.edu)
+ * Michiel van Leeuwen (mvl@exedo.nl)
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#include "net/base/cookie_monster.h"
+
+#include <algorithm>
+
+#include "base/basictypes.h"
+#include "base/logging.h"
+#include "base/scoped_ptr.h"
+#include "base/string_tokenizer.h"
+#include "base/string_util.h"
+#include "googleurl/src/gurl.h"
+#include "googleurl/src/url_canon.h"
+#include "net/base/net_util.h"
+#include "net/base/registry_controlled_domain.h"
+
+// #define COOKIE_LOGGING_ENABLED
+#ifdef COOKIE_LOGGING_ENABLED
+#define COOKIE_DLOG(severity) DLOG_IF(INFO, 1)
+#else
+#define COOKIE_DLOG(severity) DLOG_IF(INFO, 0)
+#endif
+
+/*static*/ bool CookieMonster::enable_file_scheme_ = false;
+
+// static
+void CookieMonster::EnableFileScheme() {
+ enable_file_scheme_ = true;
+}
+
+CookieMonster::CookieMonster()
+ : initialized_(false),
+ store_(NULL) {
+}
+
+CookieMonster::CookieMonster(PersistentCookieStore* store)
+ : initialized_(false),
+ store_(store) {
+}
+
+CookieMonster::~CookieMonster() {
+ DeleteAll(false);
+}
+
+void CookieMonster::InitStore() {
+ DCHECK(store_) << "Store must exist to initialize";
+
+ // Initialize the store and sync in any saved persistent cookies. We don't
+ // care if a cookie is expired; insert it anyway so it can be garbage
+ // collected, removed, and sync'd.
+ std::vector<KeyedCanonicalCookie> cookies;
+ store_->Load(&cookies);
+ for (std::vector<KeyedCanonicalCookie>::const_iterator it = cookies.begin();
+ it != cookies.end(); ++it) {
+ InternalInsertCookie(it->first, it->second, false);
+ }
+}
+
+// The system resolution is not high enough, so we can have multiple
+// set cookies that result in the same system time. When this happens, we
+// increment by one Time unit. Let's hope computers don't get too fast.
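+// For example, if two SetCookie() calls land in the same clock tick, the
+// first gets Time::Now() and, because SetCookie() stores that value into
+// last_time_seen_, the second gets Time::Now() + 1 internal unit, so
+// creation times stay strictly increasing.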
+Time CookieMonster::CurrentTime() {
+ return std::max(Time::Now(),
+ Time::FromInternalValue(last_time_seen_.ToInternalValue() + 1));
+}
+
+// Parse a cookie expiration time. We try to be lenient, but we need to
+// assume some order to distinguish the fields. The basic rules:
+// - The month name must be present; only its first 3 letters are matched
+// (jan for January, jun for June).
+// - If the year is <= 2 digits, it must occur after the day of month.
+// - The time must be of the format hh:mm:ss.
+// An average cookie expiration will look something like this:
+// Sat, 15-Apr-17 21:01:22 GMT
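+// For that example, the tokens are classified roughly as: "Sat" is skipped
+// (not a month prefix), "15" becomes the day of month, "Apr" the month (4),
+// "17" the year (normalized below to 2017), "21:01:22" the time, and "GMT"
+// is ignored as a presumed time zone name.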
+Time CookieMonster::ParseCookieTime(const std::string& time_string) {
+ static const char* kMonths[] = { "jan", "feb", "mar", "apr", "may", "jun",
+ "jul", "aug", "sep", "oct", "nov", "dec" };
+ static const int kMonthsLen = arraysize(kMonths);
+ // We want to be pretty liberal, and support most non-alpha and non-digit
+ // characters as a delimiter. We can't treat : as a delimiter, because it
+ // is the delimiter for hh:mm:ss, and we want to keep this field together.
+ // We make sure to include - and +, since they could prefix numbers.
+ // If the cookie attribute value came in quoted (e.g. expires="XXX"), the quotes
+ // will be preserved, and we will get them here. So we make sure to include
+ // quote characters, and also \ for anything that was internally escaped.
+ static const char* kDelimiters = "\t !\"#$%&'()*+,-./;<=>?@[\\]^_`{|}~";
+
+ Time::Exploded exploded = {0};
+
+ StringTokenizer tokenizer(time_string, kDelimiters);
+
+ bool found_day_of_month = false;
+ bool found_month = false;
+ bool found_time = false;
+ bool found_year = false;
+
+ while (tokenizer.GetNext()) {
+ const std::string token = tokenizer.token();
+ DCHECK(!token.empty());
+ bool numerical = IsAsciiDigit(token[0]);
+
+ // String field
+ if (!numerical) {
+ if (!found_month) {
+ for (int i = 0; i < kMonthsLen; ++i) {
+ // Match prefix, so we could match January, etc
+ if (StrNCaseCmp(token.c_str(), kMonths[i], 3) == 0) {
+ exploded.month = i + 1;
+ found_month = true;
+ break;
+ }
+ }
+ } else {
+ // If we've gotten here, it means we've already found and parsed our
+ // month, and we have another string, which we would expect to be
+ // the time zone name. According to the RFC and my experiments with
+ // how sites format their expirations, we don't have much of a reason
+ // to support timezones. We don't want to ever barf on user input,
+ // but this DCHECK should pass for well-formed data.
+ // DCHECK(token == "GMT");
+ }
+ // Numeric field w/ a colon
+ } else if (token.find(':') != std::string::npos) {
+ if (!found_time &&
+ sscanf_s(token.c_str(), "%2hu:%2hu:%2hu", &exploded.hour,
+ &exploded.minute, &exploded.second) == 3) {
+ found_time = true;
+ } else {
+ // We should only ever encounter one time-like thing. If we're here,
+ // it means we've found a second, which shouldn't happen. We keep
+ // the first. This check should be ok for well-formed input:
+ // NOTREACHED();
+ }
+ // Numeric field
+ } else {
+ // Overflow with atoi() is unspecified, so we enforce a max length.
+ if (!found_day_of_month && token.length() <= 2) {
+ exploded.day_of_month = atoi(token.c_str());
+ found_day_of_month = true;
+ } else if (!found_year && token.length() <= 5) {
+ exploded.year = atoi(token.c_str());
+ found_year = true;
+ } else {
+ // If we're here, it means we've either found an extra numeric field,
+ // or a numeric field which was too long. For well-formed input, the
+ // following check would be reasonable:
+ // NOTREACHED();
+ }
+ }
+ }
+
+ if (!found_day_of_month || !found_month || !found_time || !found_year) {
+ // We didn't find all of the fields we need. For well-formed input, the
+ // following check would be reasonable:
+ // NOTREACHED() << "Cookie parse expiration failed: " << time_string;
+ return Time();
+ }
+
+ // Normalize the year to expand abbreviated years to the full year.
+ if (exploded.year >= 69 && exploded.year <= 99)
+ exploded.year += 1900;
+ if (exploded.year >= 0 && exploded.year <= 68)
+ exploded.year += 2000;
+
+ // If our values are within their correct ranges, we got our time.
+ if (exploded.day_of_month >= 1 && exploded.day_of_month <= 31 &&
+ exploded.month >= 1 && exploded.month <= 12 &&
+ exploded.year >= 1601 && exploded.year <= 30827 &&
+ exploded.hour <= 23 && exploded.minute <= 59 && exploded.second <= 59) {
+ return Time::FromUTCExploded(exploded);
+ }
+
+ // One of our values was out of expected range. For well-formed input,
+ // the following check would be reasonable:
+ // NOTREACHED() << "Cookie exploded expiration failed: " << time_string;
+
+ return Time();
+}
+
+// Determine the cookie domain key to use for setting the specified cookie.
+// On success returns true, and sets cookie_domain_key to either
+// - a host cookie key (ex: "google.com"), or
+// - a domain cookie key (ex: ".google.com").
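+// For example, for a request to http://www.google.com/, no domain attribute
+// yields the host key "www.google.com", while domain=google.com and
+// domain=.google.com both yield the domain key ".google.com";
+// domain=evil.com is rejected because the domain + registry differ.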
+static bool GetCookieDomainKey(const GURL& url,
+ const CookieMonster::ParsedCookie& pc,
+ std::string* cookie_domain_key) {
+ const std::string url_host(url.host());
+ if (!pc.HasDomain() || pc.Domain().empty()) {
+ // No domain was specified in cookie -- default to host cookie.
+ *cookie_domain_key = url_host;
+ DCHECK((*cookie_domain_key)[0] != '.');
+ return true;
+ }
+
+ // Get the normalized domain specified in cookie line.
+ // Note: The RFC says we can reject a cookie if the domain
+ // attribute does not start with a dot. IE/FF/Safari however, allow a cookie
+ // of the form domain=my.domain.com, treating it the same as
+ // domain=.my.domain.com -- for compatibility we do the same here. Firefox
+ // also treats domain=.....my.domain.com like domain=.my.domain.com, but
+ // neither IE nor Safari do this, and we don't either.
+ std::string cookie_domain(net_util::CanonicalizeHost(pc.Domain(), NULL));
+ if (cookie_domain.empty())
+ return false;
+ if (cookie_domain[0] != '.')
+ cookie_domain = "." + cookie_domain;
+
+ // Ensure |url| and |cookie_domain| have the same domain+registry.
+ const std::string url_domain_and_registry(
+ RegistryControlledDomainService::GetDomainAndRegistry(url));
+ if (url_domain_and_registry.empty())
+ return false; // IP addresses/intranet hosts can't set domain cookies.
+ const std::string cookie_domain_and_registry(
+ RegistryControlledDomainService::GetDomainAndRegistry(cookie_domain));
+ if (url_domain_and_registry != cookie_domain_and_registry)
+ return false; // Can't set a cookie on a different domain + registry.
+
+ // Ensure |url_host| is |cookie_domain| or one of its subdomains. Given that
+ // we know the domain+registry are the same from the above checks, this is
+ // basically a simple string suffix check.
+ if ((url_host.length() < cookie_domain.length()) ?
+ (cookie_domain != ("." + url_host)) :
+ url_host.compare(url_host.length() - cookie_domain.length(),
+ cookie_domain.length(), cookie_domain))
+ return false;
+
+ *cookie_domain_key = cookie_domain;
+ return true;
+}
+
+static std::string CanonPath(const GURL& url,
+ const CookieMonster::ParsedCookie& pc) {
+ // The RFC says the path should be a prefix of the current URL path.
+ // However, Mozilla allows you to set any path for compatibility with
+ // broken websites. We unfortunately will mimic this behavior. We try
+ // to be generous and accept cookies with an invalid path attribute, and
+ // default the path to something reasonable.
+
+ // If the path was supplied in the cookie, we'll take it.
+ if (pc.HasPath() && !pc.Path().empty() && pc.Path()[0] == '/')
+ return pc.Path();
+
+ // The path was not supplied in the cookie, or was invalid; we default
+ // to the current URL path.
+ // """Defaults to the path of the request URL that generated the
+ // Set-Cookie response, up to, but not including, the
+ // right-most /."""
+ // How would this work for a cookie on /? We will include it then.
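+ // For example, for http://www.google.com/a/b with a missing or invalid
+ // path attribute the cookie path defaults to "/a", and for
+ // http://www.google.com/ it defaults to "/".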
+ const std::string& url_path = url.path();
+
+ std::string::size_type idx = url_path.find_last_of('/');
+
+ // The URL path's only '/' is the leading one (or there is none); default to "/".
+ if (idx == 0 || idx == std::string::npos)
+ return std::string("/");
+
+ // Return up to the rightmost '/'.
+ return url_path.substr(0, idx);
+}
+
+static Time CanonExpiration(const CookieMonster::ParsedCookie& pc,
+ const Time& current) {
+ // First, try the Max-Age attribute.
+ uint64 max_age = 0;
+ if (pc.HasMaxAge() &&
+ sscanf_s(pc.MaxAge().c_str(), " %I64u", &max_age) == 1) {
+ return current + TimeDelta::FromSeconds(max_age);
+ }
+
+ // Try the Expires attribute.
+ if (pc.HasExpires())
+ return CookieMonster::ParseCookieTime(pc.Expires());
+
+ // Invalid or no expiration; this is a session (non-persistent) cookie.
+ return Time();
+}
+
+static bool HasCookieableScheme(const GURL& url) {
+ static const char* kCookieableSchemes[] = { "http", "https", "file" };
+ static const int kCookieableSchemesLen = arraysize(kCookieableSchemes);
+ static const int kCookieableSchemesFileIndex = 2;
+
+ // Make sure the request is on a cookie-able url scheme.
+ for (int i = 0; i < kCookieableSchemesLen; ++i) {
+ // We matched a scheme.
+ if (url.SchemeIs(kCookieableSchemes[i])) {
+ // This is file:// scheme
+ if (i == kCookieableSchemesFileIndex)
+ return CookieMonster::enable_file_scheme_;
+ // We've matched a supported scheme.
+ return true;
+ }
+ }
+
+ // The scheme didn't match any in our whitelist.
+ COOKIE_DLOG(WARNING) << "Unsupported cookie scheme: " << url.scheme();
+ return false;
+}
+
+bool CookieMonster::SetCookie(const GURL& url,
+ const std::string& cookie_line) {
+ Time creation_date = CurrentTime();
+ last_time_seen_ = creation_date;
+ return SetCookieWithCreationTime(url, cookie_line, creation_date);
+}
+
+bool CookieMonster::SetCookieWithCreationTime(const GURL& url,
+ const std::string& cookie_line,
+ const Time& creation_time) {
+ DCHECK(!creation_time.is_null());
+
+ if (!HasCookieableScheme(url)) {
+ DLOG(WARNING) << "Unsupported cookie scheme: " << url.scheme();
+ return false;
+ }
+
+ AutoLock autolock(lock_);
+ InitIfNecessary();
+
+ COOKIE_DLOG(INFO) << "SetCookie() line: " << cookie_line;
+
+ // Parse the cookie.
+ ParsedCookie pc(cookie_line);
+
+ if (!pc.IsValid()) {
+ COOKIE_DLOG(WARNING) << "Couldn't parse cookie";
+ return false;
+ }
+
+ std::string cookie_domain;
+ if (!GetCookieDomainKey(url, pc, &cookie_domain)) {
+ return false;
+ }
+
+ std::string cookie_path = CanonPath(url, pc);
+
+ scoped_ptr<CanonicalCookie> cc;
+ Time cookie_expires = CanonExpiration(pc, creation_time);
+
+ cc.reset(new CanonicalCookie(pc.Name(), pc.Value(), cookie_path,
+ pc.IsSecure(), pc.IsHttpOnly(),
+ creation_time, !cookie_expires.is_null(),
+ cookie_expires));
+
+ if (!cc.get()) {
+ COOKIE_DLOG(WARNING) << "Failed to allocate CanonicalCookie";
+ return false;
+ }
+
+ // Delete any cookie equivalent to the new one; there should be at most one.
+ int num_deleted = DeleteEquivalentCookies(cookie_domain, *cc);
+
+ COOKIE_DLOG(INFO) << "SetCookie() cc: " << cc->DebugString();
+
+ // Note that we might be setting an expired cookie, in which case the only
+ // point was to delete the old equivalent cookie, which we've already done.
+ if (!cc->IsExpired(creation_time))
+ InternalInsertCookie(cookie_domain, cc.release(), true);
+
+ // We assume that setting a cookie will be less common than querying one.
+ // Since setting a cookie can put us over our limits, make sure that we
+ // garbage collect here... We can also assume that if a cookie was set, in
+ // the common case it will be used soon after, and the expired cookies will
+ // be purged in GetCookies().
+ GarbageCollect(creation_time, cookie_domain);
+
+ return true;
+}
+
+void CookieMonster::SetCookies(const GURL& url,
+ const std::vector<std::string>& cookies) {
+ for (std::vector<std::string>::const_iterator iter = cookies.begin();
+ iter != cookies.end(); ++iter)
+ SetCookie(url, *iter);
+}
+
+void CookieMonster::InternalInsertCookie(const std::string& key,
+ CanonicalCookie* cc,
+ bool sync_to_store) {
+ if (cc->IsPersistent() && store_ && sync_to_store)
+ store_->AddCookie(key, *cc);
+ cookies_.insert(CookieMap::value_type(key, cc));
+}
+
+void CookieMonster::InternalDeleteCookie(CookieMap::iterator it,
+ bool sync_to_store) {
+ CanonicalCookie* cc = it->second;
+ COOKIE_DLOG(INFO) << "InternalDeleteCookie() cc: " << cc->DebugString();
+ if (cc->IsPersistent() && store_ && sync_to_store)
+ store_->DeleteCookie(*cc);
+ cookies_.erase(it);
+ delete cc;
+}
+
+int CookieMonster::DeleteEquivalentCookies(const std::string& key,
+ const CanonicalCookie& ecc) {
+ int num_deleted = 0;
+ for (CookieMapItPair its = cookies_.equal_range(key);
+ its.first != its.second; ) {
+ CookieMap::iterator curit = its.first;
+ CanonicalCookie* cc = curit->second;
+ ++its.first;
+
+ // TODO while we're here, we might as well purge expired cookies too.
+
+ if (ecc.IsEquivalent(*cc)) {
+ InternalDeleteCookie(curit, true);
+ ++num_deleted;
+#ifdef NDEBUG
+ // We should only ever find a single equivalent cookie
+ break;
+#endif
+ }
+ }
+
+ // Our internal state should be consistent; we should never have more
+ // than one equivalent cookie, since they should overwrite each other.
+ DCHECK(num_deleted <= 1);
+
+ return num_deleted;
+}
+
+// TODO we should be sorting by last access time, however, right now
+// we're not saving an access time, so we're sorting by creation time.
+static bool OldestCookieSorter(const CookieMonster::CookieMap::iterator& it1,
+ const CookieMonster::CookieMap::iterator& it2) {
+ return it1->second->CreationDate() < it2->second->CreationDate();
+}
+
+// is vector::size_type always going to be size_t?
+int CookieMonster::GarbageCollectRange(const Time& current,
+ const CookieMapItPair& itpair,
+ size_t num_max, size_t num_purge) {
+ int num_deleted = 0;
+
+ // First, walk through and delete anything that's expired.
+ // Save a list of iterators to the ones that weren't expired
+ std::vector<CookieMap::iterator> cookie_its;
+ for (CookieMap::iterator it = itpair.first, end = itpair.second; it != end;) {
+ CookieMap::iterator curit = it;
+ CanonicalCookie* cc = curit->second;
+ ++it;
+
+ if (cc->IsExpired(current)) {
+ InternalDeleteCookie(curit, true);
+ ++num_deleted;
+ } else {
+ cookie_its.push_back(curit);
+ }
+ }
+
+ if (cookie_its.size() > num_max) {
+ COOKIE_DLOG(INFO) << "GarbageCollectRange() Deep Garbage Collect.";
+ num_purge += cookie_its.size() - num_max;
+ // Sort the top N we want to purge.
+ std::partial_sort(cookie_its.begin(), cookie_its.begin() + num_purge,
+ cookie_its.end(), OldestCookieSorter);
+
+ // TODO should probably use an iterator and not an index.
+ for (size_t i = 0; i < num_purge; ++i) {
+ InternalDeleteCookie(cookie_its[i], true);
+ ++num_deleted;
+ }
+ }
+
+ return num_deleted;
+}
+
+// TODO Whenever we delete, check last_cur_utc_...
+int CookieMonster::GarbageCollect(const Time& current,
+ const std::string& key) {
+ // Based on the Mozilla defaults.
+ // It might seem scary to have a high purge value, but really it's not. As
+ // long as the max is increased to cover the increase in purge, we end up
+ // purging the same number of cookies; we just go through the garbage
+ // collection process less often.
+ static const size_t kNumCookiesPerHost = 70; // ~50 cookies
+ static const size_t kNumCookiesPerHostPurge = 20;
+ static const size_t kNumCookiesTotal = 1100; // ~1000 cookies
+ static const size_t kNumCookiesTotalPurge = 100;
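+ // For example, once a host exceeds 70 cookies, GarbageCollectRange()
+ // purges the oldest down to 50 (70 - 20), which is what the "~50 cookies"
+ // note above refers to.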
+
+ int num_deleted = 0;
+
+ // Collect garbage for this key.
+ if (cookies_.count(key) > kNumCookiesPerHost) {
+ COOKIE_DLOG(INFO) << "GarbageCollect() key: " << key;
+ num_deleted += GarbageCollectRange(current, cookies_.equal_range(key),
+ kNumCookiesPerHost,
+ kNumCookiesPerHostPurge);
+ }
+
+ // Collect garbage for everything.
+ if (cookies_.size() > kNumCookiesTotal) {
+ COOKIE_DLOG(INFO) << "GarbageCollect() everything";
+ num_deleted += GarbageCollectRange(current,
+ CookieMapItPair(cookies_.begin(),
+ cookies_.end()),
+ kNumCookiesTotal, kNumCookiesTotalPurge);
+ }
+
+ return num_deleted;
+}
+
+int CookieMonster::DeleteAll(bool sync_to_store) {
+ AutoLock autolock(lock_);
+ InitIfNecessary();
+
+ int num_deleted = 0;
+ for (CookieMap::iterator it = cookies_.begin(); it != cookies_.end();) {
+ CookieMap::iterator curit = it;
+ ++it;
+ InternalDeleteCookie(curit, sync_to_store);
+ ++num_deleted;
+ }
+
+ return num_deleted;
+}
+
+int CookieMonster::DeleteAllCreatedBetween(const Time& delete_begin,
+ const Time& delete_end,
+ bool sync_to_store) {
+ AutoLock autolock(lock_);
+ InitIfNecessary();
+
+ int num_deleted = 0;
+ for (CookieMap::iterator it = cookies_.begin(); it != cookies_.end();) {
+ CookieMap::iterator curit = it;
+ CanonicalCookie* cc = curit->second;
+ ++it;
+
+ if (cc->CreationDate() >= delete_begin &&
+ (delete_end.is_null() || cc->CreationDate() < delete_end)) {
+ InternalDeleteCookie(curit, sync_to_store);
+ ++num_deleted;
+ }
+ }
+
+ return num_deleted;
+}
+
+int CookieMonster::DeleteAllCreatedAfter(const Time& delete_begin,
+ bool sync_to_store) {
+ return DeleteAllCreatedBetween(delete_begin, Time(), sync_to_store);
+}
+
+bool CookieMonster::DeleteCookie(const std::string& domain,
+ const CanonicalCookie& cookie,
+ bool sync_to_store) {
+ AutoLock autolock(lock_);
+ InitIfNecessary();
+
+ for (CookieMapItPair its = cookies_.equal_range(domain);
+ its.first != its.second; ++its.first) {
+ // The creation date acts as our unique index...
+ if (its.first->second->CreationDate() == cookie.CreationDate()) {
+ InternalDeleteCookie(its.first, sync_to_store);
+ return true;
+ }
+ }
+ return false;
+}
+
+// Mozilla sorts on the path length (longest first), and then it
+// sorts by creation time (oldest first).
+// The RFC says the sort order for the domain attribute is undefined.
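+// For example, a cookie with path "/foo/bar" sorts before one with path
+// "/foo", and of two cookies with equally long paths the older one sorts
+// first.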
+static bool CookieSorter(CookieMonster::CanonicalCookie* cc1,
+ CookieMonster::CanonicalCookie* cc2) {
+ if (cc1->Path().length() == cc2->Path().length())
+ return cc1->CreationDate() < cc2->CreationDate();
+ return cc1->Path().length() > cc2->Path().length();
+}
+
+std::string CookieMonster::GetCookies(const GURL& url) {
+ return GetCookiesWithOptions(url, NORMAL);
+}
+
+// Currently our cookie data structure is based on Mozilla's approach. We have
+// a hash keyed on the cookie's domain, and for any query we walk down the
+// domain components and probe for cookies until we reach the TLD, where we
+// stop. For example, for a.b.blah.com we would probe
+// - a.b.blah.com
+// - .a.b.blah.com (TODO should we check this first or second?)
+// - .b.blah.com
+// - .blah.com
+// There are some alternative datastructures we could try, like a
+// search/prefix trie, where we reverse the hostname and query for all
+// keys that are a prefix of our hostname. I think the hash probing
+// should be fast and simple enough for now.
+std::string CookieMonster::GetCookiesWithOptions(const GURL& url,
+ CookieOptions options) {
+ if (!HasCookieableScheme(url)) {
+ DLOG(WARNING) << "Unsupported cookie scheme: " << url.scheme();
+ return std::string();
+ }
+
+ // Get the cookies for this host and its domain(s).
+ std::vector<CanonicalCookie*> cookies;
+ FindCookiesForHostAndDomain(url, options, &cookies);
+ std::sort(cookies.begin(), cookies.end(), CookieSorter);
+
+ std::string cookie_line;
+ for (std::vector<CanonicalCookie*>::const_iterator it = cookies.begin();
+ it != cookies.end(); ++it) {
+ if (it != cookies.begin())
+ cookie_line += "; ";
+ // In Mozilla if you set a cookie like AAAA, it will have an empty token
+ // and a value of AAAA. When it sends the cookie back, it will send AAAA,
+ // so we need to avoid sending =AAAA for a blank token value.
+ if (!(*it)->Name().empty())
+ cookie_line += (*it)->Name() + "=";
+ cookie_line += (*it)->Value();
+ }
+
+ COOKIE_DLOG(INFO) << "GetCookies() result: " << cookie_line;
+
+ return cookie_line;
+}
+
+// TODO(deanm): We could have expired cookies that haven't been purged yet,
+// and exporting these would be inaccurate, for example in the cookie manager
+// it might show cookies that are actually expired already. We should do
+// a full garbage collection before ... There actually isn't a way to do
+// this right now (a forceful full GC), so we'll have to live with the
+// possibility of showing the user expired cookies. This shouldn't be very
+// common since most persistent cookies have a long lifetime.
+CookieMonster::CookieList CookieMonster::GetAllCookies() {
+ AutoLock autolock(lock_);
+ InitIfNecessary();
+
+ CookieList cookie_list;
+
+ for (CookieMap::iterator it = cookies_.begin(); it != cookies_.end(); ++it) {
+ cookie_list.push_back(CookieListPair(it->first, *it->second));
+ }
+
+ return cookie_list;
+}
+
+void CookieMonster::FindCookiesForHostAndDomain(
+ const GURL& url,
+ CookieOptions options,
+ std::vector<CanonicalCookie*>* cookies) {
+ AutoLock autolock(lock_);
+ InitIfNecessary();
+
+ const Time current_time(CurrentTime());
+
+ // Query for the full host. For example: 'a.c.blah.com'.
+ std::string key(url.host());
+ FindCookiesForKey(key, url, options, current_time, cookies);
+
+ // See if we can search for domain cookies, i.e. if the host has a TLD + 1.
+ const std::string domain(
+ RegistryControlledDomainService::GetDomainAndRegistry(key));
+ if (domain.empty())
+ return;
+ DCHECK_LE(domain.length(), key.length());
+ DCHECK_EQ(0, key.compare(key.length() - domain.length(), domain.length(),
+ domain));
+
+ // Walk through the string and query at the dot points (GURL should have
+ // canonicalized the dots, so this should be safe). Stop once we reach the
+ // domain + registry; we can't write cookies past this point, and with some
+ // registrars other domains can, in which case we don't want to read their
+ // cookies.
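+ // For the 'a.c.blah.com' example above, the probed keys are
+ // "a.c.blah.com", ".a.c.blah.com", ".c.blah.com" and ".blah.com"; ".com"
+ // itself is never probed because the loop stops at the domain + registry.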
+ for (key = "." + key; key.length() > domain.length(); ) {
+ FindCookiesForKey(key, url, options, current_time, cookies);
+ const size_t next_dot = key.find('.', 1); // Skip over leading dot.
+ key.erase(0, next_dot);
+ }
+}
+
+void CookieMonster::FindCookiesForKey(
+ const std::string& key,
+ const GURL& url,
+ CookieOptions options,
+ const Time& current,
+ std::vector<CanonicalCookie*>* cookies) {
+ bool secure = url.SchemeIsSecure();
+
+ for (CookieMapItPair its = cookies_.equal_range(key);
+ its.first != its.second; ) {
+ CookieMap::iterator curit = its.first;
+ CanonicalCookie* cc = curit->second;
+ ++its.first;
+
+ // If the cookie is expired, delete it.
+ if (cc->IsExpired(current)) {
+ InternalDeleteCookie(curit, true);
+ continue;
+ }
+
+ // Filter out HttpOnly cookies unless they were explicitly requested.
+ if ((options & INCLUDE_HTTPONLY) == 0 && cc->IsHttpOnly())
+ continue;
+
+ // Filter out secure cookies unless we're https.
+ if (!secure && cc->IsSecure())
+ continue;
+
+ if (!cc->IsOnPath(url.path()))
+ continue;
+
+ // Congratulations Charlie, you passed the test!
+ cookies->push_back(cc);
+ }
+}
+
+
+CookieMonster::ParsedCookie::ParsedCookie(const std::string& cookie_line)
+ : is_valid_(false),
+ path_index_(0),
+ domain_index_(0),
+ expires_index_(0),
+ maxage_index_(0),
+ secure_index_(0),
+ httponly_index_(0) {
+
+ if (cookie_line.size() > kMaxCookieSize) {
+ LOG(INFO) << "Not parsing cookie, too large: " << cookie_line.size();
+ return;
+ }
+
+ ParseTokenValuePairs(cookie_line);
+ if (pairs_.size() > 0) {
+ is_valid_ = true;
+ SetupAttributes();
+ }
+}
+
+// Returns true if |c| occurs in |chars|
+// TODO maybe make this take an iterator, could check for end also?
+static inline bool CharIsA(const char c, const char* chars) {
+ return strchr(chars, c) != NULL;
+}
+// Seek the iterator to the first occurrence of a character in |chars|.
+// Returns true if it hit the end, false otherwise.
+static inline bool SeekTo(std::string::const_iterator* it,
+ const std::string::const_iterator& end,
+ const char* chars) {
+ for (; *it != end && !CharIsA(**it, chars); ++(*it));
+ return *it == end;
+}
+// Seek the iterator to the first occurrence of a character not in |chars|.
+// Returns true if it hit the end, false otherwise.
+static inline bool SeekPast(std::string::const_iterator* it,
+ const std::string::const_iterator& end,
+ const char* chars) {
+ for (; *it != end && CharIsA(**it, chars); ++(*it));
+ return *it == end;
+}
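+// Seek the iterator backwards past any characters in |chars|, stopping if
+// |end| (the backwards limit, e.g. the start of the token) is reached.
+// Returns true if it hit |end|, false otherwise.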
+static inline bool SeekBackPast(std::string::const_iterator* it,
+ const std::string::const_iterator& end,
+ const char* chars) {
+ for (; *it != end && CharIsA(**it, chars); --(*it));
+ return *it == end;
+}
+
+// Parse all token/value pairs and populate pairs_.
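+// For example, "A=B; path=/; secure" produces the pairs ("A", "B"),
+// ("path", "/") and ("secure", ""); the bare "secure" attribute becomes a
+// name with an empty value, and attribute names after the first pair are
+// lower-cased.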
+void CookieMonster::ParsedCookie::ParseTokenValuePairs(
+ const std::string& cookie_line) {
+ static const char kTerminator[] = "\n\r\0";
+ static const int kTerminatorLen = sizeof(kTerminator) - 1;
+ static const char kWhitespace[] = " \t";
+ static const char kQuoteTerminator[] = "\"";
+ static const char kValueSeparator[] = ";";
+ static const char kTokenSeparator[] = ";=";
+
+ pairs_.clear();
+
+ // Ok, here we go. We expect to be starting at the beginning of the
+ // cookie line itself, not including any header name...
+ std::string::const_iterator start = cookie_line.begin();
+ std::string::const_iterator end = cookie_line.end();
+ std::string::const_iterator it = start;
+
+ // TODO Make sure we're stripping \r\n in the network code. Then we
+ // can log any unexpected terminators.
+ std::string::size_type term_pos = cookie_line.find_first_of(
+ std::string(kTerminator, kTerminatorLen));
+ if (term_pos != std::string::npos) {
+ // We found a character we should treat as an end of string.
+ end = start + term_pos;
+ }
+
+ for (int pair_num = 0; pair_num < kMaxPairs && it != end; ++pair_num) {
+ TokenValuePair pair;
+ std::string::const_iterator token_start, token_real_end, token_end;
+
+ // Seek past any whitespace before the "token" (the name).
+ // token_start should point at the first character in the token
+ if (SeekPast(&it, end, kWhitespace))
+ break; // No token, whitespace or empty.
+ token_start = it;
+
+ // Seek over the token, to the token separator.
+ // token_real_end should point at the token separator, i.e. '='.
+ // If it == end after the seek, we probably have a token-value.
+ SeekTo(&it, end, kTokenSeparator);
+ token_real_end = it;
+
+ // Ignore any whitespace between the token and the token separator.
+ // token_end should point after the last interesting token character,
+ // pointing at either whitespace, or at '=' (and equal to token_real_end).
+ if (it != token_start) { // We could have an empty token name.
+ --it; // Go back before the token separator.
+ // Skip over any whitespace to the first non-whitespace character.
+ SeekBackPast(&it, token_start, kWhitespace);
+ // Point after it.
+ ++it;
+ }
+ token_end = it;
+
+ // Seek us back to the end of the token.
+ it = token_real_end;
+
+ if (it == end || *it != '=') {
+ // We have a token-value, we didn't have any token name.
+ if (pair_num == 0) {
+ // For the first time around, we want to treat single values
+ // as a value with an empty name. (Mozilla bug 169091).
+ // IE seems to also have this behavior, ex "AAA", and "AAA=10" will
+ // set 2 different cookies, and setting "BBB" will then replace "AAA".
+ pair.first = "";
+ // Rewind to the beginning of what we thought was the token name,
+ // and let it get parsed as a value.
+ it = token_start;
+ } else {
+ // For any attribute after the first, we want to treat a bare value as a
+ // name with an empty value... This is so something like
+ // "secure;" will get parsed as a token name, and not a value.
+ pair.first = std::string(token_start, token_end);
+ }
+ } else {
+ // We have a TOKEN=VALUE.
+ pair.first = std::string(token_start, token_end);
+ ++it; // Skip past the '='.
+ }
+
+ // OK, now try to parse a value.
+ std::string::const_iterator value_start, value_end;
+
+ // Seek past any whitespace that might be in-between the token and value.
+ SeekPast(&it, end, kWhitespace);
+ // value_start should point at the first character of the value.
+ value_start = it;
+
+ // The value is double quoted, process <quoted-string>.
+ if (it != end && *it == '"') {
+ // Skip over the first double quote, and parse until
+ // a terminating double quote or the end.
+ for (++it; it != end && !CharIsA(*it, kQuoteTerminator); ++it) {
+ // Allow an escaped \" in a double quoted string.
+ if (*it == '\\') {
+ ++it;
+ if (it == end)
+ break;
+ }
+ }
+
+ SeekTo(&it, end, kValueSeparator);
+ // We could seek to the end, that's ok.
+ value_end = it;
+ } else {
+ // The value is non-quoted, process <token-value>.
+ // Just look for ';' to terminate ('=' allowed).
+ // We can hit the end, maybe they didn't terminate.
+ SeekTo(&it, end, kValueSeparator);
+
+ // Ignore any whitespace between the value and the value separator
+ if (it != value_start) { // Could have an empty value
+ --it;
+ SeekBackPast(&it, value_start, kWhitespace);
+ ++it;
+ }
+
+ value_end = it;
+ }
+
+ // OK, we're finished with a Token/Value.
+ pair.second = std::string(value_start, value_end);
+ // From RFC2109: "Attributes (names) (attr) are case-insensitive."
+ if (pair_num != 0)
+ StringToLowerASCII(&pair.first);
+ pairs_.push_back(pair);
+
+ // We've processed a token/value pair, we're either at the end of
+ // the string or a ValueSeparator like ';', which we want to skip.
+ if (it != end)
+ ++it;
+ }
+}
+
+void CookieMonster::ParsedCookie::SetupAttributes() {
+ static const char kPathTokenName[] = "path";
+ static const char kDomainTokenName[] = "domain";
+ static const char kExpiresTokenName[] = "expires";
+ static const char kMaxAgeTokenName[] = "max-age";
+ static const char kSecureTokenName[] = "secure";
+ static const char kHttpOnlyTokenName[] = "httponly";
+
+ // We skip over the first token/value, the user supplied one.
+ for (size_t i = 1; i < pairs_.size(); ++i) {
+ if (pairs_[i].first == kPathTokenName)
+ path_index_ = i;
+ else if (pairs_[i].first == kDomainTokenName)
+ domain_index_ = i;
+ else if (pairs_[i].first == kExpiresTokenName)
+ expires_index_ = i;
+ else if (pairs_[i].first == kMaxAgeTokenName)
+ maxage_index_ = i;
+ else if (pairs_[i].first == kSecureTokenName)
+ secure_index_ = i;
+ else if (pairs_[i].first == kHttpOnlyTokenName)
+ httponly_index_ = i;
+ else { /* some attribute we don't know or don't care about. */ }
+ }
+}
+
+// Create a cookie-line for the cookie. For debugging only!
+// If we want to use this for something more than debugging, we
+// should rewrite it better...
+std::string CookieMonster::ParsedCookie::DebugString() const {
+ std::string out;
+ for (PairList::const_iterator it = pairs_.begin();
+ it != pairs_.end(); ++it) {
+ out.append(it->first);
+ out.append("=");
+ out.append(it->second);
+ out.append("; ");
+ }
+ return out;
+}
+
+bool CookieMonster::CanonicalCookie::IsOnPath(
+ const std::string& url_path) const {
+
+ // A zero length would be unsafe for our trailing '/' checks, and
+ // would also make no sense for our prefix match. The code that
+ // creates a CanonicalCookie should make sure the path is never zero length,
+ // but we double check anyway.
+ if (path_.empty())
+ return false;
+
+ // The Mozilla code broke this into 3 cases, based on whether the string
+ // lengths are less than, equal, or greater. I think this is simpler:
+
+ // Make sure the cookie path is a prefix of the url path. If the
+ // url path is shorter than the cookie path, then the cookie path
+ // can't be a prefix.
+ if (url_path.find(path_) != 0)
+ return false;
+
+ // Now we know that url_path is >= cookie_path, and that cookie_path
+ // is a prefix of url_path. If they are the same length then they are
+ // identical; otherwise we need an additional check:
+
+ // In order to avoid incorrectly matching a cookie path of /blah
+ // with a request path of '/blahblah/', we need to make sure that either
+ // the cookie path ends in a trailing '/', or that we prefix up to a '/'
+ // in the url path. Since we know that the url path length is greater
+ // than the cookie path length, it's safe to index one byte past.
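+ // For example, a cookie path of "/blah" matches the request paths "/blah"
+ // and "/blah/foo", but not "/blahblah".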
+ if (path_.length() != url_path.length() &&
+ path_[path_.length() - 1] != '/' &&
+ url_path[path_.length()] != '/')
+ return false;
+
+ return true;
+}
+
+std::string CookieMonster::CanonicalCookie::DebugString() const {
+ return StringPrintf("name: %s value: %s path: %s creation: %llu",
+ name_.c_str(), value_.c_str(), path_.c_str(),
+ static_cast<unsigned long long>(creation_date_.ToTimeT()));
+}