diff options
author | initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-07-26 22:42:52 +0000 |
---|---|---|
committer | initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-07-26 22:42:52 +0000 |
commit | 586acc5fe142f498261f52c66862fa417c3d52d2 (patch) | |
tree | c98b3417a883f2477029c8cd5888f4078681e24e /net/base/cookie_monster.cc | |
parent | a814a8d55429605fe6d7045045cd25b6bf624580 (diff) | |
download | chromium_src-586acc5fe142f498261f52c66862fa417c3d52d2.zip chromium_src-586acc5fe142f498261f52c66862fa417c3d52d2.tar.gz chromium_src-586acc5fe142f498261f52c66862fa417c3d52d2.tar.bz2 |
Add net to the repository.
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@14 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net/base/cookie_monster.cc')
-rw-r--r-- | net/base/cookie_monster.cc | 1043 |
1 files changed, 1043 insertions, 0 deletions
diff --git a/net/base/cookie_monster.cc b/net/base/cookie_monster.cc new file mode 100644 index 0000000..0483acb --- /dev/null +++ b/net/base/cookie_monster.cc @@ -0,0 +1,1043 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// Portions of this code based on Mozilla: +// (netwerk/cookie/src/nsCookieService.cpp) +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is mozilla.org code. + * + * The Initial Developer of the Original Code is + * Netscape Communications Corporation. + * Portions created by the Initial Developer are Copyright (C) 2003 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Daniel Witte (dwitte@stanford.edu) + * Michiel van Leeuwen (mvl@exedo.nl) + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. 
 *
 * ***** END LICENSE BLOCK ***** */

#include "net/base/cookie_monster.h"

#include <algorithm>

#include "base/basictypes.h"
#include "base/logging.h"
#include "base/scoped_ptr.h"
#include "base/string_tokenizer.h"
#include "base/string_util.h"
#include "googleurl/src/gurl.h"
#include "googleurl/src/url_canon.h"
#include "net/base/net_util.h"
#include "net/base/registry_controlled_domain.h"

// Verbose cookie logging.  Uncomment the #define below to turn it on.
// Note that the |severity| argument is ignored by both expansions: messages
// always go through DLOG_IF(INFO, ...), with the condition hard-wired to 1
// (enabled) or 0 (disabled).
// #define COOKIE_LOGGING_ENABLED
#ifdef COOKIE_LOGGING_ENABLED
#define COOKIE_DLOG(severity) DLOG_IF(INFO, 1)
#else
#define COOKIE_DLOG(severity) DLOG_IF(INFO, 0)
#endif

// Whether file:// URLs may carry cookies.  Off by default; consulted by
// HasCookieableScheme() below.
/*static*/ bool CookieMonster::enable_file_scheme_ = false;

// Turns on cookie support for the file:// scheme.  There is no way to turn
// it back off from this file.
// static
void CookieMonster::EnableFileScheme() {
  enable_file_scheme_ = true;
}

// Creates a monster with no backing store; nothing is loaded or persisted.
CookieMonster::CookieMonster()
    : initialized_(false),
      store_(NULL) {
}

// Creates a monster backed by |store|.  The store is only recorded here; it
// is not read by this constructor.
CookieMonster::CookieMonster(PersistentCookieStore* store)
    : initialized_(false),
      store_(store) {
}

// Frees every in-memory cookie.  |false| is passed for sync_to_store, so the
// deletions are not forwarded to the persistent store.
CookieMonster::~CookieMonster() {
  DeleteAll(false);
}

// Loads all cookies from |store_| into the in-memory map.  Requires a
// non-NULL |store_|.
void CookieMonster::InitStore() {
  DCHECK(store_) << "Store must exist to initialize";

  // Initialize the store and sync in any saved persistent cookies.  We don't
  // care if it's expired, insert it so it can be garbage collected, removed,
  // and sync'd.
  std::vector<KeyedCanonicalCookie> cookies;
  store_->Load(&cookies);
  for (std::vector<KeyedCanonicalCookie>::const_iterator it = cookies.begin();
       it != cookies.end(); ++it) {
    // sync_to_store is false: these cookies just came from the store, so
    // they are not written back to it.
    InternalInsertCookie(it->first, it->second, false);
  }
}

// The system resolution is not high enough, so we can have multiple
// set cookies that result in the same system time.  When this happens, we
// increment by one Time unit.  Let's hope computers don't get too fast.
//
// Returns the later of "now" and |last_time_seen_| + 1 internal unit.  This
// function does not update |last_time_seen_| itself; SetCookie() does that.
Time CookieMonster::CurrentTime() {
  return std::max(Time::Now(),
      Time::FromInternalValue(last_time_seen_.ToInternalValue() + 1));
}

// Parse a cookie expiration time.  We try to be lenient, but we need to
// assume some order to distinguish the fields.
The basic rules: +// - The month name must be present and prefix the first 3 letters of the +// full month name (jan for January, jun for June). +// - If the year is <= 2 digits, it must occur after the day of month. +// - The time must be of the format hh:mm:ss. +// An average cookie expiration will look something like this: +// Sat, 15-Apr-17 21:01:22 GMT +Time CookieMonster::ParseCookieTime(const std::string& time_string) { + static const char* kMonths[] = { "jan", "feb", "mar", "apr", "may", "jun", + "jul", "aug", "sep", "oct", "nov", "dec" }; + static const int kMonthsLen = arraysize(kMonths); + // We want to be pretty liberal, and support most non-ascii and non-digit + // characters as a delimiter. We can't treat : as a delimiter, because it + // is the delimiter for hh:mm:ss, and we want to keep this field together. + // We make sure to include - and +, since they could prefix numbers. + // If the cookie attribute came in in quotes (ex expires="XXX"), the quotes + // will be preserved, and we will get them here. So we make sure to include + // quote characters, and also \ for anything that was internally escaped. 
+ static const char* kDelimiters = "\t !\"#$%&'()*+,-./;<=>?@[\\]^_`{|}~"; + + Time::Exploded exploded = {0}; + + StringTokenizer tokenizer(time_string, kDelimiters); + + bool found_day_of_month = false; + bool found_month = false; + bool found_time = false; + bool found_year = false; + + while (tokenizer.GetNext()) { + const std::string token = tokenizer.token(); + DCHECK(!token.empty()); + bool numerical = IsAsciiDigit(token[0]); + + // String field + if (!numerical) { + if (!found_month) { + for (int i = 0; i < kMonthsLen; ++i) { + // Match prefix, so we could match January, etc + if (StrNCaseCmp(token.c_str(), kMonths[i], 3) == 0) { + exploded.month = i + 1; + found_month = true; + break; + } + } + } else { + // If we've gotten here, it means we've already found and parsed our + // month, and we have another string, which we would expect to be the + // the time zone name. According to the RFC and my experiments with + // how sites format their expirations, we don't have much of a reason + // to support timezones. We don't want to ever barf on user input, + // but this DCHECK should pass for well-formed data. + // DCHECK(token == "GMT"); + } + // Numeric field w/ a colon + } else if (token.find(':') != std::string::npos) { + if (!found_time && + sscanf_s(token.c_str(), "%2hu:%2hu:%2hu", &exploded.hour, + &exploded.minute, &exploded.second) == 3) { + found_time = true; + } else { + // We should only ever encounter one time-like thing. If we're here, + // it means we've found a second, which shouldn't happen. We keep + // the first. This check should be ok for well-formed input: + // NOTREACHED(); + } + // Numeric field + } else { + // Overflow with atoi() is unspecified, so we enforce a max length. 
+ if (!found_day_of_month && token.length() <= 2) { + exploded.day_of_month = atoi(token.c_str()); + found_day_of_month = true; + } else if (!found_year && token.length() <= 5) { + exploded.year = atoi(token.c_str()); + found_year = true; + } else { + // If we're here, it means we've either found an extra numeric field, + // or a numeric field which was too long. For well-formed input, the + // following check would be reasonable: + // NOTREACHED(); + } + } + } + + if (!found_day_of_month || !found_month || !found_time || !found_year) { + // We didn't find all of the fields we need. For well-formed input, the + // following check would be reasonable: + // NOTREACHED() << "Cookie parse expiration failed: " << time_string; + return Time(); + } + + // Normalize the year to expand abbreviated years to the full year. + if (exploded.year >= 69 && exploded.year <= 99) + exploded.year += 1900; + if (exploded.year >= 0 && exploded.year <= 68) + exploded.year += 2000; + + // If our values are within their correct ranges, we got our time. + if (exploded.day_of_month >= 1 && exploded.day_of_month <= 31 && + exploded.month >= 1 && exploded.month <= 12 && + exploded.year >= 1601 && exploded.year <= 30827 && + exploded.hour <= 23 && exploded.minute <= 59 && exploded.second <= 59) { + return Time::FromUTCExploded(exploded); + } + + // One of our values was out of expected range. For well-formed input, + // the following check would be reasonable: + // NOTREACHED() << "Cookie exploded expiration failed: " << time_string; + + return Time(); +} + +// Determine the cookie domain key to use for setting the specified cookie. 
+// On success returns true, and sets cookie_domain_key to either a +// -host cookie key (ex: "google.com") +// -domain cookie key (ex: ".google.com") +static bool GetCookieDomainKey(const GURL& url, + const CookieMonster::ParsedCookie& pc, + std::string* cookie_domain_key) { + const std::string url_host(url.host()); + if (!pc.HasDomain() || pc.Domain().empty()) { + // No domain was specified in cookie -- default to host cookie. + *cookie_domain_key = url_host; + DCHECK((*cookie_domain_key)[0] != '.'); + return true; + } + + // Get the normalized domain specified in cookie line. + // Note: The RFC says we can reject a cookie if the domain + // attribute does not start with a dot. IE/FF/Safari however, allow a cookie + // of the form domain=my.domain.com, treating it the same as + // domain=.my.domain.com -- for compatibility we do the same here. Firefox + // also treats domain=.....my.domain.com like domain=.my.domain.com, but + // neither IE nor Safari do this, and we don't either. + std::string cookie_domain(net_util::CanonicalizeHost(pc.Domain(), NULL)); + if (cookie_domain.empty()) + return false; + if (cookie_domain[0] != '.') + cookie_domain = "." + cookie_domain; + + // Ensure |url| and |cookie_domain| have the same domain+registry. + const std::string url_domain_and_registry( + RegistryControlledDomainService::GetDomainAndRegistry(url)); + if (url_domain_and_registry.empty()) + return false; // IP addresses/intranet hosts can't set domain cookies. + const std::string cookie_domain_and_registry( + RegistryControlledDomainService::GetDomainAndRegistry(cookie_domain)); + if (url_domain_and_registry != cookie_domain_and_registry) + return false; // Can't set a cookie on a different domain + registry. + + // Ensure |url_host| is |cookie_domain| or one of its subdomains. Given that + // we know the domain+registry are the same from the above checks, this is + // basically a simple string suffix check. + if ((url_host.length() < cookie_domain.length()) ? 
+ (cookie_domain != ("." + url_host)) : + url_host.compare(url_host.length() - cookie_domain.length(), + cookie_domain.length(), cookie_domain)) + return false; + + + *cookie_domain_key = cookie_domain; + return true; +} + +static std::string CanonPath(const GURL& url, + const CookieMonster::ParsedCookie& pc) { + // The RFC says the path should be a prefix of the current URL path. + // However, Mozilla allows you to set any path for compatibility with + // broken websites. We unfortunately will mimic this behavior. We try + // to be generous and accept cookies with an invalid path attribute, and + // default the path to something reasonable. + + // The path was supplied in the cookie, we'll take it. + if (pc.HasPath() && !pc.Path().empty() && pc.Path()[0] == '/') + return pc.Path(); + + // The path was not supplied in the cookie or invalid, we will default + // to the current URL path. + // """Defaults to the path of the request URL that generated the + // Set-Cookie response, up to, but not including, the + // right-most /.""" + // How would this work for a cookie on /? We will include it then. + const std::string& url_path = url.path(); + + std::string::size_type idx = url_path.find_last_of('/'); + + // The cookie path was invalid or a single '/'. + if (idx == 0 || idx == std::string::npos) + return std::string("/"); + + // Return up to the rightmost '/'. + return url_path.substr(0, idx); +} + +static Time CanonExpiration(const CookieMonster::ParsedCookie& pc, + const Time& current) { + // First, try the Max-Age attribute. + uint64 max_age = 0; + if (pc.HasMaxAge() && + sscanf_s(pc.MaxAge().c_str(), " %I64u", &max_age) == 1) { + return current + TimeDelta::FromSeconds(max_age); + } + + // Try the Expires attribute. + if (pc.HasExpires()) + return CookieMonster::ParseCookieTime(pc.Expires()); + + // Invalid or no expiration, persistent cookie. 
+ return Time(); +} + +static bool HasCookieableScheme(const GURL& url) { + static const char* kCookieableSchemes[] = { "http", "https", "file" }; + static const int kCookieableSchemesLen = arraysize(kCookieableSchemes); + static const int kCookieableSchemesFileIndex = 2; + + // Make sure the request is on a cookie-able url scheme. + for (int i = 0; i < kCookieableSchemesLen; ++i) { + // We matched a scheme. + if (url.SchemeIs(kCookieableSchemes[i])) { + // This is file:// scheme + if (i == kCookieableSchemesFileIndex) + return CookieMonster::enable_file_scheme_; + // We've matched a supported scheme. + return true; + } + } + + // The scheme didn't match any in our whitelist. + COOKIE_DLOG(WARNING) << "Unsupported cookie scheme: " << url.scheme(); + return false; +} + +bool CookieMonster::SetCookie(const GURL& url, + const std::string& cookie_line) { + Time creation_date = CurrentTime(); + last_time_seen_ = creation_date; + return SetCookieWithCreationTime(url, cookie_line, creation_date); +} + +bool CookieMonster::SetCookieWithCreationTime(const GURL& url, + const std::string& cookie_line, + const Time& creation_time) { + DCHECK(!creation_time.is_null()); + + if (!HasCookieableScheme(url)) { + DLOG(WARNING) << "Unsupported cookie scheme: " << url.scheme(); + return false; + } + + AutoLock autolock(lock_); + InitIfNecessary(); + + COOKIE_DLOG(INFO) << "SetCookie() line: " << cookie_line; + + // Parse the cookie. 
+ ParsedCookie pc(cookie_line); + + if (!pc.IsValid()) { + COOKIE_DLOG(WARNING) << "Couldn't parse cookie"; + return false; + } + + std::string cookie_domain; + if (!GetCookieDomainKey(url, pc, &cookie_domain)) { + return false; + } + + std::string cookie_path = CanonPath(url, pc); + + scoped_ptr<CanonicalCookie> cc; + Time cookie_expires = CanonExpiration(pc, creation_time); + + cc.reset(new CanonicalCookie(pc.Name(), pc.Value(), cookie_path, + pc.IsSecure(), pc.IsHttpOnly(), + creation_time, !cookie_expires.is_null(), + cookie_expires)); + + if (!cc.get()) { + COOKIE_DLOG(WARNING) << "Failed to allocate CanonicalCookie"; + return false; + } + + // We should have only purged at most one matching cookie. + int num_deleted = DeleteEquivalentCookies(cookie_domain, *cc); + + COOKIE_DLOG(INFO) << "SetCookie() cc: " << cc->DebugString(); + + // Realize that we might be setting an expired cookie, and the only point + // was to delete the cookie which we've already done. + if (!cc->IsExpired(creation_time)) + InternalInsertCookie(cookie_domain, cc.release(), true); + + // We assume that hopefully setting a cookie will be less common than + // querying a cookie. Since setting a cookie can put us over our limits, + // make sure that we garbage collect... We can also make the assumption that + // if a cookie was set, in the common case it will be used soon after, + // and we will purge the expired cookies in GetCookies(). 
+ GarbageCollect(creation_time, cookie_domain); + + return true; +} + +void CookieMonster::SetCookies(const GURL& url, + const std::vector<std::string>& cookies) { + for (std::vector<std::string>::const_iterator iter = cookies.begin(); + iter != cookies.end(); ++iter) + SetCookie(url, *iter); +} + +void CookieMonster::InternalInsertCookie(const std::string& key, + CanonicalCookie* cc, + bool sync_to_store) { + if (cc->IsPersistent() && store_ && sync_to_store) + store_->AddCookie(key, *cc); + cookies_.insert(CookieMap::value_type(key, cc)); +} + +void CookieMonster::InternalDeleteCookie(CookieMap::iterator it, + bool sync_to_store) { + CanonicalCookie* cc = it->second; + COOKIE_DLOG(INFO) << "InternalDeleteCookie() cc: " << cc->DebugString(); + if (cc->IsPersistent() && store_ && sync_to_store) + store_->DeleteCookie(*cc); + cookies_.erase(it); + delete cc; +} + +int CookieMonster::DeleteEquivalentCookies(const std::string& key, + const CanonicalCookie& ecc) { + int num_deleted = 0; + for (CookieMapItPair its = cookies_.equal_range(key); + its.first != its.second; ) { + CookieMap::iterator curit = its.first; + CanonicalCookie* cc = curit->second; + ++its.first; + + // TODO while we're here, we might as well purge expired cookies too. + + if (ecc.IsEquivalent(*cc)) { + InternalDeleteCookie(curit, true); + ++num_deleted; +#ifdef NDEBUG + // We should only ever find a single equivalent cookie + break; +#endif + } + } + + // Our internal state should be consistent, we should never have more + // than one equivalent cookie, since they should overwrite each other. + DCHECK(num_deleted <= 1); + + return num_deleted; +} + +// TODO we should be sorting by last access time, however, right now +// we're not saving an access time, so we're sorting by creation time. 
+static bool OldestCookieSorter(const CookieMonster::CookieMap::iterator& it1, + const CookieMonster::CookieMap::iterator& it2) { + return it1->second->CreationDate() < it2->second->CreationDate(); +} + +// is vector::size_type always going to be size_t? +int CookieMonster::GarbageCollectRange(const Time& current, + const CookieMapItPair& itpair, + size_t num_max, size_t num_purge) { + int num_deleted = 0; + + // First, walk through and delete anything that's expired. + // Save a list of iterators to the ones that weren't expired + std::vector<CookieMap::iterator> cookie_its; + for (CookieMap::iterator it = itpair.first, end = itpair.second; it != end;) { + CookieMap::iterator curit = it; + CanonicalCookie* cc = curit->second; + ++it; + + if (cc->IsExpired(current)) { + InternalDeleteCookie(curit, true); + ++num_deleted; + } else { + cookie_its.push_back(curit); + } + } + + if (cookie_its.size() > num_max) { + COOKIE_DLOG(INFO) << "GarbageCollectRange() Deep Garbage Collect."; + num_purge += cookie_its.size() - num_max; + // Sort the top N we want to purge. + std::partial_sort(cookie_its.begin(), cookie_its.begin() + num_purge, + cookie_its.end(), OldestCookieSorter); + + // TODO should probably use an iterator and not an index. + for (size_t i = 0; i < num_purge; ++i) { + InternalDeleteCookie(cookie_its[i], true); + ++num_deleted; + } + } + + return num_deleted; +} + +// TODO Whenever we delete, check last_cur_utc_... +int CookieMonster::GarbageCollect(const Time& current, + const std::string& key) { + // Based off of the Mozilla defaults + // It might seem scary to have a high purge value, but really it's not. You + // just make sure that you increase the max to cover the increase in purge, + // and we would have been purging the same amount of cookies. We're just + // going through the garbage collection process less often. 
+ static const size_t kNumCookiesPerHost = 70; // ~50 cookies + static const size_t kNumCookiesPerHostPurge = 20; + static const size_t kNumCookiesTotal = 1100; // ~1000 cookies + static const size_t kNumCookiesTotalPurge = 100; + + int num_deleted = 0; + + // Collect garbage for this key. + if (cookies_.count(key) > kNumCookiesPerHost) { + COOKIE_DLOG(INFO) << "GarbageCollect() key: " << key; + num_deleted += GarbageCollectRange(current, cookies_.equal_range(key), + kNumCookiesPerHost, + kNumCookiesPerHostPurge); + } + + // Collect garbage for everything. + if (cookies_.size() > kNumCookiesTotal) { + COOKIE_DLOG(INFO) << "GarbageCollect() everything"; + num_deleted += GarbageCollectRange(current, + CookieMapItPair(cookies_.begin(), + cookies_.end()), + kNumCookiesTotal, kNumCookiesTotalPurge); + } + + return num_deleted; +} + +int CookieMonster::DeleteAll(bool sync_to_store) { + AutoLock autolock(lock_); + InitIfNecessary(); + + int num_deleted = 0; + for (CookieMap::iterator it = cookies_.begin(); it != cookies_.end();) { + CookieMap::iterator curit = it; + ++it; + InternalDeleteCookie(curit, sync_to_store); + ++num_deleted; + } + + return num_deleted; +} + +int CookieMonster::DeleteAllCreatedBetween(const Time& delete_begin, + const Time& delete_end, + bool sync_to_store) { + AutoLock autolock(lock_); + InitIfNecessary(); + + int num_deleted = 0; + for (CookieMap::iterator it = cookies_.begin(); it != cookies_.end();) { + CookieMap::iterator curit = it; + CanonicalCookie* cc = curit->second; + ++it; + + if (cc->CreationDate() >= delete_begin && + (delete_end.is_null() || cc->CreationDate() < delete_end)) { + InternalDeleteCookie(curit, sync_to_store); + ++num_deleted; + } + } + + return num_deleted; +} + +int CookieMonster::DeleteAllCreatedAfter(const Time& delete_begin, + bool sync_to_store) { + return DeleteAllCreatedBetween(delete_begin, Time(), sync_to_store); +} + +bool CookieMonster::DeleteCookie(const std::string& domain, + const CanonicalCookie& cookie, 
+ bool sync_to_store) { + AutoLock autolock(lock_); + InitIfNecessary(); + + for (CookieMapItPair its = cookies_.equal_range(domain); + its.first != its.second; ++its.first) { + // The creation date acts as our unique index... + if (its.first->second->CreationDate() == cookie.CreationDate()) { + InternalDeleteCookie(its.first, sync_to_store); + return true; + } + } + return false; +} + +// Mozilla sorts on the path length (longest first), and then it +// sorts by creation time (oldest first). +// The RFC says the sort order for the domain attribute is undefined. +static bool CookieSorter(CookieMonster::CanonicalCookie* cc1, + CookieMonster::CanonicalCookie* cc2) { + if (cc1->Path().length() == cc2->Path().length()) + return cc1->CreationDate() < cc2->CreationDate(); + return cc1->Path().length() > cc2->Path().length(); +} + +std::string CookieMonster::GetCookies(const GURL& url) { + return GetCookiesWithOptions(url, NORMAL); +} + +// Currently our cookie datastructure is based on Mozilla's approach. We have a +// hash keyed on the cookie's domain, and for any query we walk down the domain +// components and probe for cookies until we reach the TLD, where we stop. +// For example, a.b.blah.com, we would probe +// - a.b.blah.com +// - .a.b.blah.com (TODO should we check this first or second?) +// - .b.blah.com +// - .blah.com +// There are some alternative datastructures we could try, like a +// search/prefix trie, where we reverse the hostname and query for all +// keys that are a prefix of our hostname. I think the hash probing +// should be fast and simple enough for now. +std::string CookieMonster::GetCookiesWithOptions(const GURL& url, + CookieOptions options) { + if (!HasCookieableScheme(url)) { + DLOG(WARNING) << "Unsupported cookie scheme: " << url.scheme(); + return std::string(); + } + + // Get the cookies for this host and its domain(s). 
+ std::vector<CanonicalCookie*> cookies; + FindCookiesForHostAndDomain(url, options, &cookies); + std::sort(cookies.begin(), cookies.end(), CookieSorter); + + std::string cookie_line; + for (std::vector<CanonicalCookie*>::const_iterator it = cookies.begin(); + it != cookies.end(); ++it) { + if (it != cookies.begin()) + cookie_line += "; "; + // In Mozilla if you set a cookie like AAAA, it will have an empty token + // and a value of AAAA. When it sends the cookie back, it will send AAAA, + // so we need to avoid sending =AAAA for a blank token value. + if (!(*it)->Name().empty()) + cookie_line += (*it)->Name() + "="; + cookie_line += (*it)->Value(); + } + + COOKIE_DLOG(INFO) << "GetCookies() result: " << cookie_line; + + return cookie_line; +} + +// TODO(deanm): We could have expired cookies that haven't been purged yet, +// and exporting these would be inaccurate, for example in the cookie manager +// it might show cookies that are actually expired already. We should do +// a full garbage collection before ... There actually isn't a way to do +// this right now (a forceful full GC), so we'll have to live with the +// possibility of showing the user expired cookies. This shouldn't be very +// common since most persistent cookies have a long lifetime. +CookieMonster::CookieList CookieMonster::GetAllCookies() { + AutoLock autolock(lock_); + InitIfNecessary(); + + CookieList cookie_list; + + for (CookieMap::iterator it = cookies_.begin(); it != cookies_.end(); ++it) { + cookie_list.push_back(CookieListPair(it->first, *it->second)); + } + + return cookie_list; +} + +void CookieMonster::FindCookiesForHostAndDomain( + const GURL& url, + CookieOptions options, + std::vector<CanonicalCookie*>* cookies) { + AutoLock autolock(lock_); + InitIfNecessary(); + + const Time current_time(CurrentTime()); + + // Query for the full host, For example: 'a.c.blah.com'. 
+ std::string key(url.host()); + FindCookiesForKey(key, url, options, current_time, cookies); + + // See if we can search for domain cookies, i.e. if the host has a TLD + 1. + const std::string domain( + RegistryControlledDomainService::GetDomainAndRegistry(key)); + if (domain.empty()) + return; + DCHECK_LE(domain.length(), key.length()); + DCHECK_EQ(0, key.compare(key.length() - domain.length(), domain.length(), + domain)); + + // Walk through the string and query at the dot points (GURL should have + // canonicalized the dots, so this should be safe). Stop once we reach the + // domain + registry; we can't write cookies past this point, and with some + // registrars other domains can, in which case we don't want to read their + // cookies. + for (key = "." + key; key.length() > domain.length(); ) { + FindCookiesForKey(key, url, options, current_time, cookies); + const size_t next_dot = key.find('.', 1); // Skip over leading dot. + key.erase(0, next_dot); + } +} + +void CookieMonster::FindCookiesForKey( + const std::string& key, + const GURL& url, + CookieOptions options, + const Time& current, + std::vector<CanonicalCookie*>* cookies) { + bool secure = url.SchemeIsSecure(); + + for (CookieMapItPair its = cookies_.equal_range(key); + its.first != its.second; ) { + CookieMap::iterator curit = its.first; + CanonicalCookie* cc = curit->second; + ++its.first; + + // If the cookie is expired, delete it. + if (cc->IsExpired(current)) { + InternalDeleteCookie(curit, true); + continue; + } + + // Filter out HttpOnly cookies unless they where explicitly requested. + if ((options & INCLUDE_HTTPONLY) == 0 && cc->IsHttpOnly()) + continue; + + // Filter out secure cookies unless we're https. + if (!secure && cc->IsSecure()) + continue; + + if (!cc->IsOnPath(url.path())) + continue; + + // Congratulations Charlie, you passed the test! 
+ cookies->push_back(cc); + } +} + + +CookieMonster::ParsedCookie::ParsedCookie(const std::string& cookie_line) + : is_valid_(false), + path_index_(0), + domain_index_(0), + expires_index_(0), + maxage_index_(0), + secure_index_(0), + httponly_index_(0) { + + if (cookie_line.size() > kMaxCookieSize) { + LOG(INFO) << "Not parsing cookie, too large: " << cookie_line.size(); + return; + } + + ParseTokenValuePairs(cookie_line); + if (pairs_.size() > 0) { + is_valid_ = true; + SetupAttributes(); + } +} + +// Returns true if |c| occurs in |chars| +// TODO maybe make this take an iterator, could check for end also? +static inline bool CharIsA(const char c, const char* chars) { + return strchr(chars, c) != NULL; +} +// Seek the iterator to the first occurrence of a character in |chars|. +// Returns true if it hit the end, false otherwise. +static inline bool SeekTo(std::string::const_iterator* it, + const std::string::const_iterator& end, + const char* chars) { + for (; *it != end && !CharIsA(**it, chars); ++(*it)); + return *it == end; +} +// Seek the iterator to the first occurrence of a character not in |chars|. +// Returns true if it hit the end, false otherwise. +static inline bool SeekPast(std::string::const_iterator* it, + const std::string::const_iterator& end, + const char* chars) { + for (; *it != end && CharIsA(**it, chars); ++(*it)); + return *it == end; +} +static inline bool SeekBackPast(std::string::const_iterator* it, + const std::string::const_iterator& end, + const char* chars) { + for (; *it != end && CharIsA(**it, chars); --(*it)); + return *it == end; +} + +// Parse all token/value pairs and populate pairs_. 
// The line is cut at the first '\n', '\r' or NUL character, and at most
// kMaxPairs pairs are read.  Pairs are separated by ';' and a name is
// separated from its value by the first '='.  Whitespace (space, tab) around
// names and around unquoted values is dropped.  Every name except the first
// one is lower-cased.
void CookieMonster::ParsedCookie::ParseTokenValuePairs(
    const std::string& cookie_line) {
  // The literal carries an explicit '\0'; sizeof() - 1 drops only the
  // implicit terminator, so the terminator set is the 3 characters \n \r \0.
  static const char kTerminator[] = "\n\r\0";
  static const int kTerminatorLen = sizeof(kTerminator) - 1;
  static const char kWhitespace[] = " \t";
  static const char kQuoteTerminator[] = "\"";
  static const char kValueSeparator[] = ";";
  static const char kTokenSeparator[] = ";=";

  pairs_.clear();

  // Ok, here we go.  We should be expecting to be starting somewhere
  // before the cookie line, not including any header name...
  std::string::const_iterator start = cookie_line.begin();
  std::string::const_iterator end = cookie_line.end();
  std::string::const_iterator it = start;

  // TODO Make sure we're stripping \r\n in the network code.  Then we
  // can log any unexpected terminators.
  std::string::size_type term_pos = cookie_line.find_first_of(
      std::string(kTerminator, kTerminatorLen));
  if (term_pos != std::string::npos) {
    // We found a character we should treat as an end of string.
    end = start + term_pos;
  }

  for (int pair_num = 0; pair_num < kMaxPairs && it != end; ++pair_num) {
    TokenValuePair pair;
    std::string::const_iterator token_start, token_real_end, token_end;

    // Seek past any whitespace before the "token" (the name).
    // token_start should point at the first character in the token
    if (SeekPast(&it, end, kWhitespace))
      break;  // No token, whitespace or empty.
    token_start = it;

    // Seek over the token, to the token separator.
    // token_real_end should point at the token separator, i.e. '='.
    // If it == end after the seek, we probably have a token-value.
    SeekTo(&it, end, kTokenSeparator);
    token_real_end = it;

    // Ignore any whitespace between the token and the token separator.
    // token_end should point after the last interesting token character,
    // pointing at either whitespace, or at '=' (and equal to token_real_end).
    if (it != token_start) {  // We could have an empty token name.
      --it;  // Go back before the token separator.
      // Skip over any whitespace to the first non-whitespace character.
      // token_start is not whitespace (SeekPast above stopped on it), so the
      // backwards walk always stops at or after token_start.
      SeekBackPast(&it, token_start, kWhitespace);
      // Point after it.
      ++it;
    }
    token_end = it;

    // Seek us back to the end of the token.
    it = token_real_end;

    if (it == end || *it != '=') {
      // We have a token-value, we didn't have any token name.
      if (pair_num == 0) {
        // For the first time around, we want to treat single values
        // as a value with an empty name. (Mozilla bug 169091).
        // IE seems to also have this behavior, ex "AAA", and "AAA=10" will
        // set 2 different cookies, and setting "BBB" will then replace "AAA".
        pair.first = "";
        // Rewind to the beginning of what we thought was the token name,
        // and let it get parsed as a value.
        it = token_start;
      } else {
        // Any not-first attribute we want to treat a value as a
        // name with an empty value...  This is so something like
        // "secure;" will get parsed as a Token name, and not a value.
        pair.first = std::string(token_start, token_end);
      }
    } else {
      // We have a TOKEN=VALUE.
      pair.first = std::string(token_start, token_end);
      ++it;  // Skip past the '='.
    }

    // OK, now try to parse a value.
    std::string::const_iterator value_start, value_end;

    // Seek past any whitespace that might be in between the token and value.
    SeekPast(&it, end, kWhitespace);
    // value_start should point at the first character of the value.
    value_start = it;

    // The value is double quoted, process <quoted-string>.
    if (it != end && *it == '"') {
      // Skip over the first double quote, and parse until
      // a terminating double quote or the end.
      for (++it; it != end && !CharIsA(*it, kQuoteTerminator); ++it) {
        // Allow an escaped \" in a double quoted string.
        if (*it == '\\') {
          ++it;
          if (it == end)
            break;
        }
      }

      // The stored value keeps both quotes, plus anything between the
      // closing quote and the next ';' (trailing whitespace included).
      SeekTo(&it, end, kValueSeparator);
      // We could seek to the end, that's ok.
      value_end = it;
    } else {
      // The value is non-quoted, process <token-value>.
      // Just look for ';' to terminate ('=' allowed).
      // We can hit the end, maybe they didn't terminate.
      SeekTo(&it, end, kValueSeparator);

      // Ignore any whitespace between the value and the value separator
      if (it != value_start) {  // Could have an empty value
        --it;
        SeekBackPast(&it, value_start, kWhitespace);
        ++it;
      }

      value_end = it;
    }

    // OK, we're finished with a Token/Value.
    pair.second = std::string(value_start, value_end);
    // From RFC2109: "Attributes (names) (attr) are case-insensitive."
    // The first pair is the cookie's own name, whose case is preserved.
    if (pair_num != 0)
      StringToLowerASCII(&pair.first);
    pairs_.push_back(pair);

    // We've processed a token/value pair, we're either at the end of
    // the string or a ValueSeparator like ';', which we want to skip.
    if (it != end)
      ++it;
  }
}

// Records, for each attribute we understand, the index in pairs_ of the pair
// that carries it.  Names are compared against lower-case constants, which
// works because ParseTokenValuePairs() lower-cases every name but the first.
// If an attribute appears more than once, the last occurrence wins.  An
// index that stays 0 means the attribute is absent.
void CookieMonster::ParsedCookie::SetupAttributes() {
  static const char kPathTokenName[] = "path";
  static const char kDomainTokenName[] = "domain";
  static const char kExpiresTokenName[] = "expires";
  static const char kMaxAgeTokenName[] = "max-age";
  static const char kSecureTokenName[] = "secure";
  static const char kHttpOnlyTokenName[] = "httponly";

  // We skip over the first token/value, the user supplied one.
  for (size_t i = 1; i < pairs_.size(); ++i) {
    if (pairs_[i].first == kPathTokenName)
      path_index_ = i;
    else if (pairs_[i].first == kDomainTokenName)
      domain_index_ = i;
    else if (pairs_[i].first == kExpiresTokenName)
      expires_index_ = i;
    else if (pairs_[i].first == kMaxAgeTokenName)
      maxage_index_ = i;
    else if (pairs_[i].first == kSecureTokenName)
      secure_index_ = i;
    else if (pairs_[i].first == kHttpOnlyTokenName)
      httponly_index_ = i;
    else { /* some attribute we don't know or don't care about. */ }
  }
}

// Create a cookie-line for the cookie.  For debugging only!
// If we want to use this for something more than debugging, we
// should rewrite it better...
+std::string CookieMonster::ParsedCookie::DebugString() const { + std::string out; + for (PairList::const_iterator it = pairs_.begin(); + it != pairs_.end(); ++it) { + out.append(it->first); + out.append("="); + out.append(it->second); + out.append("; "); + } + return out; +} + +bool CookieMonster::CanonicalCookie::IsOnPath( + const std::string& url_path) const { + + // A zero length would be unsafe for our trailing '/' checks, and + // would also make no sense for our prefix match. The code that + // creates a CanonicalCookie should make sure the path is never zero length, + // but we double check anyway. + if (path_.empty()) + return false; + + // The Mozilla code broke it into 3 cases, if it's strings lengths + // are less than, equal, or greater. I think this is simpler: + + // Make sure the cookie path is a prefix of the url path. If the + // url path is shorter than the cookie path, then the cookie path + // can't be a prefix. + if (url_path.find(path_) != 0) + return false; + + // Now we know that url_path is >= cookie_path, and that cookie_path + // is a prefix of url_path. If they are the are the same length then + // they are identical, otherwise we need an additional check: + + // In order to avoid in correctly matching a cookie path of /blah + // with a request path of '/blahblah/', we need to make sure that either + // the cookie path ends in a trailing '/', or that we prefix up to a '/' + // in the url path. Since we know that the url path length is greater + // than the cookie path length, it's safe to index one byte past. + if (path_.length() != url_path.length() && + path_[path_.length() - 1] != '/' && + url_path[path_.length()] != '/') + return false; + + return true; +} + +std::string CookieMonster::CanonicalCookie::DebugString() const { + return StringPrintf("name: %s value: %s path: %s creation: %llu", + name_.c_str(), value_.c_str(), path_.c_str(), + creation_date_.ToTimeT()); +} |