// Copyright (c) 2010 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // Brought to you by the letter D and the number 2. #ifndef NET_BASE_COOKIE_MONSTER_H_ #define NET_BASE_COOKIE_MONSTER_H_ #pragma once #include #include #include #include #include "base/basictypes.h" #include "base/gtest_prod_util.h" #include "base/histogram.h" #include "base/lock.h" #include "base/ref_counted.h" #include "base/scoped_ptr.h" #include "base/time.h" #include "net/base/cookie_store.h" class GURL; namespace net { // The cookie monster is the system for storing and retrieving cookies. It has // an in-memory list of all cookies, and synchronizes non-session cookies to an // optional permanent storage that implements the PersistentCookieStore // interface. // // This class IS thread-safe. Normally, it is only used on the I/O thread, but // is also accessed directly through Automation for UI testing. // // TODO(deanm) Implement CookieMonster, the cookie database. // - Verify that our domain enforcement and non-dotted handling is correct class CookieMonster : public CookieStore { public: class CanonicalCookie; class Delegate; class ParsedCookie; class PersistentCookieStore; // NOTE(deanm): // I benchmarked hash_multimap vs multimap. We're going to be query-heavy // so it would seem like hashing would help. However they were very // close, with multimap being a tiny bit faster. I think this is because // our map is at max around 1000 entries, and the additional complexity // for the hashing might not overcome the O(log(1000)) for querying // a multimap. Also, multimap is standard, another reason to use it. typedef std::multimap CookieMap; typedef std::pair CookieMapItPair; typedef std::vector CookieList; // The store passed in should not have had Init() called on it yet. This // class will take care of initializing it. The backing store is NOT owned by // this class, but it must remain valid for the duration of the cookie // monster's existence. If |store| is NULL, then no backing store will be // updated. If |delegate| is non-NULL, it will be notified on // creation/deletion of cookies. CookieMonster(PersistentCookieStore* store, Delegate* delegate); #ifdef UNIT_TEST CookieMonster(PersistentCookieStore* store, Delegate* delegate, int last_access_threshold_milliseconds) : initialized_(false), store_(store), last_access_threshold_(base::TimeDelta::FromMilliseconds( last_access_threshold_milliseconds)), delegate_(delegate), last_statistic_record_time_(base::Time::Now()) { InitializeHistograms(); SetDefaultCookieableSchemes(); } #endif // Parse the string with the cookie time (very forgivingly). static base::Time ParseCookieTime(const std::string& time_string); // Returns true if a domain string represents a host-only cookie, // i.e. it doesn't begin with a leading '.' character. static bool DomainIsHostOnly(const std::string& domain_string); // CookieStore implementation. virtual bool SetCookieWithOptions(const GURL& url, const std::string& cookie_line, const CookieOptions& options); virtual std::string GetCookiesWithOptions(const GURL& url, const CookieOptions& options); virtual void DeleteCookie(const GURL& url, const std::string& cookie_name); virtual CookieMonster* GetCookieMonster() { return this; } // Sets a cookie given explicit user-provided cookie attributes. The cookie // name, value, domain, etc. are each provided as separate strings. This // function expects each attribute to be well-formed. It will check for // disallowed characters (e.g. the ';' character is disallowed within the // cookie value attribute) and will return false without setting the cookie // if such characters are found. bool SetCookieWithDetails(const GURL& url, const std::string& name, const std::string& value, const std::string& domain, const std::string& path, const base::Time& expiration_time, bool secure, bool http_only); // Returns all the cookies, for use in management UI, etc. This does not mark // the cookies as having been accessed. CookieList GetAllCookies(); // Returns all the cookies, for use in management UI, etc. Filters results // using given url scheme, host / domain and path. This does not mark the // cookies as having been accessed. CookieList GetAllCookiesForURL(const GURL& url); // Delete all of the cookies. int DeleteAll(bool sync_to_store); // Delete all of the cookies that have a creation_date greater than or equal // to |delete_begin| and less than |delete_end| int DeleteAllCreatedBetween(const base::Time& delete_begin, const base::Time& delete_end, bool sync_to_store); // Delete all of the cookies that have a creation_date more recent than the // one passed into the function via |delete_after|. int DeleteAllCreatedAfter(const base::Time& delete_begin, bool sync_to_store); // Delete all cookies that match the host of the given URL // regardless of path. This includes all http_only and secure cookies, // but does not include any domain cookies that may apply to this host. // Returns the number of cookies deleted. int DeleteAllForHost(const GURL& url); // Delete one specific cookie. bool DeleteCanonicalCookie(const CanonicalCookie& cookie); // Override the default list of schemes that are allowed to be set in // this cookie store. Calling his overrides the value of // "enable_file_scheme_". // If this this method is called, it must be called before first use of // the instance (i.e. as part of the instance initialization process.) void SetCookieableSchemes(const char* schemes[], size_t num_schemes); // There are some unknowns about how to correctly handle file:// cookies, // and our implementation for this is not robust enough. This allows you // to enable support, but it should only be used for testing. Bug 1157243. // Must be called before creating a CookieMonster instance. static void EnableFileScheme(); static bool enable_file_scheme_; private: ~CookieMonster(); // Testing support. friend class CookieMonsterTest; FRIEND_TEST_ALL_PREFIXES(CookieMonsterTest, TestCookieDeleteAllCreatedAfterTimestamp); FRIEND_TEST_ALL_PREFIXES(CookieMonsterTest, TestCookieDeleteAllCreatedBetweenTimestamps); bool SetCookieWithCreationTime(const GURL& url, const std::string& cookie_line, const base::Time& creation_time); // Called by all non-static functions to ensure that the cookies store has // been initialized. This is not done during creating so it doesn't block // the window showing. // Note: this method should always be called with lock_ held. void InitIfNecessary() { if (!initialized_) { if (store_) InitStore(); initialized_ = true; } } // Initializes the backing store and reads existing cookies from it. // Should only be called by InitIfNecessary(). void InitStore(); // Checks that |cookies_| matches our invariants, and tries to repair any // inconsistencies. (In other words, it does not have duplicate cookies). void EnsureCookiesMapIsValid(); // Checks for any duplicate cookies for host |key|, which lie between // |begin| and |end|. If any are found, all but the most recent are deleted. // Returns the number of duplicate cookies that were deleted. int TrimDuplicateCookiesForHost(const std::string& key, CookieMap::iterator begin, CookieMap::iterator end); void SetDefaultCookieableSchemes(); void FindCookiesForHostAndDomain(const GURL& url, const CookieOptions& options, bool update_access_time, std::vector* cookies); void FindCookiesForKey(const std::string& key, const GURL& url, const CookieOptions& options, const base::Time& current, bool update_access_time, std::vector* cookies); // Delete any cookies that are equivalent to |ecc| (same path, key, etc). // If |skip_httponly| is true, httponly cookies will not be deleted. The // return value with be true if |skip_httponly| skipped an httponly cookie. // NOTE: There should never be more than a single matching equivalent cookie. bool DeleteAnyEquivalentCookie(const std::string& key, const CanonicalCookie& ecc, bool skip_httponly); void InternalInsertCookie(const std::string& key, CanonicalCookie* cc, bool sync_to_store); // Helper function that sets cookies with more control. // Not exposed as we don't want callers to have the ability // to specify (potentially duplicate) creation times. bool SetCookieWithCreationTimeAndOptions(const GURL& url, const std::string& cookie_line, const base::Time& creation_time, const CookieOptions& options); // Helper function that sets a canonical cookie, deleting equivalents and // performing garbage collection. bool SetCanonicalCookie(scoped_ptr* cc, const base::Time& creation_time, const CookieOptions& options); void InternalUpdateCookieAccessTime(CanonicalCookie* cc); enum DeletionCause { DELETE_COOKIE_EXPLICIT, DELETE_COOKIE_OVERWRITE, DELETE_COOKIE_EXPIRED, DELETE_COOKIE_EVICTED, DELETE_COOKIE_DUPLICATE_IN_BACKING_STORE, DELETE_COOKIE_DONT_RECORD, DELETE_COOKIE_LAST_ENTRY = DELETE_COOKIE_DONT_RECORD }; // |deletion_cause| argument is for collecting statistics. void InternalDeleteCookie(CookieMap::iterator it, bool sync_to_store, DeletionCause deletion_cause); // If the number of cookies for host |key|, or globally, are over preset // maximums, garbage collects, first for the host and then globally, as // described by GarbageCollectRange(). The limits can be found as constants // at the top of the function body. // // Returns the number of cookies deleted (useful for debugging). int GarbageCollect(const base::Time& current, const std::string& key); // Deletes all expired cookies in |itpair|; // then, if the number of remaining cookies is greater than |num_max|, // collects the least recently accessed cookies until // (|num_max| - |num_purge|) cookies remain. // // Returns the number of cookies deleted. int GarbageCollectRange(const base::Time& current, const CookieMapItPair& itpair, size_t num_max, size_t num_purge); // Helper for GarbageCollectRange(); can be called directly as well. Deletes // all expired cookies in |itpair|. If |cookie_its| is non-NULL, it is // populated with all the non-expired cookies from |itpair|. // // Returns the number of cookies deleted. int GarbageCollectExpired(const base::Time& current, const CookieMapItPair& itpair, std::vector* cookie_its); bool HasCookieableScheme(const GURL& url); // Statistics support // Record statistics every kRecordStatisticsIntervalSeconds of uptime. static const int kRecordStatisticsIntervalSeconds = 10 * 60; // This function should be called repeatedly, and will record // statistics if a sufficient time period has passed. void RecordPeriodicStats(const base::Time& current_time); // Histogram variables; see CookieMonster::InitializeHistograms() in // cookie_monster.cc for details. scoped_refptr histogram_expiration_duration_minutes_; scoped_refptr histogram_between_access_interval_minutes_; scoped_refptr histogram_evicted_last_access_minutes_; scoped_refptr histogram_count_; scoped_refptr histogram_number_duplicate_db_cookies_; scoped_refptr histogram_cookie_deletion_cause_; // Initialize the above variables; should only be called from // the constructor. void InitializeHistograms(); CookieMap cookies_; // Indicates whether the cookie store has been initialized. This happens // lazily in InitStoreIfNecessary(). bool initialized_; scoped_refptr store_; // The resolution of our time isn't enough, so we do something // ugly and increment when we've seen the same time twice. base::Time CurrentTime(); base::Time last_time_seen_; // Minimum delay after updating a cookie's LastAccessDate before we will // update it again. const base::TimeDelta last_access_threshold_; std::vector cookieable_schemes_; scoped_refptr delegate_; // Lock for thread-safety Lock lock_; base::Time last_statistic_record_time_; DISALLOW_COPY_AND_ASSIGN(CookieMonster); }; class CookieMonster::CanonicalCookie { public: // These constructors do no validation or canonicalization of their inputs; // the resulting CanonicalCookies should not be relied on to be canonical // unless the caller has done appropriate validation and canonicalization // themselves. CanonicalCookie() { } CanonicalCookie(const std::string& name, const std::string& value, const std::string& domain, const std::string& path, bool secure, bool httponly, const base::Time& creation, const base::Time& last_access, bool has_expires, const base::Time& expires) : name_(name), value_(value), domain_(domain), path_(path), creation_date_(creation), last_access_date_(last_access), expiry_date_(expires), has_expires_(has_expires), secure_(secure), httponly_(httponly) { } // This constructor does canonicalization but not validation. // The result of this constructor should not be relied on in contexts // in which pre-validation of the ParsedCookie has not been done. CanonicalCookie(const GURL& url, const ParsedCookie& pc); // Supports the default copy constructor. // Creates a canonical cookie from unparsed attribute values. // Canonicalizes and validates inputs. May return NULL if an attribute // value is invalid. static CanonicalCookie* Create( const GURL& url, const std::string& name, const std::string& value, const std::string& domain, const std::string& path, const base::Time& creation_time, const base::Time& expiration_time, bool secure, bool http_only); const std::string& Name() const { return name_; } const std::string& Value() const { return value_; } const std::string& Domain() const { return domain_; } const std::string& Path() const { return path_; } const base::Time& CreationDate() const { return creation_date_; } const base::Time& LastAccessDate() const { return last_access_date_; } bool DoesExpire() const { return has_expires_; } bool IsPersistent() const { return DoesExpire(); } const base::Time& ExpiryDate() const { return expiry_date_; } bool IsSecure() const { return secure_; } bool IsHttpOnly() const { return httponly_; } bool IsExpired(const base::Time& current) { return has_expires_ && current >= expiry_date_; } // Are the cookies considered equivalent in the eyes of RFC 2965. // The RFC says that name must match (case-sensitive), domain must // match (case insensitive), and path must match (case sensitive). // For the case insensitive domain compare, we rely on the domain // having been canonicalized (in // GetCookieDomainKeyWithString->CanonicalizeHost). bool IsEquivalent(const CanonicalCookie& ecc) const { // It seems like it would make sense to take secure and httponly into // account, but the RFC doesn't specify this. // NOTE: Keep this logic in-sync with TrimDuplicateCookiesForHost(). return (name_ == ecc.Name() && domain_ == ecc.Domain() && path_ == ecc.Path()); } void SetLastAccessDate(const base::Time& date) { last_access_date_ = date; } bool IsOnPath(const std::string& url_path) const; std::string DebugString() const; private: std::string name_; std::string value_; std::string domain_; std::string path_; base::Time creation_date_; base::Time last_access_date_; base::Time expiry_date_; bool has_expires_; bool secure_; bool httponly_; }; class CookieMonster::Delegate : public base::RefCountedThreadSafe { public: // Will be called when a cookie is added or removed. The function is passed // the respective |cookie| which was added to or removed from the cookies. // If |removed| is true, the cookie was deleted. virtual void OnCookieChanged(const CookieMonster::CanonicalCookie& cookie, bool removed) = 0; protected: friend class base::RefCountedThreadSafe; virtual ~Delegate() {} }; class CookieMonster::ParsedCookie { public: typedef std::pair TokenValuePair; typedef std::vector PairList; // The maximum length of a cookie string we will try to parse static const size_t kMaxCookieSize = 4096; // The maximum number of Token/Value pairs. Shouldn't have more than 8. static const int kMaxPairs = 16; // Construct from a cookie string like "BLAH=1; path=/; domain=.google.com" ParsedCookie(const std::string& cookie_line); ~ParsedCookie() { } // You should not call any other methods on the class if !IsValid bool IsValid() const { return is_valid_; } const std::string& Name() const { return pairs_[0].first; } const std::string& Token() const { return Name(); } const std::string& Value() const { return pairs_[0].second; } bool HasPath() const { return path_index_ != 0; } const std::string& Path() const { return pairs_[path_index_].second; } bool HasDomain() const { return domain_index_ != 0; } const std::string& Domain() const { return pairs_[domain_index_].second; } bool HasExpires() const { return expires_index_ != 0; } const std::string& Expires() const { return pairs_[expires_index_].second; } bool HasMaxAge() const { return maxage_index_ != 0; } const std::string& MaxAge() const { return pairs_[maxage_index_].second; } bool IsSecure() const { return secure_index_ != 0; } bool IsHttpOnly() const { return httponly_index_ != 0; } // Returns the number of attributes, for example, returning 2 for: // "BLAH=hah; path=/; domain=.google.com" size_t NumberOfAttributes() const { return pairs_.size() - 1; } // For debugging only! std::string DebugString() const; // Returns an iterator pointing to the first terminator character found in // the given string. static std::string::const_iterator FindFirstTerminator(const std::string& s); // Given iterators pointing to the beginning and end of a string segment, // returns as output arguments token_start and token_end to the start and end // positions of a cookie attribute token name parsed from the segment, and // updates the segment iterator to point to the next segment to be parsed. // If no token is found, the function returns false. static bool ParseToken(std::string::const_iterator* it, const std::string::const_iterator& end, std::string::const_iterator* token_start, std::string::const_iterator* token_end); // Given iterators pointing to the beginning and end of a string segment, // returns as output arguments value_start and value_end to the start and end // positions of a cookie attribute value parsed from the segment, and updates // the segment iterator to point to the next segment to be parsed. static void ParseValue(std::string::const_iterator* it, const std::string::const_iterator& end, std::string::const_iterator* value_start, std::string::const_iterator* value_end); // Same as the above functions, except the input is assumed to contain the // desired token/value and nothing else. static std::string ParseTokenString(const std::string& token); static std::string ParseValueString(const std::string& value); private: static const char kTerminator[]; static const int kTerminatorLen; static const char kWhitespace[]; static const char kValueSeparator[]; static const char kTokenSeparator[]; void ParseTokenValuePairs(const std::string& cookie_line); void SetupAttributes(); PairList pairs_; bool is_valid_; // These will default to 0, but that should never be valid since the // 0th index is the user supplied token/value, not an attribute. // We're really never going to have more than like 8 attributes, so we // could fit these into 3 bits each if we're worried about size... size_t path_index_; size_t domain_index_; size_t expires_index_; size_t maxage_index_; size_t secure_index_; size_t httponly_index_; DISALLOW_COPY_AND_ASSIGN(ParsedCookie); }; typedef base::RefCountedThreadSafe RefcountedPersistentCookieStore; class CookieMonster::PersistentCookieStore : public RefcountedPersistentCookieStore { public: virtual ~PersistentCookieStore() { } // Initializes the store and retrieves the existing cookies. This will be // called only once at startup. virtual bool Load(std::vector*) = 0; virtual void AddCookie(const std::string&, const CanonicalCookie&) = 0; virtual void UpdateCookieAccessTime(const CanonicalCookie&) = 0; virtual void DeleteCookie(const CanonicalCookie&) = 0; protected: PersistentCookieStore() { } private: DISALLOW_COPY_AND_ASSIGN(PersistentCookieStore); }; } // namespace net #endif // NET_BASE_COOKIE_MONSTER_H_