// Copyright (c) 2012 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "net/cookies/cookie_util.h" #include #include #include "base/logging.h" #include "base/strings/string_tokenizer.h" #include "base/strings/string_util.h" #include "build/build_config.h" #include "net/base/registry_controlled_domains/registry_controlled_domain.h" #include "net/base/url_util.h" #include "url/gurl.h" namespace net { namespace cookie_util { bool DomainIsHostOnly(const std::string& domain_string) { return (domain_string.empty() || domain_string[0] != '.'); } std::string GetEffectiveDomain(const std::string& scheme, const std::string& host) { if (scheme == "http" || scheme == "https" || scheme == "ws" || scheme == "wss") { return registry_controlled_domains::GetDomainAndRegistry( host, registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES); } if (!DomainIsHostOnly(host)) return host.substr(1); return host; } bool GetCookieDomainWithString(const GURL& url, const std::string& domain_string, std::string* result) { const std::string url_host(url.host()); // If no domain was specified in the domain string, default to a host cookie. // We match IE/Firefox in allowing a domain=IPADDR if it matches the url // ip address hostname exactly. It should be treated as a host cookie. if (domain_string.empty() || (url.HostIsIPAddress() && url_host == domain_string)) { *result = url_host; DCHECK(DomainIsHostOnly(*result)); return true; } // Get the normalized domain specified in cookie line. url::CanonHostInfo ignored; std::string cookie_domain(CanonicalizeHost(domain_string, &ignored)); if (cookie_domain.empty()) return false; if (cookie_domain[0] != '.') cookie_domain = "." + cookie_domain; // Ensure |url| and |cookie_domain| have the same domain+registry. const std::string url_scheme(url.scheme()); const std::string url_domain_and_registry( GetEffectiveDomain(url_scheme, url_host)); if (url_domain_and_registry.empty()) { // We match IE/Firefox by treating an exact match between the domain // attribute and the request host to be treated as a host cookie. if (url_host == domain_string) { *result = url_host; DCHECK(DomainIsHostOnly(*result)); return true; } // Otherwise, IP addresses/intranet hosts/public suffixes can't set // domain cookies. return false; } const std::string cookie_domain_and_registry( GetEffectiveDomain(url_scheme, cookie_domain)); if (url_domain_and_registry != cookie_domain_and_registry) return false; // Can't set a cookie on a different domain + registry. // Ensure |url_host| is |cookie_domain| or one of its subdomains. Given that // we know the domain+registry are the same from the above checks, this is // basically a simple string suffix check. const bool is_suffix = (url_host.length() < cookie_domain.length()) ? (cookie_domain != ("." + url_host)) : (url_host.compare(url_host.length() - cookie_domain.length(), cookie_domain.length(), cookie_domain) != 0); if (is_suffix) return false; *result = cookie_domain; return true; } // Parse a cookie expiration time. We try to be lenient, but we need to // assume some order to distinguish the fields. The basic rules: // - The month name must be present and prefix the first 3 letters of the // full month name (jan for January, jun for June). // - If the year is <= 2 digits, it must occur after the day of month. // - The time must be of the format hh:mm:ss. // An average cookie expiration will look something like this: // Sat, 15-Apr-17 21:01:22 GMT base::Time ParseCookieTime(const std::string& time_string) { static const char* const kMonths[] = { "jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec" }; static const int kMonthsLen = arraysize(kMonths); // We want to be pretty liberal, and support most non-ascii and non-digit // characters as a delimiter. We can't treat : as a delimiter, because it // is the delimiter for hh:mm:ss, and we want to keep this field together. // We make sure to include - and +, since they could prefix numbers. // If the cookie attribute came in in quotes (ex expires="XXX"), the quotes // will be preserved, and we will get them here. So we make sure to include // quote characters, and also \ for anything that was internally escaped. static const char kDelimiters[] = "\t !\"#$%&'()*+,-./;<=>?@[\\]^_`{|}~"; base::Time::Exploded exploded = {0}; base::StringTokenizer tokenizer(time_string, kDelimiters); bool found_day_of_month = false; bool found_month = false; bool found_time = false; bool found_year = false; while (tokenizer.GetNext()) { const std::string token = tokenizer.token(); DCHECK(!token.empty()); bool numerical = base::IsAsciiDigit(token[0]); // String field if (!numerical) { if (!found_month) { for (int i = 0; i < kMonthsLen; ++i) { // Match prefix, so we could match January, etc if (base::StartsWith(token, base::StringPiece(kMonths[i], 3), base::CompareCase::INSENSITIVE_ASCII)) { exploded.month = i + 1; found_month = true; break; } } } else { // If we've gotten here, it means we've already found and parsed our // month, and we have another string, which we would expect to be the // the time zone name. According to the RFC and my experiments with // how sites format their expirations, we don't have much of a reason // to support timezones. We don't want to ever barf on user input, // but this DCHECK should pass for well-formed data. // DCHECK(token == "GMT"); } // Numeric field w/ a colon } else if (token.find(':') != std::string::npos) { if (!found_time && #ifdef COMPILER_MSVC sscanf_s( #else sscanf( #endif token.c_str(), "%2u:%2u:%2u", &exploded.hour, &exploded.minute, &exploded.second) == 3) { found_time = true; } else { // We should only ever encounter one time-like thing. If we're here, // it means we've found a second, which shouldn't happen. We keep // the first. This check should be ok for well-formed input: // NOTREACHED(); } // Numeric field } else { // Overflow with atoi() is unspecified, so we enforce a max length. if (!found_day_of_month && token.length() <= 2) { exploded.day_of_month = atoi(token.c_str()); found_day_of_month = true; } else if (!found_year && token.length() <= 5) { exploded.year = atoi(token.c_str()); found_year = true; } else { // If we're here, it means we've either found an extra numeric field, // or a numeric field which was too long. For well-formed input, the // following check would be reasonable: // NOTREACHED(); } } } if (!found_day_of_month || !found_month || !found_time || !found_year) { // We didn't find all of the fields we need. For well-formed input, the // following check would be reasonable: // NOTREACHED() << "Cookie parse expiration failed: " << time_string; return base::Time(); } // Normalize the year to expand abbreviated years to the full year. if (exploded.year >= 69 && exploded.year <= 99) exploded.year += 1900; if (exploded.year >= 0 && exploded.year <= 68) exploded.year += 2000; // If our values are within their correct ranges, we got our time. if (exploded.day_of_month >= 1 && exploded.day_of_month <= 31 && exploded.month >= 1 && exploded.month <= 12 && exploded.year >= 1601 && exploded.year <= 30827 && exploded.hour <= 23 && exploded.minute <= 59 && exploded.second <= 59) { return base::Time::FromUTCExploded(exploded); } // One of our values was out of expected range. For well-formed input, // the following check would be reasonable: // NOTREACHED() << "Cookie exploded expiration failed: " << time_string; return base::Time(); } GURL CookieOriginToURL(const std::string& domain, bool is_https) { if (domain.empty()) return GURL(); const std::string scheme = is_https ? "https" : "http"; const std::string host = domain[0] == '.' ? domain.substr(1) : domain; return GURL(scheme + "://" + host); } void ParseRequestCookieLine(const std::string& header_value, ParsedRequestCookies* parsed_cookies) { std::string::const_iterator i = header_value.begin(); while (i != header_value.end()) { // Here we are at the beginning of a cookie. // Eat whitespace. while (i != header_value.end() && *i == ' ') ++i; if (i == header_value.end()) return; // Find cookie name. std::string::const_iterator cookie_name_beginning = i; while (i != header_value.end() && *i != '=') ++i; base::StringPiece cookie_name(cookie_name_beginning, i); // Find cookie value. base::StringPiece cookie_value; // Cookies may have no value, in this case '=' may or may not be there. if (i != header_value.end() && i + 1 != header_value.end()) { ++i; // Skip '='. std::string::const_iterator cookie_value_beginning = i; if (*i == '"') { ++i; // Skip '"'. while (i != header_value.end() && *i != '"') ++i; if (i == header_value.end()) return; ++i; // Skip '"'. cookie_value = base::StringPiece(cookie_value_beginning, i); // i points to character after '"', potentially a ';'. } else { while (i != header_value.end() && *i != ';') ++i; cookie_value = base::StringPiece(cookie_value_beginning, i); // i points to ';' or end of string. } } parsed_cookies->push_back(std::make_pair(cookie_name, cookie_value)); // Eat ';'. if (i != header_value.end()) ++i; } } std::string SerializeRequestCookieLine( const ParsedRequestCookies& parsed_cookies) { std::string buffer; for (ParsedRequestCookies::const_iterator i = parsed_cookies.begin(); i != parsed_cookies.end(); ++i) { if (!buffer.empty()) buffer.append("; "); buffer.append(i->first.begin(), i->first.end()); buffer.push_back('='); buffer.append(i->second.begin(), i->second.end()); } return buffer; } } // namespace cookie_util } // namespace net