diff options
Diffstat (limited to 'net/base')
99 files changed, 20347 insertions, 0 deletions
diff --git a/net/base/address_list.cc b/net/base/address_list.cc new file mode 100644 index 0000000..4281a7e --- /dev/null +++ b/net/base/address_list.cc @@ -0,0 +1,46 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "net/base/address_list.h" + +#include <ws2tcpip.h> +#include <wspiapi.h> // Needed for Win2k compat. 
+ +namespace net { + +void AddressList::Adopt(struct addrinfo* head) { + data_ = new Data(); + data_->head = head; +} + +AddressList::Data::~Data() { + freeaddrinfo(head); +} + +} // namespace net diff --git a/net/base/address_list.h b/net/base/address_list.h new file mode 100644 index 0000000..d91137f --- /dev/null +++ b/net/base/address_list.h @@ -0,0 +1,61 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#ifndef NET_BASE_ADDRESS_LIST_H_ +#define NET_BASE_ADDRESS_LIST_H_ + +#include "base/ref_counted.h" + +struct addrinfo; + +namespace net { + +// An AddressList object contains a linked list of addrinfo structures. This +// class is designed to be copied around by value. +class AddressList { + public: + // Adopt the given addrinfo list in place of the existing one if any. This + // hands over responsibility for freeing the addrinfo list to the AddressList + // object. + void Adopt(struct addrinfo* head); + + // Get access to the head of the addrinfo list. + const struct addrinfo* head() const { return data_->head; } + + private: + struct Data : public base::RefCountedThreadSafe<Data> { + ~Data(); + struct addrinfo* head; + }; + scoped_refptr<Data> data_; +}; + +} // namespace net + +#endif // NET_BASE_ADDRESS_LIST_H_ diff --git a/net/base/auth.h b/net/base/auth.h new file mode 100644 index 0000000..92bc315 --- /dev/null +++ b/net/base/auth.h @@ -0,0 +1,76 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef NET_BASE_AUTH_H__ +#define NET_BASE_AUTH_H__ + +#include <string> + +#include "base/ref_counted.h" + +// Holds info about an authentication challenge that we may want to display +// to the user. +class AuthChallengeInfo : + public base::RefCountedThreadSafe<AuthChallengeInfo> { + public: + bool is_proxy; // true for Proxy-Authenticate, false for WWW-Authenticate. + std::wstring host; // the domain name of the server asking for auth + // (could be the proxy). + std::wstring scheme; // "Basic", "Digest", or whatever other method is used. + std::wstring realm; // the realm provided by the server, if there is one. + + private: + friend base::RefCountedThreadSafe<AuthChallengeInfo>; + ~AuthChallengeInfo() {} +}; + +//Authentication structures +enum AuthState { + AUTH_STATE_DONT_NEED_AUTH, + AUTH_STATE_NEED_AUTH, + AUTH_STATE_HAVE_AUTH, + AUTH_STATE_CANCELED +}; + +class AuthData : public base::RefCountedThreadSafe<AuthData> { + public: + AuthState state; // whether we need, have, or gave up on authentication. + std::wstring scheme; // the authentication scheme. + std::wstring username; // the username supplied to us for auth. 
+ std::wstring password; // the password supplied to us for auth. + + // We wouldn't instantiate this class if we didn't need authentication. + AuthData() : state(AUTH_STATE_NEED_AUTH) {} + + private: + friend base::RefCountedThreadSafe<AuthData>; + ~AuthData() {} +}; + +#endif // NET_BASE_AUTH_H__ diff --git a/net/base/auth_cache.cc b/net/base/auth_cache.cc new file mode 100644 index 0000000..daa3afc --- /dev/null +++ b/net/base/auth_cache.cc @@ -0,0 +1,69 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "net/base/auth_cache.h" + +#include "base/string_util.h" +#include "googleurl/src/gurl.h" + +// Create an AuthCacheKey from url and auth_info. +// +// The cache key is made up of two components, separated by a slash /. +// 1. The host (proxy or server) requesting authentication. For a server, +// this component also includes the scheme (protocol) and port (if not +// the default port for the protocol) to distinguish between multiple +// servers running on the same computer. +// 2. The realm. +// +// The format of the cache key for proxy auth is: +// proxy-host/auth-realm +// The format of the cache key for server auth is: +// url-scheme://url-host[:url-port]/auth-realm + +// static +AuthCache::AuthCacheKey AuthCache::HttpKey( + const GURL& url, + const AuthChallengeInfo& auth_info) { + AuthCacheKey auth_cache_key; + if (auth_info.is_proxy) { + auth_cache_key = WideToASCII(auth_info.host); + auth_cache_key.append("/"); + } else { + // Take scheme, host, and port from the url. + auth_cache_key = url.GetOrigin().spec(); + // This ends with a "/". + } + auth_cache_key.append(WideToUTF8(auth_info.realm)); + return auth_cache_key; +} + +AuthData* AuthCache::Lookup(const AuthCacheKey& key) { + AuthCacheMap::iterator iter = cache_.find(key); + return (iter == cache_.end()) ? 
NULL : iter->second; +} diff --git a/net/base/auth_cache.h b/net/base/auth_cache.h new file mode 100644 index 0000000..d4357f6 --- /dev/null +++ b/net/base/auth_cache.h @@ -0,0 +1,84 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef NET_BASE_AUTH_CACHE_H__ +#define NET_BASE_AUTH_CACHE_H__ + +#include <string> +#include <map> + +#include "net/base/auth.h" + +class GURL; + +// TODO(wtc): move AuthCache into the net namespace. 
+ +// The AuthCache class is a simple cache structure to store authentication +// information for ftp or http/https sites. Provides lookup, addition, and +// validation of entries. +class AuthCache { + public: + AuthCache() {} + ~AuthCache() {} + + typedef std::string AuthCacheKey; + + // Return the key for looking up the auth data in the auth cache for HTTP, + // consisting of the scheme, host, and port of the request URL and the + // realm in the auth challenge. + static AuthCacheKey HttpKey(const GURL& url, + const AuthChallengeInfo& auth_info); + + // Check if we have authentication data for given key. The key parameter + // is input, consisting of the hostname and any other info (such as realm) + // appropriate for the protocol. Return the address of corresponding + // AuthData object (if found) or NULL (if not found). + AuthData* Lookup(const AuthCacheKey& key); + + // Add to the cache. If key already exists, this will overwrite. Both + // parameters are IN only. + void Add(const AuthCacheKey& key, AuthData* value) { + cache_[key] = value; + } + + // Called when we have an auth failure to remove + // the likely invalid credentials. + void Remove(const AuthCacheKey& key) { + cache_.erase(key); + } + + private: + typedef scoped_refptr<AuthData> AuthCacheValue; + typedef std::map<AuthCacheKey,AuthCacheValue> AuthCacheMap; + + // internal representation of cache, an STL map. + AuthCacheMap cache_; +}; + +#endif // NET_BASE_AUTH_CACHE_H__ diff --git a/net/base/auth_cache_unittest.cc b/net/base/auth_cache_unittest.cc new file mode 100644 index 0000000..b43b9ce --- /dev/null +++ b/net/base/auth_cache_unittest.cc @@ -0,0 +1,72 @@ +// Copyright 2008, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "googleurl/src/gurl.h" +#include "net/base/auth_cache.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace { + +class AuthCacheTest : public testing::Test { +}; + +} // namespace + +TEST(AuthCacheTest, HttpKey) { + scoped_refptr<AuthChallengeInfo> auth_info = new AuthChallengeInfo; + auth_info->is_proxy = false; // server auth + // auth_info->host is intentionally left empty. 
+ auth_info->scheme = L"Basic"; + auth_info->realm = L"WallyWorld"; + + std::string url[] = { + "https://www.nowhere.org/dir/index.html", + "https://www.nowhere.org:443/dir/index.html", // default port + "https://www.nowhere.org:8443/dir/index.html", // non-default port + "https://www.nowhere.org", // no trailing slash + "https://foo:bar@www.nowhere.org/dir/index.html", // username:password + "https://www.nowhere.org/dir/index.html?id=965362", // query + "https://www.nowhere.org/dir/index.html#toc", // reference + }; + + std::string expected[] = { + "https://www.nowhere.org/WallyWorld", + "https://www.nowhere.org/WallyWorld", + "https://www.nowhere.org:8443/WallyWorld", + "https://www.nowhere.org/WallyWorld", + "https://www.nowhere.org/WallyWorld", + "https://www.nowhere.org/WallyWorld", + "https://www.nowhere.org/WallyWorld" + }; + + for (int i = 0; i < arraysize(url); i++) { + std::string key = AuthCache::HttpKey(GURL(url[i]), *auth_info); + EXPECT_EQ(expected[i], key); + } +} diff --git a/net/base/base64.cc b/net/base/base64.cc new file mode 100644 index 0000000..dcb5781 --- /dev/null +++ b/net/base/base64.cc @@ -0,0 +1,65 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "net/base/base64.h" + +#pragma warning(push) +#pragma warning(disable: 4267) +#include "third_party/modp_b64/modp_b64.h" +#pragma warning(pop) + +bool Base64Encode(const std::string& input, std::string* output) { + std::string temp; + temp.resize(modp_b64_encode_len(input.size())); // makes room for null byte + + // null terminates result since result is base64 text! + int input_size = static_cast<int>(input.size()); + int output_size= modp_b64_encode(&(temp[0]), input.data(), input_size); + if (output_size < 0) + return false; + + temp.resize(output_size); // strips off null byte + output->swap(temp); + return true; +} + +bool Base64Decode(const std::string& input, std::string* output) { + std::string temp; + temp.resize(modp_b64_decode_len(input.size())); + + // does not null terminate result since result is binary data! 
+ int input_size = static_cast<int>(input.size()); + int output_size = modp_b64_decode(&(temp[0]), input.data(), input_size); + if (output_size < 0) + return false; + + temp.resize(output_size); + output->swap(temp); + return true; +} diff --git a/net/base/base64.h b/net/base/base64.h new file mode 100644 index 0000000..83511d8 --- /dev/null +++ b/net/base/base64.h @@ -0,0 +1,43 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifndef NET_BASE_BASE64_H__
#define NET_BASE_BASE64_H__

#include <string>

// Writes the base64 encoding of |input| to |*output|.
// Returns true on success and false otherwise; |*output| is modified only on
// success.
bool Base64Encode(const std::string& input, std::string* output);

// Writes the bytes decoded from the base64 string |input| to |*output|.
// Returns true on success and false otherwise; |*output| is modified only on
// success.
bool Base64Decode(const std::string& input, std::string* output);

#endif  // NET_BASE_BASE64_H__
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "net/base/base64.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace { + +class Base64Test : public testing::Test { +}; + +} // namespace + +TEST(Base64Test, Basic) { + const std::string kText = "hello world"; + const std::string kBase64Text = "aGVsbG8gd29ybGQ="; + + std::string encoded, decoded; + bool ok; + + ok = Base64Encode(kText, &encoded); + EXPECT_TRUE(ok); + EXPECT_EQ(kBase64Text, encoded); + + ok = Base64Decode(encoded, &decoded); + EXPECT_TRUE(ok); + EXPECT_EQ(kText, decoded); +} diff --git a/net/base/bzip2_filter.cc b/net/base/bzip2_filter.cc new file mode 100644 index 0000000..4ebcba6 --- /dev/null +++ b/net/base/bzip2_filter.cc @@ -0,0 +1,124 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <minmax.h> + +#include "base/logging.h" +#include "net/base/bzip2_filter.h" + +BZip2Filter::BZip2Filter() + : decoding_status_(DECODING_UNINITIALIZED), + bzip2_data_stream_(NULL) { +} + +BZip2Filter::~BZip2Filter() { + if (bzip2_data_stream_.get() && + decoding_status_ != DECODING_UNINITIALIZED) { + BZ2_bzDecompressEnd(bzip2_data_stream_.get()); + } +} + +bool BZip2Filter::InitDecoding(bool use_small_memory) { + if (decoding_status_ != DECODING_UNINITIALIZED) + return false; + + // Initialize zlib control block + bzip2_data_stream_.reset(new bz_stream); + if (!bzip2_data_stream_.get()) + return false; + memset(bzip2_data_stream_.get(), 0, sizeof(bz_stream)); + + int result = BZ2_bzDecompressInit(bzip2_data_stream_.get(), + 0, + use_small_memory ? 
1 : 0); + + if (result != BZ_OK) + return false; + + decoding_status_ = DECODING_IN_PROGRESS; + return true; +} + +Filter::FilterStatus BZip2Filter::ReadFilteredData(char* dest_buffer, + int* dest_len) { + Filter::FilterStatus status = Filter::FILTER_ERROR; + + // check output + if (!dest_buffer || !dest_len || *dest_len <= 0) + return status; + + if (DECODING_DONE == decoding_status_) { + // this logic just follow gzip_filter, which be used to deal wth some + // server might send extra data after finish sending compress data + return CopyOut(dest_buffer, dest_len); + } + + if (decoding_status_ != DECODING_IN_PROGRESS) + return status; + + // Make sure we have valid input data + if (!next_stream_data_ || stream_data_len_ <= 0) + return status; + + // Fill in bzip2 control block + int ret, output_len = *dest_len; + *dest_len = 0; + + bzip2_data_stream_->next_in = next_stream_data_; + bzip2_data_stream_->avail_in = stream_data_len_; + bzip2_data_stream_->next_out = dest_buffer; + bzip2_data_stream_->avail_out = output_len; + + ret = BZ2_bzDecompress(bzip2_data_stream_.get()); + + // get real output length, rest data and rest data length + *dest_len = output_len - bzip2_data_stream_->avail_out; + + if (0 == bzip2_data_stream_->avail_in) { + next_stream_data_ = NULL; + stream_data_len_ = 0; + } else { + next_stream_data_ = bzip2_data_stream_->next_in; + stream_data_len_ = bzip2_data_stream_->avail_in; + } + + if (BZ_OK == ret) { + if (stream_data_len_) + status = Filter::FILTER_OK; + else + status = Filter::FILTER_NEED_MORE_DATA; + } else if (BZ_STREAM_END == ret) { + status = Filter::FILTER_DONE; + decoding_status_ = DECODING_DONE; + } else { + decoding_status_ = DECODING_ERROR; + } + + return status; +} diff --git a/net/base/bzip2_filter.h b/net/base/bzip2_filter.h new file mode 100644 index 0000000..69e5eeb --- /dev/null +++ b/net/base/bzip2_filter.h @@ -0,0 +1,108 @@ +// Copyright 2008, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// BZip2Filter applies bzip2 content encoding/decoding to a datastream. +// Since it is a new feature, and no specification said what 's bzip2 content +// composed of in http protocol. 
So I assume with bzip2 encoding the content +// is full format, which means the content should carry complete bzip2 head, +// such as inlcude magic number 1(BZh), block size bit, magic number 2(0x31, +// 0x41, 0x59, 0x26, 0xx53, 0x59) +// Maybe need to inserts a bzlib2 header to the data stream before calling +// decompression functionality, but at now I do not meet this sort of real +// scenarios. So let's see the further requests. +// +// This BZip2Filter internally uses third_party/bzip2 library to do decoding. +// +// BZip2Filter is also a subclass of Filter. See the latter's header file filter.h +// for sample usage. + +#ifndef NET_BASE_BZIP2_FILTER_H__ +#define NET_BASE_BZIP2_FILTER_H__ + +#include "base/scoped_ptr.h" +#include "net/base/filter.h" +#include "third_party/bzip2/bzlib.h" + +class BZip2Filter : public Filter { + public: + BZip2Filter(); + + virtual ~BZip2Filter(); + + // Initializes filter decoding mode and internal control blocks. + // Parameter use_small_memory specifies whether use small memory + // to decompresss data. If small is nonzero, the bzip2 library will + // use an alternative decompression algorithm which uses less memory + // but at the cost of decompressing more slowly (roughly speaking, + // half the speed, but the maximum memory requirement drops to + // around 2300k). For more information, see doc in http://www.bzip.org. + // The function returns true if success and false otherwise. + // The filter can only be initialized once. + bool InitDecoding(bool use_small_memory); + + // Decodes the pre-filter data and writes the output into the dest_buffer + // passed in. + // The function returns FilterStatus. See filter.h for its description. + // + // Since BZ2_bzDecompress need a full BZip header for decompression, so + // the incoming data should have the full BZip header, otherwise this + // function will give you nothing with FILTER_ERROR. 
+ // + // Upon entry, *dest_len is the total size (in number of chars) of the + // destination buffer. Upon exit, *dest_len is the actual number of chars + // written into the destination buffer. + // + // This function will fail if there is no pre-filter data in the + // stream_buffer_. On the other hand, *dest_len can be 0 upon successful + // return. For example, the internal zlib may process some pre-filter data + // but not produce output yet. + virtual FilterStatus ReadFilteredData(char* dest_buffer, int* dest_len); + + private: + enum DecodingStatus { + DECODING_UNINITIALIZED, + DECODING_IN_PROGRESS, + DECODING_DONE, + DECODING_ERROR + }; + + // Tracks the status of decoding. + // This variable is initialized by InitDecoding and updated only by + // ReadFilteredData. + DecodingStatus decoding_status_; + + // The control block of bzip which actually does the decoding. + // This data structure is initialized by InitDecoding and updated in + // ReadFilteredData. + scoped_ptr<bz_stream> bzip2_data_stream_; + + DISALLOW_EVIL_CONSTRUCTORS(BZip2Filter); +}; + +#endif // NET_BASE_BZIP2_FILTER_H__ diff --git a/net/base/bzip2_filter_unittest.cc b/net/base/bzip2_filter_unittest.cc new file mode 100644 index 0000000..5196e62 --- /dev/null +++ b/net/base/bzip2_filter_unittest.cc @@ -0,0 +1,394 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "minmax.h" + +#include <fstream> +#include <iostream> + +#include "base/file_util.h" +#include "base/path_service.h" +#include "base/scoped_ptr.h" +#include "net/base/bzip2_filter.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "third_party/bzip2/bzlib.h" + +namespace { + +const char* kExtraData = "Test Data, More Test Data, Even More Data of Test"; +const int kExtraDataBufferSize = 49; +const int kDefaultBufferSize = 4096; +const int kSmallBufferSize = 128; +const int kMaxBufferSize = 1048576; // 1048576 == 2^20 == 1 MB + +const char kApplicationOctetStream[] = "application/octet-stream"; + +class BZip2FilterUnitTest : public testing::Test { + protected: + virtual void SetUp() { + bzip2_encode_buffer_ = NULL; + + // Get the path of source data file. 
+ std::wstring file_path; + PathService::Get(base::DIR_SOURCE_ROOT, &file_path); + file_util::AppendToPath(&file_path, L"net"); + file_util::AppendToPath(&file_path, L"data"); + file_util::AppendToPath(&file_path, L"filter_unittests"); + file_util::AppendToPath(&file_path, L"google.txt"); + + // Read data from the file into buffer. + file_util::ReadFileToString(file_path, &source_buffer_); + + // Append the extra data to end of source + source_buffer_.append(kExtraData, kExtraDataBufferSize); + + // Encode the whole data with bzip2 for next testing + bzip2_data_stream_.reset(new bz_stream); + ASSERT_TRUE(bzip2_data_stream_.get()); + memset(bzip2_data_stream_.get(), 0, sizeof(bz_stream)); + + int result = BZ2_bzCompressInit(bzip2_data_stream_.get(), + 9, // 900k block size + 0, // quiet + 0); // default work factor + ASSERT_EQ(BZ_OK, result); + + bzip2_encode_buffer_ = new char[kDefaultBufferSize]; + ASSERT_TRUE(bzip2_encode_buffer_ != NULL); + bzip2_encode_len_ = kDefaultBufferSize; + + bzip2_data_stream_->next_in = const_cast<char*>(source_buffer()); + bzip2_data_stream_->avail_in = source_len(); + bzip2_data_stream_->next_out = bzip2_encode_buffer_; + bzip2_data_stream_->avail_out = bzip2_encode_len_; + do { + result = BZ2_bzCompress(bzip2_data_stream_.get(), BZ_FINISH); + } while (result == BZ_FINISH_OK); + + ASSERT_EQ(BZ_STREAM_END, result); + result = BZ2_bzCompressEnd(bzip2_data_stream_.get()); + ASSERT_EQ(BZ_OK, result); + bzip2_encode_len_ = bzip2_data_stream_->total_out_lo32; + + // Make sure we wrote something; otherwise not sure what to expect + ASSERT_GT(bzip2_encode_len_, 0); + ASSERT_LE(bzip2_encode_len_, kDefaultBufferSize); + } + + virtual void TearDown() { + delete[] bzip2_encode_buffer_; + bzip2_encode_buffer_ = NULL; + } + + // Use filter to decode compressed data, and compare the decoding result with + // the orginal Data. + // Parameters: Source and source_len are original data and its size. 
+ // Encoded_source and encoded_source_len are compressed data and its size. + // Output_buffer_size specifies the size of buffer to read out data from + // filter. + // get_extra_data specifies whether get the extra data because maybe some server + // might send extra data after finish sending compress data + void DecodeAndCompareWithFilter(Filter* filter, + const char* source, + int source_len, + const char* encoded_source, + int encoded_source_len, + int output_buffer_size, + bool get_extra_data) { + // Make sure we have enough space to hold the decoding output. + ASSERT_LE(source_len, kDefaultBufferSize); + ASSERT_LE(output_buffer_size, kDefaultBufferSize); + + int total_output_len = kDefaultBufferSize; + if (get_extra_data) + total_output_len += kExtraDataBufferSize; + char decode_buffer[kDefaultBufferSize + kExtraDataBufferSize]; + char* decode_next = decode_buffer; + int decode_avail_size = total_output_len; + + const char* encode_next = encoded_source; + int encode_avail_size = encoded_source_len; + + Filter::FilterStatus code = Filter::FILTER_OK; + while (code != Filter::FILTER_DONE) { + int encode_data_len; + if (get_extra_data && !encode_avail_size) + break; + encode_data_len = min(encode_avail_size, filter->stream_buffer_size()); + memcpy(filter->stream_buffer(), encode_next, encode_data_len); + filter->FlushStreamBuffer(encode_data_len); + encode_next += encode_data_len; + encode_avail_size -= encode_data_len; + + while (1) { + int decode_data_len = min(decode_avail_size, output_buffer_size); + + code = filter->ReadFilteredData(decode_next, &decode_data_len); + decode_next += decode_data_len; + decode_avail_size -= decode_data_len; + + ASSERT_TRUE(code != Filter::FILTER_ERROR); + + if (code == Filter::FILTER_NEED_MORE_DATA || + code == Filter::FILTER_DONE) { + if (code == Filter::FILTER_DONE && get_extra_data) + code = Filter::FILTER_OK; + else + break; + } + } + } + + // Compare the decoding result with source data + int decode_total_data_len = 
total_output_len - decode_avail_size; + EXPECT_TRUE(decode_total_data_len == source_len); + EXPECT_EQ(memcmp(source, decode_buffer, source_len), 0); + } + + // Unsafe function to use filter to decode compressed data. + // Parameters: Source and source_len are compressed data and its size. + // Dest is the buffer for decoding results. Upon entry, *dest_len is the size + // of the dest buffer. Upon exit, *dest_len is the number of chars written + // into the buffer. + Filter::FilterStatus DecodeAllWithFilter(Filter* filter, + const char* source, + int source_len, + char* dest, + int* dest_len) { + memcpy(filter->stream_buffer(), source, source_len); + filter->FlushStreamBuffer(source_len); + return filter->ReadFilteredData(dest, dest_len); + } + + const char* source_buffer() const { return source_buffer_.data(); } + int source_len() const { return static_cast<int>(source_buffer_.size()) - kExtraDataBufferSize; } + + std::string source_buffer_; + + scoped_ptr<bz_stream> bzip2_data_stream_; + char* bzip2_encode_buffer_; + int bzip2_encode_len_; +}; + +}; // namespace + +// Basic scenario: decoding bzip2 data with big enough buffer. +TEST_F(BZip2FilterUnitTest, DecodeBZip2) { + // Decode the compressed data with filter + scoped_ptr<Filter> filter( + Filter::Factory("bzip2", kApplicationOctetStream, kDefaultBufferSize)); + ASSERT_TRUE(filter.get()); + memcpy(filter->stream_buffer(), bzip2_encode_buffer_, bzip2_encode_len_); + filter->FlushStreamBuffer(bzip2_encode_len_); + + char bzip2_decode_buffer[kDefaultBufferSize]; + int bzip2_decode_size = kDefaultBufferSize; + Filter::FilterStatus result = + filter->ReadFilteredData(bzip2_decode_buffer, &bzip2_decode_size); + ASSERT_EQ(Filter::FILTER_DONE, result); + + // Compare the decoding result with source data + EXPECT_TRUE(bzip2_decode_size == source_len()); + EXPECT_EQ(memcmp(source_buffer(), bzip2_decode_buffer, source_len()), 0); +} + +// Tests we can call filter repeatedly to get all the data decoded. 
+// To do that, we create a filter with a small buffer that can not hold all +// the input data. +TEST_F(BZip2FilterUnitTest, DecodeWithSmallInputBuffer) { + scoped_ptr<Filter> filter( + Filter::Factory("bzip2", kApplicationOctetStream, kSmallBufferSize)); + ASSERT_TRUE(filter.get()); + DecodeAndCompareWithFilter(filter.get(), source_buffer(), source_len(), + bzip2_encode_buffer_, bzip2_encode_len_, + kDefaultBufferSize, false); +} + +// Tests we can decode when caller has small buffer to read out from filter. +TEST_F(BZip2FilterUnitTest, DecodeWithSmallOutputBuffer) { + scoped_ptr<Filter> filter( + Filter::Factory("bzip2", kApplicationOctetStream, kDefaultBufferSize)); + ASSERT_TRUE(filter.get()); + DecodeAndCompareWithFilter(filter.get(), source_buffer(), source_len(), + bzip2_encode_buffer_, bzip2_encode_len_, + kSmallBufferSize, false); +} + +// Tests we can still decode with just 1 byte buffer in the filter. +// The purpose of this tests are two: (1) Verify filter can parse partial BZip2 +// header correctly. (2) Sometimes the filter will consume input without +// generating output. Verify filter can handle it correctly. +TEST_F(BZip2FilterUnitTest, DecodeWithOneByteInputBuffer) { + scoped_ptr<Filter> filter( + Filter::Factory("bzip2", kApplicationOctetStream, 1)); + ASSERT_TRUE(filter.get()); + DecodeAndCompareWithFilter(filter.get(), source_buffer(), source_len(), + bzip2_encode_buffer_, bzip2_encode_len_, + kDefaultBufferSize, false); +} + +// Tests we can still decode with just 1 byte buffer in the filter and just 1 +// byte buffer in the caller. +TEST_F(BZip2FilterUnitTest, DecodeWithOneByteInputAndOutputBuffer) { + scoped_ptr<Filter> filter( + Filter::Factory("bzip2", kApplicationOctetStream, 1)); + ASSERT_TRUE(filter.get()); + DecodeAndCompareWithFilter(filter.get(), source_buffer(), source_len(), + bzip2_encode_buffer_, bzip2_encode_len_, 1, false); +} + +// Decoding bzip2 stream with corrupted data. 
+TEST_F(BZip2FilterUnitTest, DecodeCorruptedData) { + char corrupt_data[kDefaultBufferSize]; + int corrupt_data_len = bzip2_encode_len_; + memcpy(corrupt_data, bzip2_encode_buffer_, bzip2_encode_len_); + + char corrupt_decode_buffer[kDefaultBufferSize]; + int corrupt_decode_size = kDefaultBufferSize; + + // Decode the correct data with filter + scoped_ptr<Filter> filter1( + Filter::Factory("bzip2", kApplicationOctetStream, kDefaultBufferSize)); + ASSERT_TRUE(filter1.get()); + + Filter::FilterStatus code = DecodeAllWithFilter(filter1.get(), + corrupt_data, + corrupt_data_len, + corrupt_decode_buffer, + &corrupt_decode_size); + + // Expect failures + EXPECT_TRUE(code == Filter::FILTER_DONE); + + // Decode the corrupted data with filter + scoped_ptr<Filter> filter2( + Filter::Factory("bzip2", kApplicationOctetStream, kDefaultBufferSize)); + ASSERT_TRUE(filter2.get()); + + int pos = corrupt_data_len / 2; + corrupt_data[pos] = !corrupt_data[pos]; + + code = DecodeAllWithFilter(filter2.get(), + corrupt_data, + corrupt_data_len, + corrupt_decode_buffer, + &corrupt_decode_size); + + // Expect failures + EXPECT_TRUE(code != Filter::FILTER_DONE); +} + +// Decoding bzip2 stream with missing data. 
+TEST_F(BZip2FilterUnitTest, DecodeMissingData) { + char corrupt_data[kDefaultBufferSize]; + int corrupt_data_len = bzip2_encode_len_; + memcpy(corrupt_data, bzip2_encode_buffer_, bzip2_encode_len_); + + int pos = corrupt_data_len / 2; + int len = corrupt_data_len - pos - 1; + memcpy(&corrupt_data[pos], &corrupt_data[pos+1], len); + --corrupt_data_len; + + // Decode the corrupted data with filter + scoped_ptr<Filter> filter( + Filter::Factory("bzip2", kApplicationOctetStream, kDefaultBufferSize)); + ASSERT_TRUE(filter.get()); + char corrupt_decode_buffer[kDefaultBufferSize]; + int corrupt_decode_size = kDefaultBufferSize; + + Filter::FilterStatus code = DecodeAllWithFilter(filter.get(), + corrupt_data, + corrupt_data_len, + corrupt_decode_buffer, + &corrupt_decode_size); + // Expect failures + EXPECT_TRUE(code != Filter::FILTER_DONE); +} + +// Decoding bzip2 stream with corrupted header. +TEST_F(BZip2FilterUnitTest, DecodeCorruptedHeader) { + char corrupt_data[kDefaultBufferSize]; + int corrupt_data_len = bzip2_encode_len_; + memcpy(corrupt_data, bzip2_encode_buffer_, bzip2_encode_len_); + + corrupt_data[2] = !corrupt_data[2]; + + // Decode the corrupted data with filter + scoped_ptr<Filter> filter( + Filter::Factory("bzip2", kApplicationOctetStream, kDefaultBufferSize)); + ASSERT_TRUE(filter.get()); + char corrupt_decode_buffer[kDefaultBufferSize]; + int corrupt_decode_size = kDefaultBufferSize; + + Filter::FilterStatus code = DecodeAllWithFilter(filter.get(), + corrupt_data, + corrupt_data_len, + corrupt_decode_buffer, + &corrupt_decode_size); + + // Expect failures + EXPECT_TRUE(code == Filter::FILTER_ERROR); +} + +// Tests we can decode all compress data and get extra data which is +// appended to compress data stream by some server when it finish +// sending compress data. 
+TEST_F(BZip2FilterUnitTest, DecodeWithExtraDataAndSmallOutputBuffer) { + char more_data[kDefaultBufferSize + kExtraDataBufferSize]; + int more_data_len = bzip2_encode_len_ + kExtraDataBufferSize; + memcpy(more_data, bzip2_encode_buffer_, bzip2_encode_len_); + memcpy(more_data + bzip2_encode_len_, kExtraData, kExtraDataBufferSize); + + scoped_ptr<Filter> filter( + Filter::Factory("bzip2", kApplicationOctetStream, kDefaultBufferSize)); + ASSERT_TRUE(filter.get()); + DecodeAndCompareWithFilter(filter.get(), + source_buffer(), source_len() + kExtraDataBufferSize, + more_data, + more_data_len, + kSmallBufferSize, + true); +} + +TEST_F(BZip2FilterUnitTest, DecodeWithExtraDataAndSmallInputBuffer) { + char more_data[kDefaultBufferSize + kExtraDataBufferSize]; + int more_data_len = bzip2_encode_len_ + kExtraDataBufferSize; + memcpy(more_data, bzip2_encode_buffer_, bzip2_encode_len_); + memcpy(more_data + bzip2_encode_len_, kExtraData, kExtraDataBufferSize); + + scoped_ptr<Filter> filter( + Filter::Factory("bzip2", kApplicationOctetStream, kSmallBufferSize)); + ASSERT_TRUE(filter.get()); + DecodeAndCompareWithFilter(filter.get(), + source_buffer(), source_len() + kExtraDataBufferSize, + more_data, + more_data_len, + kDefaultBufferSize, + true); +} diff --git a/net/base/cert_status_flags.h b/net/base/cert_status_flags.h new file mode 100644 index 0000000..0812eb0 --- /dev/null +++ b/net/base/cert_status_flags.h @@ -0,0 +1,63 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef NET_BASE_CERT_STATUS_FLAGS_H__ +#define NET_BASE_CERT_STATUS_FLAGS_H__ + +namespace net { + +// Status flags, such as errors and extended validation. +enum { + // Bits 0 to 15 are for errors. + CERT_STATUS_ALL_ERRORS = 0xFFFF, + CERT_STATUS_COMMON_NAME_INVALID = 1 << 0, + CERT_STATUS_DATE_INVALID = 1 << 1, + CERT_STATUS_AUTHORITY_INVALID = 1 << 2, + // 1 << 3 is reserved for ERR_CERT_CONTAINS_ERRORS (not useful with WinHTTP). + CERT_STATUS_NO_REVOCATION_MECHANISM = 1 << 4, + CERT_STATUS_UNABLE_TO_CHECK_REVOCATION = 1 << 5, + CERT_STATUS_REVOKED = 1 << 6, + CERT_STATUS_INVALID = 1 << 7, + + // Bits 16 to 30 are for non-error statuses. 
+ CERT_STATUS_IS_EV = 1 << 16, + CERT_STATUS_REV_CHECKING_ENABLED = 1 << 17, + + // 1 << 31 (the sign bit) is reserved so that the cert status will never be + // negative. +}; + +// Returns true if the specified cert status has an error set. +static bool IsCertStatusError(int status) { + return (CERT_STATUS_ALL_ERRORS & status) != 0; +} + +} // namespace net + +#endif // NET_BASE_CERT_STATUS_FLAGS_H__ diff --git a/net/base/client_socket.h b/net/base/client_socket.h new file mode 100644 index 0000000..8661adc --- /dev/null +++ b/net/base/client_socket.h @@ -0,0 +1,71 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef NET_BASE_CLIENT_SOCKET_H_ +#define NET_BASE_CLIENT_SOCKET_H_ + +#include "net/base/socket.h" + +namespace net { + +class ClientSocket : public Socket { + public: + // Called to establish a connection. Returns OK if the connection could be + // established synchronously. Otherwise, ERR_IO_PENDING is returned and the + // given callback will be notified asynchronously when the connection is + // established or when an error occurs. The result is some other error code + // if the connection could not be established. + // + // The socket's Read and Write methods may not be called until Connect + // succeeds. + // + // It is valid to call Connect on an already connected socket, in which case + // OK is simply returned. + // + // Connect may also be called again after a call to the Close method. + // + virtual int Connect(CompletionCallback* callback) = 0; + + // If a non-fatal error occurs during Connect, the consumer can call this + // method to re-Connect ignoring the error that occured. This call is only + // valid for certain errors. + virtual int ReconnectIgnoringLastError(CompletionCallback* callback) = 0; + + // Called to disconnect a connected socket. Does nothing if the socket is + // already disconnected. After calling Disconnect it is possible to call + // Connect again to establish a new connection. + virtual void Disconnect() = 0; + + // Called to test if the socket is connected. 
+ virtual bool IsConnected() const = 0; +}; + +} // namespace net + +#endif // NET_BASE_CLIENT_SOCKET_H_ diff --git a/net/base/client_socket_factory.cc b/net/base/client_socket_factory.cc new file mode 100644 index 0000000..932fc0a0 --- /dev/null +++ b/net/base/client_socket_factory.cc @@ -0,0 +1,57 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include "net/base/client_socket_factory.h" + +#include "base/singleton.h" +#include "net/base/ssl_client_socket.h" +#include "net/base/tcp_client_socket.h" + +namespace net { + +class DefaultClientSocketFactory : public ClientSocketFactory { + public: + virtual ClientSocket* CreateTCPClientSocket( + const AddressList& addresses) { + return new TCPClientSocket(addresses); + } + + virtual ClientSocket* CreateSSLClientSocket( + ClientSocket* transport_socket, + const std::string& hostname) { + return new SSLClientSocket(transport_socket, hostname); + } +}; + +// static +ClientSocketFactory* ClientSocketFactory::GetDefaultFactory() { + return Singleton<DefaultClientSocketFactory>::get(); +} + +} // namespace net diff --git a/net/base/client_socket_factory.h b/net/base/client_socket_factory.h new file mode 100644 index 0000000..9a1b412 --- /dev/null +++ b/net/base/client_socket_factory.h @@ -0,0 +1,57 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef NET_BASE_CLIENT_SOCKET_FACTORY_H_ +#define NET_BASE_CLIENT_SOCKET_FACTORY_H_ + +#include <string> + +namespace net { + +class AddressList; +class ClientSocket; + +// An interface used to instantiate ClientSocket objects. Used to facilitate +// testing code with mock socket implementations. +class ClientSocketFactory { + public: + virtual ClientSocket* CreateTCPClientSocket( + const AddressList& addresses) = 0; + + virtual ClientSocket* CreateSSLClientSocket( + ClientSocket* transport_socket, + const std::string& hostname) = 0; + + // Returns the default ClientSocketFactory. + static ClientSocketFactory* GetDefaultFactory(); +}; + +} // namespace net + +#endif // NET_BASE_CLIENT_SOCKET_FACTORY_H_ diff --git a/net/base/completion_callback.h b/net/base/completion_callback.h new file mode 100644 index 0000000..f9c600c --- /dev/null +++ b/net/base/completion_callback.h @@ -0,0 +1,53 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef NET_BASE_COMPLETION_CALLBACK_H__ +#define NET_BASE_COMPLETION_CALLBACK_H__ + +#include "base/task.h" + +namespace net { + +// A callback specialization that takes a single int parameter. Usually this +// is used to report a byte count or network error code. +typedef Callback1<int>::Type CompletionCallback; + +// Used to implement a CompletionCallback. 
+template <class T> +class CompletionCallbackImpl : + public CallbackImpl< T, void (T::*)(int), Tuple1<int> > { + public: + CompletionCallbackImpl(T* obj, void (T::* meth)(int)) + : CallbackImpl< T, void (T::*)(int), Tuple1<int> >::CallbackImpl(obj, meth) { + } +}; + +} // namespace net + +#endif // NET_BASE_COMPLETION_CALLBACK_H__ diff --git a/net/base/cookie_monster.cc b/net/base/cookie_monster.cc new file mode 100644 index 0000000..0483acb --- /dev/null +++ b/net/base/cookie_monster.cc @@ -0,0 +1,1043 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Portions of this code based on Mozilla: +// (netwerk/cookie/src/nsCookieService.cpp) +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is mozilla.org code. + * + * The Initial Developer of the Original Code is + * Netscape Communications Corporation. + * Portions created by the Initial Developer are Copyright (C) 2003 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Daniel Witte (dwitte@stanford.edu) + * Michiel van Leeuwen (mvl@exedo.nl) + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. 
If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */

#include "net/base/cookie_monster.h"

#include <algorithm>

#include "base/basictypes.h"
#include "base/logging.h"
#include "base/scoped_ptr.h"
#include "base/string_tokenizer.h"
#include "base/string_util.h"
#include "googleurl/src/gurl.h"
#include "googleurl/src/url_canon.h"
#include "net/base/net_util.h"
#include "net/base/registry_controlled_domain.h"

// Uncomment the #define below to turn on verbose cookie logging.
// Note that the |severity| argument of COOKIE_DLOG is ignored: both variants
// expand to a DLOG_IF at INFO severity, enabled (1) or disabled (0).
// #define COOKIE_LOGGING_ENABLED
#ifdef COOKIE_LOGGING_ENABLED
#define COOKIE_DLOG(severity) DLOG_IF(INFO, 1)
#else
#define COOKIE_DLOG(severity) DLOG_IF(INFO, 0)
#endif

// Whether file:// URLs are allowed to set/get cookies (see
// HasCookieableScheme()).  Off by default.
/*static*/ bool CookieMonster::enable_file_scheme_ = false;

// static
// Permanently enables cookies on the file:// scheme for the process.
void CookieMonster::EnableFileScheme() {
  enable_file_scheme_ = true;
}

// Creates a cookie monster without a backing PersistentCookieStore.
CookieMonster::CookieMonster()
    : initialized_(false),
      store_(NULL) {
}

// Creates a cookie monster backed by |store|.  The store is not loaded here;
// see InitStore().
CookieMonster::CookieMonster(PersistentCookieStore* store)
    : initialized_(false),
      store_(store) {
}

// Frees all in-memory cookies without syncing the deletions to the store.
CookieMonster::~CookieMonster() {
  DeleteAll(false);
}

// Loads every cookie from |store_| into the in-memory map.
void CookieMonster::InitStore() {
  DCHECK(store_) << "Store must exist to initialize";

  // Initialize the store and sync in any saved persistent cookies.  We don't
  // care if it's expired, insert it so it can be garbage collected, removed,
  // and sync'd.
  std::vector<KeyedCanonicalCookie> cookies;
  store_->Load(&cookies);
  for (std::vector<KeyedCanonicalCookie>::const_iterator it = cookies.begin();
       it != cookies.end(); ++it) {
    // |false|: these cookies came from the store, don't write them back.
    InternalInsertCookie(it->first, it->second, false);
  }
}

// The system resolution is not high enough, so we can have multiple
// set cookies that result in the same system time.  When this happens, we
// increment by one Time unit.  Let's hope computers don't get too fast.
Time CookieMonster::CurrentTime() {
  return std::max(Time::Now(),
      Time::FromInternalValue(last_time_seen_.ToInternalValue() + 1));
}

// Parse a cookie expiration time.  We try to be lenient, but we need to
// assume some order to distinguish the fields.  The basic rules:
//  - The month name must be present and prefix the first 3 letters of the
//    full month name (jan for January, jun for June).
//  - If the year is <= 2 digits, it must occur after the day of month.
//  - The time must be of the format hh:mm:ss.
// An average cookie expiration will look something like this:
//   Sat, 15-Apr-17 21:01:22 GMT
// Returns a null Time() if any required field is missing or out of range.
Time CookieMonster::ParseCookieTime(const std::string& time_string) {
  static const char* kMonths[] = { "jan", "feb", "mar", "apr", "may", "jun",
                                   "jul", "aug", "sep", "oct", "nov", "dec" };
  static const int kMonthsLen = arraysize(kMonths);
  // We want to be pretty liberal, and support most punctuation (non-letter,
  // non-digit) characters as a delimiter.  We can't treat : as a delimiter,
  // because it is the delimiter for hh:mm:ss, and we want to keep this field
  // together.  We make sure to include - and +, since they could prefix
  // numbers.  If the cookie attribute came in in quotes (ex expires="XXX"),
  // the quotes will be preserved, and we will get them here.  So we make sure
  // to include quote characters, and also \ for anything that was internally
  // escaped.
  static const char* kDelimiters = "\t !\"#$%&'()*+,-./;<=>?@[\\]^_`{|}~";

  Time::Exploded exploded = {0};

  StringTokenizer tokenizer(time_string, kDelimiters);

  bool found_day_of_month = false;
  bool found_month = false;
  bool found_time = false;
  bool found_year = false;

  while (tokenizer.GetNext()) {
    const std::string token = tokenizer.token();
    DCHECK(!token.empty());
    // A token is classified solely by its first character.
    bool numerical = IsAsciiDigit(token[0]);

    // String field
    if (!numerical) {
      if (!found_month) {
        for (int i = 0; i < kMonthsLen; ++i) {
          // Match prefix, so we could match January, etc
          if (StrNCaseCmp(token.c_str(), kMonths[i], 3) == 0) {
            exploded.month = i + 1;
            found_month = true;
            break;
          }
        }
      } else {
        // If we've gotten here, it means we've already found and parsed our
        // month, and we have another string, which we would expect to be
        // the time zone name.  According to the RFC and my experiments with
        // how sites format their expirations, we don't have much of a reason
        // to support timezones.  We don't want to ever barf on user input,
        // but this DCHECK should pass for well-formed data.
        // DCHECK(token == "GMT");
      }
    // Numeric field w/ a colon
    } else if (token.find(':') != std::string::npos) {
      // NOTE(review): "%hu" requires unsigned short destinations -- assumes
      // Time::Exploded's hour/minute/second fields have that type; confirm.
      if (!found_time &&
          sscanf_s(token.c_str(), "%2hu:%2hu:%2hu", &exploded.hour,
                   &exploded.minute, &exploded.second) == 3) {
        found_time = true;
      } else {
        // We should only ever encounter one time-like thing.  If we're here,
        // it means we've found a second, which shouldn't happen.  We keep
        // the first.  This check should be ok for well-formed input:
        // NOTREACHED();
      }
    // Numeric field
    } else {
      // Overflow with atoi() is unspecified, so we enforce a max length.
      // The first number of at most 2 digits is the day of month; the next
      // number of at most 5 digits is the year.
      if (!found_day_of_month && token.length() <= 2) {
        exploded.day_of_month = atoi(token.c_str());
        found_day_of_month = true;
      } else if (!found_year && token.length() <= 5) {
        exploded.year = atoi(token.c_str());
        found_year = true;
      } else {
        // If we're here, it means we've either found an extra numeric field,
        // or a numeric field which was too long.  For well-formed input, the
        // following check would be reasonable:
        // NOTREACHED();
      }
    }
  }

  if (!found_day_of_month || !found_month || !found_time || !found_year) {
    // We didn't find all of the fields we need.  For well-formed input, the
    // following check would be reasonable:
    // NOTREACHED() << "Cookie parse expiration failed: " << time_string;
    return Time();
  }

  // Normalize the year to expand abbreviated years to the full year:
  // 69-99 map to 1969-1999, 0-68 map to 2000-2068.
  if (exploded.year >= 69 && exploded.year <= 99)
    exploded.year += 1900;
  if (exploded.year >= 0 && exploded.year <= 68)
    exploded.year += 2000;

  // If our values are within their correct ranges, we got our time.
  if (exploded.day_of_month >= 1 && exploded.day_of_month <= 31 &&
      exploded.month >= 1 && exploded.month <= 12 &&
      exploded.year >= 1601 && exploded.year <= 30827 &&
      exploded.hour <= 23 && exploded.minute <= 59 && exploded.second <= 59) {
    return Time::FromUTCExploded(exploded);
  }

  // One of our values was out of expected range.  For well-formed input,
  // the following check would be reasonable:
  // NOTREACHED() << "Cookie exploded expiration failed: " << time_string;

  return Time();
}

// Determine the cookie domain key to use for setting the specified cookie.
+// On success returns true, and sets cookie_domain_key to either a +// -host cookie key (ex: "google.com") +// -domain cookie key (ex: ".google.com") +static bool GetCookieDomainKey(const GURL& url, + const CookieMonster::ParsedCookie& pc, + std::string* cookie_domain_key) { + const std::string url_host(url.host()); + if (!pc.HasDomain() || pc.Domain().empty()) { + // No domain was specified in cookie -- default to host cookie. + *cookie_domain_key = url_host; + DCHECK((*cookie_domain_key)[0] != '.'); + return true; + } + + // Get the normalized domain specified in cookie line. + // Note: The RFC says we can reject a cookie if the domain + // attribute does not start with a dot. IE/FF/Safari however, allow a cookie + // of the form domain=my.domain.com, treating it the same as + // domain=.my.domain.com -- for compatibility we do the same here. Firefox + // also treats domain=.....my.domain.com like domain=.my.domain.com, but + // neither IE nor Safari do this, and we don't either. + std::string cookie_domain(net_util::CanonicalizeHost(pc.Domain(), NULL)); + if (cookie_domain.empty()) + return false; + if (cookie_domain[0] != '.') + cookie_domain = "." + cookie_domain; + + // Ensure |url| and |cookie_domain| have the same domain+registry. + const std::string url_domain_and_registry( + RegistryControlledDomainService::GetDomainAndRegistry(url)); + if (url_domain_and_registry.empty()) + return false; // IP addresses/intranet hosts can't set domain cookies. + const std::string cookie_domain_and_registry( + RegistryControlledDomainService::GetDomainAndRegistry(cookie_domain)); + if (url_domain_and_registry != cookie_domain_and_registry) + return false; // Can't set a cookie on a different domain + registry. + + // Ensure |url_host| is |cookie_domain| or one of its subdomains. Given that + // we know the domain+registry are the same from the above checks, this is + // basically a simple string suffix check. + if ((url_host.length() < cookie_domain.length()) ? 
+ (cookie_domain != ("." + url_host)) : + url_host.compare(url_host.length() - cookie_domain.length(), + cookie_domain.length(), cookie_domain)) + return false; + + + *cookie_domain_key = cookie_domain; + return true; +} + +static std::string CanonPath(const GURL& url, + const CookieMonster::ParsedCookie& pc) { + // The RFC says the path should be a prefix of the current URL path. + // However, Mozilla allows you to set any path for compatibility with + // broken websites. We unfortunately will mimic this behavior. We try + // to be generous and accept cookies with an invalid path attribute, and + // default the path to something reasonable. + + // The path was supplied in the cookie, we'll take it. + if (pc.HasPath() && !pc.Path().empty() && pc.Path()[0] == '/') + return pc.Path(); + + // The path was not supplied in the cookie or invalid, we will default + // to the current URL path. + // """Defaults to the path of the request URL that generated the + // Set-Cookie response, up to, but not including, the + // right-most /.""" + // How would this work for a cookie on /? We will include it then. + const std::string& url_path = url.path(); + + std::string::size_type idx = url_path.find_last_of('/'); + + // The cookie path was invalid or a single '/'. + if (idx == 0 || idx == std::string::npos) + return std::string("/"); + + // Return up to the rightmost '/'. + return url_path.substr(0, idx); +} + +static Time CanonExpiration(const CookieMonster::ParsedCookie& pc, + const Time& current) { + // First, try the Max-Age attribute. + uint64 max_age = 0; + if (pc.HasMaxAge() && + sscanf_s(pc.MaxAge().c_str(), " %I64u", &max_age) == 1) { + return current + TimeDelta::FromSeconds(max_age); + } + + // Try the Expires attribute. + if (pc.HasExpires()) + return CookieMonster::ParseCookieTime(pc.Expires()); + + // Invalid or no expiration, persistent cookie. 
+ return Time(); +} + +static bool HasCookieableScheme(const GURL& url) { + static const char* kCookieableSchemes[] = { "http", "https", "file" }; + static const int kCookieableSchemesLen = arraysize(kCookieableSchemes); + static const int kCookieableSchemesFileIndex = 2; + + // Make sure the request is on a cookie-able url scheme. + for (int i = 0; i < kCookieableSchemesLen; ++i) { + // We matched a scheme. + if (url.SchemeIs(kCookieableSchemes[i])) { + // This is file:// scheme + if (i == kCookieableSchemesFileIndex) + return CookieMonster::enable_file_scheme_; + // We've matched a supported scheme. + return true; + } + } + + // The scheme didn't match any in our whitelist. + COOKIE_DLOG(WARNING) << "Unsupported cookie scheme: " << url.scheme(); + return false; +} + +bool CookieMonster::SetCookie(const GURL& url, + const std::string& cookie_line) { + Time creation_date = CurrentTime(); + last_time_seen_ = creation_date; + return SetCookieWithCreationTime(url, cookie_line, creation_date); +} + +bool CookieMonster::SetCookieWithCreationTime(const GURL& url, + const std::string& cookie_line, + const Time& creation_time) { + DCHECK(!creation_time.is_null()); + + if (!HasCookieableScheme(url)) { + DLOG(WARNING) << "Unsupported cookie scheme: " << url.scheme(); + return false; + } + + AutoLock autolock(lock_); + InitIfNecessary(); + + COOKIE_DLOG(INFO) << "SetCookie() line: " << cookie_line; + + // Parse the cookie. 
+ ParsedCookie pc(cookie_line); + + if (!pc.IsValid()) { + COOKIE_DLOG(WARNING) << "Couldn't parse cookie"; + return false; + } + + std::string cookie_domain; + if (!GetCookieDomainKey(url, pc, &cookie_domain)) { + return false; + } + + std::string cookie_path = CanonPath(url, pc); + + scoped_ptr<CanonicalCookie> cc; + Time cookie_expires = CanonExpiration(pc, creation_time); + + cc.reset(new CanonicalCookie(pc.Name(), pc.Value(), cookie_path, + pc.IsSecure(), pc.IsHttpOnly(), + creation_time, !cookie_expires.is_null(), + cookie_expires)); + + if (!cc.get()) { + COOKIE_DLOG(WARNING) << "Failed to allocate CanonicalCookie"; + return false; + } + + // We should have only purged at most one matching cookie. + int num_deleted = DeleteEquivalentCookies(cookie_domain, *cc); + + COOKIE_DLOG(INFO) << "SetCookie() cc: " << cc->DebugString(); + + // Realize that we might be setting an expired cookie, and the only point + // was to delete the cookie which we've already done. + if (!cc->IsExpired(creation_time)) + InternalInsertCookie(cookie_domain, cc.release(), true); + + // We assume that hopefully setting a cookie will be less common than + // querying a cookie. Since setting a cookie can put us over our limits, + // make sure that we garbage collect... We can also make the assumption that + // if a cookie was set, in the common case it will be used soon after, + // and we will purge the expired cookies in GetCookies(). 
+ GarbageCollect(creation_time, cookie_domain); + + return true; +} + +void CookieMonster::SetCookies(const GURL& url, + const std::vector<std::string>& cookies) { + for (std::vector<std::string>::const_iterator iter = cookies.begin(); + iter != cookies.end(); ++iter) + SetCookie(url, *iter); +} + +void CookieMonster::InternalInsertCookie(const std::string& key, + CanonicalCookie* cc, + bool sync_to_store) { + if (cc->IsPersistent() && store_ && sync_to_store) + store_->AddCookie(key, *cc); + cookies_.insert(CookieMap::value_type(key, cc)); +} + +void CookieMonster::InternalDeleteCookie(CookieMap::iterator it, + bool sync_to_store) { + CanonicalCookie* cc = it->second; + COOKIE_DLOG(INFO) << "InternalDeleteCookie() cc: " << cc->DebugString(); + if (cc->IsPersistent() && store_ && sync_to_store) + store_->DeleteCookie(*cc); + cookies_.erase(it); + delete cc; +} + +int CookieMonster::DeleteEquivalentCookies(const std::string& key, + const CanonicalCookie& ecc) { + int num_deleted = 0; + for (CookieMapItPair its = cookies_.equal_range(key); + its.first != its.second; ) { + CookieMap::iterator curit = its.first; + CanonicalCookie* cc = curit->second; + ++its.first; + + // TODO while we're here, we might as well purge expired cookies too. + + if (ecc.IsEquivalent(*cc)) { + InternalDeleteCookie(curit, true); + ++num_deleted; +#ifdef NDEBUG + // We should only ever find a single equivalent cookie + break; +#endif + } + } + + // Our internal state should be consistent, we should never have more + // than one equivalent cookie, since they should overwrite each other. + DCHECK(num_deleted <= 1); + + return num_deleted; +} + +// TODO we should be sorting by last access time, however, right now +// we're not saving an access time, so we're sorting by creation time. 
+static bool OldestCookieSorter(const CookieMonster::CookieMap::iterator& it1, + const CookieMonster::CookieMap::iterator& it2) { + return it1->second->CreationDate() < it2->second->CreationDate(); +} + +// is vector::size_type always going to be size_t? +int CookieMonster::GarbageCollectRange(const Time& current, + const CookieMapItPair& itpair, + size_t num_max, size_t num_purge) { + int num_deleted = 0; + + // First, walk through and delete anything that's expired. + // Save a list of iterators to the ones that weren't expired + std::vector<CookieMap::iterator> cookie_its; + for (CookieMap::iterator it = itpair.first, end = itpair.second; it != end;) { + CookieMap::iterator curit = it; + CanonicalCookie* cc = curit->second; + ++it; + + if (cc->IsExpired(current)) { + InternalDeleteCookie(curit, true); + ++num_deleted; + } else { + cookie_its.push_back(curit); + } + } + + if (cookie_its.size() > num_max) { + COOKIE_DLOG(INFO) << "GarbageCollectRange() Deep Garbage Collect."; + num_purge += cookie_its.size() - num_max; + // Sort the top N we want to purge. + std::partial_sort(cookie_its.begin(), cookie_its.begin() + num_purge, + cookie_its.end(), OldestCookieSorter); + + // TODO should probably use an iterator and not an index. + for (size_t i = 0; i < num_purge; ++i) { + InternalDeleteCookie(cookie_its[i], true); + ++num_deleted; + } + } + + return num_deleted; +} + +// TODO Whenever we delete, check last_cur_utc_... +int CookieMonster::GarbageCollect(const Time& current, + const std::string& key) { + // Based off of the Mozilla defaults + // It might seem scary to have a high purge value, but really it's not. You + // just make sure that you increase the max to cover the increase in purge, + // and we would have been purging the same amount of cookies. We're just + // going through the garbage collection process less often. 
+ static const size_t kNumCookiesPerHost = 70; // ~50 cookies + static const size_t kNumCookiesPerHostPurge = 20; + static const size_t kNumCookiesTotal = 1100; // ~1000 cookies + static const size_t kNumCookiesTotalPurge = 100; + + int num_deleted = 0; + + // Collect garbage for this key. + if (cookies_.count(key) > kNumCookiesPerHost) { + COOKIE_DLOG(INFO) << "GarbageCollect() key: " << key; + num_deleted += GarbageCollectRange(current, cookies_.equal_range(key), + kNumCookiesPerHost, + kNumCookiesPerHostPurge); + } + + // Collect garbage for everything. + if (cookies_.size() > kNumCookiesTotal) { + COOKIE_DLOG(INFO) << "GarbageCollect() everything"; + num_deleted += GarbageCollectRange(current, + CookieMapItPair(cookies_.begin(), + cookies_.end()), + kNumCookiesTotal, kNumCookiesTotalPurge); + } + + return num_deleted; +} + +int CookieMonster::DeleteAll(bool sync_to_store) { + AutoLock autolock(lock_); + InitIfNecessary(); + + int num_deleted = 0; + for (CookieMap::iterator it = cookies_.begin(); it != cookies_.end();) { + CookieMap::iterator curit = it; + ++it; + InternalDeleteCookie(curit, sync_to_store); + ++num_deleted; + } + + return num_deleted; +} + +int CookieMonster::DeleteAllCreatedBetween(const Time& delete_begin, + const Time& delete_end, + bool sync_to_store) { + AutoLock autolock(lock_); + InitIfNecessary(); + + int num_deleted = 0; + for (CookieMap::iterator it = cookies_.begin(); it != cookies_.end();) { + CookieMap::iterator curit = it; + CanonicalCookie* cc = curit->second; + ++it; + + if (cc->CreationDate() >= delete_begin && + (delete_end.is_null() || cc->CreationDate() < delete_end)) { + InternalDeleteCookie(curit, sync_to_store); + ++num_deleted; + } + } + + return num_deleted; +} + +int CookieMonster::DeleteAllCreatedAfter(const Time& delete_begin, + bool sync_to_store) { + return DeleteAllCreatedBetween(delete_begin, Time(), sync_to_store); +} + +bool CookieMonster::DeleteCookie(const std::string& domain, + const CanonicalCookie& cookie, 
+ bool sync_to_store) { + AutoLock autolock(lock_); + InitIfNecessary(); + + for (CookieMapItPair its = cookies_.equal_range(domain); + its.first != its.second; ++its.first) { + // The creation date acts as our unique index... + if (its.first->second->CreationDate() == cookie.CreationDate()) { + InternalDeleteCookie(its.first, sync_to_store); + return true; + } + } + return false; +} + +// Mozilla sorts on the path length (longest first), and then it +// sorts by creation time (oldest first). +// The RFC says the sort order for the domain attribute is undefined. +static bool CookieSorter(CookieMonster::CanonicalCookie* cc1, + CookieMonster::CanonicalCookie* cc2) { + if (cc1->Path().length() == cc2->Path().length()) + return cc1->CreationDate() < cc2->CreationDate(); + return cc1->Path().length() > cc2->Path().length(); +} + +std::string CookieMonster::GetCookies(const GURL& url) { + return GetCookiesWithOptions(url, NORMAL); +} + +// Currently our cookie datastructure is based on Mozilla's approach. We have a +// hash keyed on the cookie's domain, and for any query we walk down the domain +// components and probe for cookies until we reach the TLD, where we stop. +// For example, a.b.blah.com, we would probe +// - a.b.blah.com +// - .a.b.blah.com (TODO should we check this first or second?) +// - .b.blah.com +// - .blah.com +// There are some alternative datastructures we could try, like a +// search/prefix trie, where we reverse the hostname and query for all +// keys that are a prefix of our hostname. I think the hash probing +// should be fast and simple enough for now. +std::string CookieMonster::GetCookiesWithOptions(const GURL& url, + CookieOptions options) { + if (!HasCookieableScheme(url)) { + DLOG(WARNING) << "Unsupported cookie scheme: " << url.scheme(); + return std::string(); + } + + // Get the cookies for this host and its domain(s). 
+ std::vector<CanonicalCookie*> cookies; + FindCookiesForHostAndDomain(url, options, &cookies); + std::sort(cookies.begin(), cookies.end(), CookieSorter); + + std::string cookie_line; + for (std::vector<CanonicalCookie*>::const_iterator it = cookies.begin(); + it != cookies.end(); ++it) { + if (it != cookies.begin()) + cookie_line += "; "; + // In Mozilla if you set a cookie like AAAA, it will have an empty token + // and a value of AAAA. When it sends the cookie back, it will send AAAA, + // so we need to avoid sending =AAAA for a blank token value. + if (!(*it)->Name().empty()) + cookie_line += (*it)->Name() + "="; + cookie_line += (*it)->Value(); + } + + COOKIE_DLOG(INFO) << "GetCookies() result: " << cookie_line; + + return cookie_line; +} + +// TODO(deanm): We could have expired cookies that haven't been purged yet, +// and exporting these would be inaccurate, for example in the cookie manager +// it might show cookies that are actually expired already. We should do +// a full garbage collection before ... There actually isn't a way to do +// this right now (a forceful full GC), so we'll have to live with the +// possibility of showing the user expired cookies. This shouldn't be very +// common since most persistent cookies have a long lifetime. +CookieMonster::CookieList CookieMonster::GetAllCookies() { + AutoLock autolock(lock_); + InitIfNecessary(); + + CookieList cookie_list; + + for (CookieMap::iterator it = cookies_.begin(); it != cookies_.end(); ++it) { + cookie_list.push_back(CookieListPair(it->first, *it->second)); + } + + return cookie_list; +} + +void CookieMonster::FindCookiesForHostAndDomain( + const GURL& url, + CookieOptions options, + std::vector<CanonicalCookie*>* cookies) { + AutoLock autolock(lock_); + InitIfNecessary(); + + const Time current_time(CurrentTime()); + + // Query for the full host, For example: 'a.c.blah.com'. 
+ std::string key(url.host()); + FindCookiesForKey(key, url, options, current_time, cookies); + + // See if we can search for domain cookies, i.e. if the host has a TLD + 1. + const std::string domain( + RegistryControlledDomainService::GetDomainAndRegistry(key)); + if (domain.empty()) + return; + DCHECK_LE(domain.length(), key.length()); + DCHECK_EQ(0, key.compare(key.length() - domain.length(), domain.length(), + domain)); + + // Walk through the string and query at the dot points (GURL should have + // canonicalized the dots, so this should be safe). Stop once we reach the + // domain + registry; we can't write cookies past this point, and with some + // registrars other domains can, in which case we don't want to read their + // cookies. + for (key = "." + key; key.length() > domain.length(); ) { + FindCookiesForKey(key, url, options, current_time, cookies); + const size_t next_dot = key.find('.', 1); // Skip over leading dot. + key.erase(0, next_dot); + } +} + +void CookieMonster::FindCookiesForKey( + const std::string& key, + const GURL& url, + CookieOptions options, + const Time& current, + std::vector<CanonicalCookie*>* cookies) { + bool secure = url.SchemeIsSecure(); + + for (CookieMapItPair its = cookies_.equal_range(key); + its.first != its.second; ) { + CookieMap::iterator curit = its.first; + CanonicalCookie* cc = curit->second; + ++its.first; + + // If the cookie is expired, delete it. + if (cc->IsExpired(current)) { + InternalDeleteCookie(curit, true); + continue; + } + + // Filter out HttpOnly cookies unless they where explicitly requested. + if ((options & INCLUDE_HTTPONLY) == 0 && cc->IsHttpOnly()) + continue; + + // Filter out secure cookies unless we're https. + if (!secure && cc->IsSecure()) + continue; + + if (!cc->IsOnPath(url.path())) + continue; + + // Congratulations Charlie, you passed the test! 
+ cookies->push_back(cc); + } +} + + +CookieMonster::ParsedCookie::ParsedCookie(const std::string& cookie_line) + : is_valid_(false), + path_index_(0), + domain_index_(0), + expires_index_(0), + maxage_index_(0), + secure_index_(0), + httponly_index_(0) { + + if (cookie_line.size() > kMaxCookieSize) { + LOG(INFO) << "Not parsing cookie, too large: " << cookie_line.size(); + return; + } + + ParseTokenValuePairs(cookie_line); + if (pairs_.size() > 0) { + is_valid_ = true; + SetupAttributes(); + } +} + +// Returns true if |c| occurs in |chars| +// TODO maybe make this take an iterator, could check for end also? +static inline bool CharIsA(const char c, const char* chars) { + return strchr(chars, c) != NULL; +} +// Seek the iterator to the first occurrence of a character in |chars|. +// Returns true if it hit the end, false otherwise. +static inline bool SeekTo(std::string::const_iterator* it, + const std::string::const_iterator& end, + const char* chars) { + for (; *it != end && !CharIsA(**it, chars); ++(*it)); + return *it == end; +} +// Seek the iterator to the first occurrence of a character not in |chars|. +// Returns true if it hit the end, false otherwise. +static inline bool SeekPast(std::string::const_iterator* it, + const std::string::const_iterator& end, + const char* chars) { + for (; *it != end && CharIsA(**it, chars); ++(*it)); + return *it == end; +} +static inline bool SeekBackPast(std::string::const_iterator* it, + const std::string::const_iterator& end, + const char* chars) { + for (; *it != end && CharIsA(**it, chars); --(*it)); + return *it == end; +} + +// Parse all token/value pairs and populate pairs_. 
// Splits |cookie_line| into at most kMaxPairs (token, value) pairs separated
// by ';'.  Parsing stops at the first '\n', '\r' or NUL character.  The first
// pair is the cookie's own name/value; names of later pairs (attributes) are
// lower-cased.
void CookieMonster::ParsedCookie::ParseTokenValuePairs(
    const std::string& cookie_line) {
  // The explicit \0 is part of the terminator set, hence sizeof() - 1 rather
  // than strlen().
  static const char kTerminator[]      = "\n\r\0";
  static const int  kTerminatorLen     = sizeof(kTerminator) - 1;
  static const char kWhitespace[]      = " \t";
  static const char kQuoteTerminator[] = "\"";
  static const char kValueSeparator[]  = ";";
  static const char kTokenSeparator[]  = ";=";

  pairs_.clear();

  // Ok, here we go.  We should be expecting to be starting somewhere
  // before the cookie line, not including any header name...
  std::string::const_iterator start = cookie_line.begin();
  std::string::const_iterator end = cookie_line.end();
  std::string::const_iterator it = start;

  // TODO Make sure we're stripping \r\n in the network code.  Then we
  // can log any unexpected terminators.
  std::string::size_type term_pos = cookie_line.find_first_of(
      std::string(kTerminator, kTerminatorLen));
  if (term_pos != std::string::npos) {
    // We found a character we should treat as an end of string.
    end = start + term_pos;
  }

  for (int pair_num = 0; pair_num < kMaxPairs && it != end; ++pair_num) {
    TokenValuePair pair;
    std::string::const_iterator token_start, token_real_end, token_end;

    // Seek past any whitespace before the "token" (the name).
    // token_start should point at the first character in the token
    if (SeekPast(&it, end, kWhitespace))
      break;  // No token, whitespace or empty.
    token_start = it;

    // Seek over the token, to the token separator.
    // token_real_end should point at the token separator, i.e. '='.
    // If it == end after the seek, we probably have a token-value.
    SeekTo(&it, end, kTokenSeparator);
    token_real_end = it;

    // Ignore any whitespace between the token and the token separator.
    // token_end should point after the last interesting token character,
    // pointing at either whitespace, or at '=' (and equal to token_real_end).
    if (it != token_start) {  // We could have an empty token name.
      --it;  // Go back before the token separator.
      // Skip over any whitespace to the first non-whitespace character.
      SeekBackPast(&it, token_start, kWhitespace);
      // Point after it.
      ++it;
    }
    token_end = it;

    // Seek us back to the end of the token.
    it = token_real_end;

    if (it == end || *it != '=') {
      // We have a token-value, we didn't have any token name.
      if (pair_num == 0) {
        // For the first time around, we want to treat single values
        // as a value with an empty name. (Mozilla bug 169091).
        // IE seems to also have this behavior, ex "AAA", and "AAA=10" will
        // set 2 different cookies, and setting "BBB" will then replace "AAA".
        pair.first = "";
        // Rewind to the beginning of what we thought was the token name,
        // and let it get parsed as a value.
        it = token_start;
      } else {
        // Any not-first attribute we want to treat a value as a
        // name with an empty value...  This is so something like
        // "secure;" will get parsed as a Token name, and not a value.
        pair.first = std::string(token_start, token_end);
      }
    } else {
      // We have a TOKEN=VALUE.
      pair.first = std::string(token_start, token_end);
      ++it;  // Skip past the '='.
    }

    // OK, now try to parse a value.
    std::string::const_iterator value_start, value_end;

    // Seek past any whitespace that might be in-between the token and value.
    SeekPast(&it, end, kWhitespace);
    // value_start should point at the first character of the value.
    value_start = it;

    // The value is double quoted, process <quoted-string>.
    if (it != end && *it == '"') {
      // Skip over the first double quote, and parse until
      // a terminating double quote or the end.
      for (++it; it != end && !CharIsA(*it, kQuoteTerminator); ++it) {
        // Allow an escaped \" in a double quoted string.
        if (*it == '\\') {
          ++it;
          if (it == end)
            break;
        }
      }

      // The stored value keeps its quotes and anything between the closing
      // quote and the next ';'.
      SeekTo(&it, end, kValueSeparator);
      // We could seek to the end, that's ok.
      value_end = it;
    } else {
      // The value is non-quoted, process <token-value>.
      // Just look for ';' to terminate ('=' allowed).
      // We can hit the end, maybe they didn't terminate.
      SeekTo(&it, end, kValueSeparator);

      // Ignore any whitespace between the value and the value separator
      if (it != value_start) {  // Could have an empty value
        --it;
        SeekBackPast(&it, value_start, kWhitespace);
        ++it;
      }

      value_end = it;
    }

    // OK, we're finished with a Token/Value.
    pair.second = std::string(value_start, value_end);
    // From RFC2109: "Attributes (names) (attr) are case-insensitive."
    // The first pair is the cookie's own name, which is left untouched.
    if (pair_num != 0)
      StringToLowerASCII(&pair.first);
    pairs_.push_back(pair);

    // We've processed a token/value pair, we're either at the end of
    // the string or a ValueSeparator like ';', which we want to skip.
    if (it != end)
      ++it;
  }
}

// Records, for each attribute we care about, the index in pairs_ where it
// was found.  If an attribute appears more than once, the last occurrence
// wins.  An index of 0 (the constructor's initial value) is left in place for
// attributes that are absent, since slot 0 holds the cookie's own name/value.
void CookieMonster::ParsedCookie::SetupAttributes() {
  static const char kPathTokenName[]      = "path";
  static const char kDomainTokenName[]    = "domain";
  static const char kExpiresTokenName[]   = "expires";
  static const char kMaxAgeTokenName[]    = "max-age";
  static const char kSecureTokenName[]    = "secure";
  static const char kHttpOnlyTokenName[]  = "httponly";

  // We skip over the first token/value, the user supplied one.
  for (size_t i = 1; i < pairs_.size(); ++i) {
    if (pairs_[i].first == kPathTokenName)
      path_index_ = i;
    else if (pairs_[i].first == kDomainTokenName)
      domain_index_ = i;
    else if (pairs_[i].first == kExpiresTokenName)
      expires_index_ = i;
    else if (pairs_[i].first == kMaxAgeTokenName)
      maxage_index_ = i;
    else if (pairs_[i].first == kSecureTokenName)
      secure_index_ = i;
    else if (pairs_[i].first == kHttpOnlyTokenName)
      httponly_index_ = i;
    else { /* some attribute we don't know or don't care about. */ }
  }
}

// Create a cookie-line for the cookie.  For debugging only!
// If we want to use this for something more than debugging, we
// should rewrite it better...
+std::string CookieMonster::ParsedCookie::DebugString() const { + std::string out; + for (PairList::const_iterator it = pairs_.begin(); + it != pairs_.end(); ++it) { + out.append(it->first); + out.append("="); + out.append(it->second); + out.append("; "); + } + return out; +} + +bool CookieMonster::CanonicalCookie::IsOnPath( + const std::string& url_path) const { + + // A zero length would be unsafe for our trailing '/' checks, and + // would also make no sense for our prefix match. The code that + // creates a CanonicalCookie should make sure the path is never zero length, + // but we double check anyway. + if (path_.empty()) + return false; + + // The Mozilla code broke it into 3 cases, if it's strings lengths + // are less than, equal, or greater. I think this is simpler: + + // Make sure the cookie path is a prefix of the url path. If the + // url path is shorter than the cookie path, then the cookie path + // can't be a prefix. + if (url_path.find(path_) != 0) + return false; + + // Now we know that url_path is >= cookie_path, and that cookie_path + // is a prefix of url_path. If they are the are the same length then + // they are identical, otherwise we need an additional check: + + // In order to avoid in correctly matching a cookie path of /blah + // with a request path of '/blahblah/', we need to make sure that either + // the cookie path ends in a trailing '/', or that we prefix up to a '/' + // in the url path. Since we know that the url path length is greater + // than the cookie path length, it's safe to index one byte past. 
+ if (path_.length() != url_path.length() && + path_[path_.length() - 1] != '/' && + url_path[path_.length()] != '/') + return false; + + return true; +} + +std::string CookieMonster::CanonicalCookie::DebugString() const { + return StringPrintf("name: %s value: %s path: %s creation: %llu", + name_.c_str(), value_.c_str(), path_.c_str(), + creation_date_.ToTimeT()); +} diff --git a/net/base/cookie_monster.h b/net/base/cookie_monster.h new file mode 100644 index 0000000..cf7f2a6 --- /dev/null +++ b/net/base/cookie_monster.h @@ -0,0 +1,331 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Brought to you by the letter D and the number 2. + +#ifndef NET_BASE_COOKIE_MONSTER_H__ +#define NET_BASE_COOKIE_MONSTER_H__ + +#include <string> +#include <vector> +#include <utility> +#include <map> + +#include "base/basictypes.h" +#include "base/lock.h" +#include "base/time.h" + +class GURL; + +// The cookie monster is the system for storing and retrieving cookies. It has +// an in-memory list of all cookies, and synchronizes non-session cookies to an +// optional permanent storage that implements the PersistentCookieStore +// interface. +// +// This class IS thread-safe. Normally, it is only used on the I/O thread, but +// is also accessed directly through Automation for UI testing. +// +// TODO(deanm) Implement CookieMonster, the cookie database. +// - Verify that our domain enforcement and non-dotted handling is correct +// - Currently garbage collection is done on oldest CreationUTC, Mozilla +// purges cookies on last access time, which would require adding and +// keeping track of access times on a CanonicalCookie +class CookieMonster { + public: + class ParsedCookie; + class CanonicalCookie; + class PersistentCookieStore; + + // NOTE(deanm): + // I benchmarked hash_multimap vs multimap. We're going to be query-heavy + // so it would seem like hashing would help. However they were very + // close, with multimap being a tiny bit faster. 
I think this is because + // our map is at max around 1000 entries, and the additional complexity + // for the hashing might not overcome the O(log(1000)) for querying + // a multimap. Also, multimap is standard, another reason to use it. + typedef std::multimap<std::string, CanonicalCookie*> CookieMap; + typedef std::pair<CookieMap::iterator, CookieMap::iterator> CookieMapItPair; + typedef std::pair<std::string, CanonicalCookie*> KeyedCanonicalCookie; + typedef std::pair<std::string, CanonicalCookie> CookieListPair; + typedef std::vector<CookieListPair> CookieList; + + enum CookieOptions { + // Normal cookie behavior, decides which cookies to return based on + // the URL and whether it's https, etc. Never returns HttpOnly cookies + NORMAL = 0, + // Include HttpOnly cookies + INCLUDE_HTTPONLY = 1, + }; + + CookieMonster(); + + // The store passed in should not have had Init() called on it yet. This class + // will take care of initializing it. The backing store is NOT owned by this + // class, but it must remain valid for the duration of the cookie monster's + // existence. + CookieMonster(PersistentCookieStore* store); + + ~CookieMonster(); + + // Parse the string with the cookie time (very forgivingly). + static Time ParseCookieTime(const std::string& time_string); + + // Set a single cookie. Expects a cookie line, like "a=1; domain=b.com". + bool SetCookie(const GURL& url, const std::string& cookie_line); + // Sets a single cookie with a specific creation date. To set a cookie with + // a creation date of Now() use SetCookie() instead (it calls this function + // internally). + bool SetCookieWithCreationTime(const GURL& url, + const std::string& cookie_line, + const Time& creation_time); + // Set a vector of response cookie values for the same URL. + void SetCookies(const GURL& url, const std::vector<std::string>& cookies); + + // TODO what if the total size of all the cookies >4k, can we have a header + // that big or do we need multiple Cookie: headers? 
+ // Simple interface, get a cookie string "a=b; c=d" for the given URL. + // It will _not_ return httponly cookies, see GetCookiesWithOptions + std::string GetCookies(const GURL& url); + std::string GetCookiesWithOptions(const GURL& url, CookieOptions options); + // Returns all the cookies, for use in management UI, etc. + CookieList GetAllCookies(); + + // Delete all of the cookies. + int DeleteAll(bool sync_to_store); + // Delete all of the cookies that have a creation_date greater than or equal + // to |delete_begin| and less than |delete_end| + int DeleteAllCreatedBetween(const Time& delete_begin, + const Time& delete_end, + bool sync_to_store); + // Delete all of the cookies that have a creation_date more recent than the + // one passed into the function via |delete_after|. + int DeleteAllCreatedAfter(const Time& delete_begin, bool sync_to_store); + + // Delete one specific cookie. + bool DeleteCookie(const std::string& domain, + const CanonicalCookie& cookie, + bool sync_to_store); + + // There are some unknowns about how to correctly handle file:// cookies, + // and our implementation for this is not robust enough. This allows you + // to enable support, but it should only be used for testing. Bug 1157243. + static void EnableFileScheme(); + static bool enable_file_scheme_; + + private: + // Called by all non-static functions to ensure that the cookies store has + // been initialized. This is not done during creating so it doesn't block + // the window showing. + // Note: this method should always be called with lock_ held. + void InitIfNecessary() { + if (!initialized_) { + if (store_) + InitStore(); + initialized_ = true; + } + } + + // Initializes the backing store and reads existing cookies from it. + // Should only be called by InitIfNecessary(). 
+ void InitStore(); + + void FindCookiesForHostAndDomain(const GURL& url, + CookieOptions options, + std::vector<CanonicalCookie*>* cookies); + + void FindCookiesForKey(const std::string& key, + const GURL& url, + CookieOptions options, + const Time& current, + std::vector<CanonicalCookie*>* cookies); + + int DeleteEquivalentCookies(const std::string& key, + const CanonicalCookie& ecc); + + void InternalInsertCookie(const std::string& key, + CanonicalCookie* cc, + bool sync_to_store); + + void InternalDeleteCookie(CookieMap::iterator it, bool sync_to_store); + + // Enforce cookie maximum limits, purging expired and old cookies if needed + int GarbageCollect(const Time& current, const std::string& key); + int GarbageCollectRange(const Time& current, + const CookieMapItPair& itpair, + size_t num_max, + size_t num_purge); + + CookieMap cookies_; + + // Indicates whether the cookie store has been initialized. This happens + // lazily in InitStoreIfNecessary(). + bool initialized_; + + PersistentCookieStore* store_; + + // The resolution of our time isn't enough, so we do something + // ugly and increment when we've seen the same time twice. + Time CurrentTime(); + Time last_time_seen_; + + // Lock for thread-safety + Lock lock_; + + DISALLOW_EVIL_CONSTRUCTORS(CookieMonster); +}; + +class CookieMonster::ParsedCookie { + public: + typedef std::pair<std::string, std::string> TokenValuePair; + typedef std::vector<TokenValuePair> PairList; + + // The maximum length of a cookie string we will try to parse + static const int kMaxCookieSize = 4096; + // The maximum number of Token/Value pairs. Shouldn't have more than 8. 
+ static const int kMaxPairs = 16; + + // Construct from a cookie string like "BLAH=1; path=/; domain=.google.com" + ParsedCookie(const std::string& cookie_line); + ~ParsedCookie() { } + + // You should not call any other methods on the class if !IsValid + bool IsValid() const { return is_valid_; } + + const std::string& Name() const { return pairs_[0].first; } + const std::string& Token() const { return Name(); } + const std::string& Value() const { return pairs_[0].second; } + + bool HasPath() const { return path_index_ != 0; } + const std::string& Path() const { return pairs_[path_index_].second; } + bool HasDomain() const { return domain_index_ != 0; } + const std::string& Domain() const { return pairs_[domain_index_].second; } + bool HasExpires() const { return expires_index_ != 0; } + const std::string& Expires() const { return pairs_[expires_index_].second; } + bool HasMaxAge() const { return maxage_index_ != 0; } + const std::string& MaxAge() const { return pairs_[maxage_index_].second; } + bool IsSecure() const { return secure_index_ != 0; } + bool IsHttpOnly() const { return httponly_index_ != 0; } + + // For debugging only! + std::string DebugString() const; + + private: + void ParseTokenValuePairs(const std::string& cookie_line); + void SetupAttributes(); + + PairList pairs_; + bool is_valid_; + // These will default to 0, but that should never be valid since the + // 0th index is the user supplied token/value, not an attribute. + // We're really never going to have more than like 8 attributes, so we + // could fit these into 3 bits each if we're worried about size... 
+ size_t path_index_; + size_t domain_index_; + size_t expires_index_; + size_t maxage_index_; + size_t secure_index_; + size_t httponly_index_; + + DISALLOW_EVIL_CONSTRUCTORS(CookieMonster::ParsedCookie); +}; + + +class CookieMonster::CanonicalCookie { + public: + CanonicalCookie(const std::string& name, const std::string& value, + const std::string& path, bool secure, + bool httponly, const Time& creation, + bool has_expires, const Time& expires) + : name_(name), + value_(value), + path_(path), + secure_(secure), + httponly_(httponly), + creation_date_(creation), + has_expires_(has_expires), + expiry_date_(expires) { + } + + // Supports the default copy constructor. + + const std::string& Name() const { return name_; } + const std::string& Value() const { return value_; } + const std::string& Path() const { return path_; } + const Time& CreationDate() const { return creation_date_; } + bool DoesExpire() const { return has_expires_; } + bool IsPersistent() const { return DoesExpire(); } + const Time& ExpiryDate() const { return expiry_date_; } + bool IsSecure() const { return secure_; } + bool IsHttpOnly() const { return httponly_; } + + bool IsExpired(const Time& current) { + return has_expires_ && current >= expiry_date_; + } + + // Are the cookies considered equivalent in the eyes of the RFC. + // This says that the domain and path should string match identically. + bool IsEquivalent(const CanonicalCookie& ecc) const { + // It seems like it would make sense to take secure and httponly into + // account, but the RFC doesn't specify this. 
+ return name_ == ecc.Name() && path_ == ecc.Path(); + } + + bool IsOnPath(const std::string& url_path) const; + + std::string DebugString() const; + private: + std::string name_; + std::string value_; + std::string path_; + Time creation_date_; + bool has_expires_; + Time expiry_date_; + bool secure_; + bool httponly_; +}; + +class CookieMonster::PersistentCookieStore { + public: + virtual ~PersistentCookieStore() { } + + // Initializes the store and retrieves the existing cookies. This will be + // called only once at startup. + virtual bool Load(std::vector<CookieMonster::KeyedCanonicalCookie>*) = 0; + + virtual void AddCookie(const std::string&, const CanonicalCookie&) = 0; + virtual void DeleteCookie(const CanonicalCookie&) = 0; + + protected: + PersistentCookieStore() { } + + private: + DISALLOW_EVIL_CONSTRUCTORS(CookieMonster::PersistentCookieStore); +}; + +#endif // NET_BASE_COOKIE_MONSTER_H__ diff --git a/net/base/cookie_monster_perftest.cc b/net/base/cookie_monster_perftest.cc new file mode 100644 index 0000000..d5d93a7 --- /dev/null +++ b/net/base/cookie_monster_perftest.cc @@ -0,0 +1,120 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "base/perftimer.h" +#include "base/string_util.h" +#include "net/base/cookie_monster.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "googleurl/src/gurl.h" + +namespace { + class ParsedCookieTest : public testing::Test { }; + class CookieMonsterTest : public testing::Test { }; +} + +static const int kNumCookies = 20000; +static const char kCookieLine[] = "A = \"b=;\\\"\" ;secure;;; httponly"; + +TEST(ParsedCookieTest, TestParseCookies) { + std::string cookie(kCookieLine); + PerfTimeLogger timer("Parsed_cookie_parse_cookies"); + for (int i = 0; i < kNumCookies; ++i) { + CookieMonster::ParsedCookie pc(cookie); + EXPECT_TRUE(pc.IsValid()); + } + timer.Done(); +} + +TEST(ParsedCookieTest, TestParseBigCookies) { + std::string cookie(3800, 'z'); + cookie += kCookieLine; + PerfTimeLogger timer("Parsed_cookie_parse_big_cookies"); + for (int i = 0; i < kNumCookies; ++i) { + CookieMonster::ParsedCookie pc(cookie); + EXPECT_TRUE(pc.IsValid()); + } + timer.Done(); +} + +static const GURL kUrlGoogle("http://www.google.izzle"); + +TEST(CookieMonsterTest, TestAddCookiesOnSingleHost) { + CookieMonster cm; + std::vector<std::string> cookies; + for (int i = 0; i < 
kNumCookies; i++) { + cookies.push_back(StringPrintf("a%03d=b", i)); + } + + // Add a bunch of cookies on a single host + PerfTimeLogger timer("Cookie_monster_add_single_host"); + for (std::vector<std::string>::const_iterator it = cookies.begin(); + it != cookies.end(); ++it) { + EXPECT_TRUE(cm.SetCookie(kUrlGoogle, *it)); + } + timer.Done(); + + PerfTimeLogger timer2("Cookie_monster_query_single_host"); + for (std::vector<std::string>::const_iterator it = cookies.begin(); + it != cookies.end(); ++it) { + cm.GetCookies(kUrlGoogle); + } + timer2.Done(); + + PerfTimeLogger timer3("Cookie_monster_deleteall_single_host"); + cm.DeleteAll(false); + timer3.Done(); +} + +TEST(CookieMonsterTest, TestAddCookieOnManyHosts) { + CookieMonster cm; + std::string cookie(kCookieLine); + std::vector<GURL> gurls; // just wanna have ffffuunnn + for (int i = 0; i < kNumCookies; ++i) { + gurls.push_back(GURL(StringPrintf("http://a%04d.izzle", i))); + } + + // Add a cookie on a bunch of host + PerfTimeLogger timer("Cookie_monster_add_many_hosts"); + for (std::vector<GURL>::const_iterator it = gurls.begin(); + it != gurls.end(); ++it) { + EXPECT_TRUE(cm.SetCookie(*it, cookie)); + } + timer.Done(); + + PerfTimeLogger timer2("Cookie_monster_query_many_hosts"); + for (std::vector<GURL>::const_iterator it = gurls.begin(); + it != gurls.end(); ++it) { + cm.GetCookies(*it); + } + timer2.Done(); + + PerfTimeLogger timer3("Cookie_monster_deleteall_many_hosts"); + cm.DeleteAll(false); + timer3.Done(); +} diff --git a/net/base/cookie_monster_unittest.cc b/net/base/cookie_monster_unittest.cc new file mode 100644 index 0000000..fbe1019 --- /dev/null +++ b/net/base/cookie_monster_unittest.cc @@ -0,0 +1,849 @@ +// Copyright 2008, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include <windows.h> +#include <time.h> + +#include <string> + +#include "base/string_util.h" +#include "base/time.h" +#include "base/basictypes.h" +#include "googleurl/src/gurl.h" +#include "net/base/cookie_monster.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace { + class ParsedCookieTest : public testing::Test { }; + class CookieMonsterTest : public testing::Test { }; +} + + +TEST(ParsedCookieTest, TestBasic) { + CookieMonster::ParsedCookie pc("a=b"); + EXPECT_TRUE(pc.IsValid()); + EXPECT_FALSE(pc.IsSecure()); + EXPECT_EQ(pc.Name(), "a"); + EXPECT_EQ(pc.Value(), "b"); +} + +TEST(ParsedCookieTest, TestQuoted) { + CookieMonster::ParsedCookie pc("a=\"b=;\"; path=\"/\""); + EXPECT_TRUE(pc.IsValid()); + EXPECT_FALSE(pc.IsSecure()); + EXPECT_TRUE(pc.HasPath()); + EXPECT_EQ(pc.Name(), "a"); + EXPECT_EQ(pc.Value(), "\"b=;\""); + // If a path was quoted, the path attribute keeps the quotes. This will + // make the cookie effectively useless, but path parameters aren't supposed + // to be quoted. Bug 1261605. 
+ EXPECT_EQ(pc.Path(), "\"/\""); +} + +TEST(ParsedCookieTest, TestNameless) { + CookieMonster::ParsedCookie pc("BLAHHH; path=/; secure;"); + EXPECT_TRUE(pc.IsValid()); + EXPECT_TRUE(pc.IsSecure()); + EXPECT_TRUE(pc.HasPath()); + EXPECT_EQ(pc.Path(), "/"); + EXPECT_EQ(pc.Name(), ""); + EXPECT_EQ(pc.Value(), "BLAHHH"); +} + +TEST(ParsedCookieTest, TestAttributeCase) { + CookieMonster::ParsedCookie pc("BLAHHH; Path=/; sECuRe; httpONLY"); + EXPECT_TRUE(pc.IsValid()); + EXPECT_TRUE(pc.IsSecure()); + EXPECT_TRUE(pc.IsHttpOnly()); + EXPECT_TRUE(pc.HasPath()); + EXPECT_EQ(pc.Path(), "/"); + EXPECT_EQ(pc.Name(), ""); + EXPECT_EQ(pc.Value(), "BLAHHH"); +} + +TEST(ParsedCookieTest, TestDoubleQuotedNameless) { + CookieMonster::ParsedCookie pc("\"BLA\\\"HHH\"; path=/; secure;"); + EXPECT_TRUE(pc.IsValid()); + EXPECT_TRUE(pc.IsSecure()); + EXPECT_TRUE(pc.HasPath()); + EXPECT_EQ(pc.Path(), "/"); + EXPECT_EQ(pc.Name(), ""); + EXPECT_EQ(pc.Value(), "\"BLA\\\"HHH\""); +} + +TEST(ParsedCookieTest, QuoteOffTheEnd) { + CookieMonster::ParsedCookie pc("a=\"B"); + EXPECT_TRUE(pc.IsValid()); + EXPECT_EQ(pc.Name(), "a"); + EXPECT_EQ(pc.Value(), "\"B"); +} + +TEST(ParsedCookieTest, MissingName) { + CookieMonster::ParsedCookie pc("=ABC"); + EXPECT_TRUE(pc.IsValid()); + EXPECT_EQ(pc.Name(), ""); + EXPECT_EQ(pc.Value(), "ABC"); +} + +TEST(ParsedCookieTest, MissingValue) { + CookieMonster::ParsedCookie pc("ABC=; path = /wee"); + EXPECT_TRUE(pc.IsValid()); + EXPECT_EQ(pc.Name(), "ABC"); + EXPECT_EQ(pc.Value(), ""); + EXPECT_TRUE(pc.HasPath()); + EXPECT_EQ(pc.Path(), "/wee"); +} + +TEST(ParsedCookieTest, Whitespace) { + CookieMonster::ParsedCookie pc(" A = BC ;secure;;; httponly"); + EXPECT_TRUE(pc.IsValid()); + EXPECT_EQ(pc.Name(), "A"); + EXPECT_EQ(pc.Value(), "BC"); + EXPECT_FALSE(pc.HasPath()); + EXPECT_FALSE(pc.HasDomain()); + EXPECT_TRUE(pc.IsSecure()); + EXPECT_TRUE(pc.IsHttpOnly()); +} +TEST(ParsedCookieTest, MultipleEquals) { + CookieMonster::ParsedCookie pc(" A=== BC ;secure;;; 
httponly"); + EXPECT_TRUE(pc.IsValid()); + EXPECT_EQ(pc.Name(), "A"); + EXPECT_EQ(pc.Value(), "== BC"); + EXPECT_FALSE(pc.HasPath()); + EXPECT_FALSE(pc.HasDomain()); + EXPECT_TRUE(pc.IsSecure()); + EXPECT_TRUE(pc.IsHttpOnly()); +} + +TEST(ParsedCookieTest, TrailingWhitespace) { + CookieMonster::ParsedCookie pc("ANCUUID=zohNumRKgI0oxyhSsV3Z7D; " + "expires=Sun, 18-Apr-2027 21:06:29 GMT; " + "path=/ ; "); + EXPECT_TRUE(pc.IsValid()); + EXPECT_EQ(pc.Name(), "ANCUUID"); + EXPECT_TRUE(pc.HasExpires()); + EXPECT_TRUE(pc.HasPath()); + EXPECT_EQ(pc.Path(), "/"); + // TODO should export like NumAttributes() and make sure that the + // trailing whitespace doesn't end up as an empty attribute or something. +} + +TEST(ParsedCookieTest, TooManyPairs) { + std::string blankpairs; + blankpairs.resize(CookieMonster::ParsedCookie::kMaxPairs - 1, ';'); + + CookieMonster::ParsedCookie pc1(blankpairs + "secure"); + EXPECT_TRUE(pc1.IsValid()); + EXPECT_TRUE(pc1.IsSecure()); + + CookieMonster::ParsedCookie pc2(blankpairs + ";secure"); + EXPECT_TRUE(pc2.IsValid()); + EXPECT_FALSE(pc2.IsSecure()); +} + +// TODO some better test cases for invalid cookies. 
+TEST(ParsedCookieTest, InvalidWhitespace) { + CookieMonster::ParsedCookie pc(" "); + EXPECT_FALSE(pc.IsValid()); +} + +TEST(ParsedCookieTest, InvalidTooLong) { + std::string maxstr; + maxstr.resize(CookieMonster::ParsedCookie::kMaxCookieSize, 'a'); + + CookieMonster::ParsedCookie pc1(maxstr); + EXPECT_TRUE(pc1.IsValid()); + + CookieMonster::ParsedCookie pc2(maxstr + "A"); + EXPECT_FALSE(pc2.IsValid()); +} + +TEST(ParsedCookieTest, InvalidEmpty) { + CookieMonster::ParsedCookie pc(""); + EXPECT_FALSE(pc.IsValid()); +} + +TEST(ParsedCookieTest, EmbeddedTerminator) { + CookieMonster::ParsedCookie pc1("AAA=BB\0ZYX"); + CookieMonster::ParsedCookie pc2("AAA=BB\rZYX"); + CookieMonster::ParsedCookie pc3("AAA=BB\nZYX"); + EXPECT_TRUE(pc1.IsValid()); + EXPECT_EQ(pc1.Name(), "AAA"); + EXPECT_EQ(pc1.Value(), "BB"); + EXPECT_TRUE(pc2.IsValid()); + EXPECT_EQ(pc2.Name(), "AAA"); + EXPECT_EQ(pc2.Value(), "BB"); + EXPECT_TRUE(pc3.IsValid()); + EXPECT_EQ(pc3.Name(), "AAA"); + EXPECT_EQ(pc3.Value(), "BB"); +} + +static const char kUrlGoogle[] = "http://www.google.izzle"; +static const char kUrlGoogleSecure[] = "https://www.google.izzle"; +static const char kUrlFtp[] = "ftp://ftp.google.izzle/"; +static const char kValidCookieLine[] = "A=B; path=/"; +static const char kValidDomainCookieLine[] = "A=B; path=/; domain=google.izzle"; + +TEST(CookieMonsterTest, DomainTest) { + GURL url_google(kUrlGoogle); + + CookieMonster cm; + EXPECT_TRUE(cm.SetCookie(url_google, "A=B")); + EXPECT_EQ(cm.GetCookies(url_google), "A=B"); + EXPECT_TRUE(cm.SetCookie(url_google, "C=D; domain=.google.izzle")); + EXPECT_EQ(cm.GetCookies(url_google), "A=B; C=D"); + + // Verify that A=B was set as a host cookie rather than a domain + // cookie -- should not be accessible from a sub sub-domain. + EXPECT_EQ(cm.GetCookies(GURL("http://foo.www.google.izzle")), "C=D"); + + // Test and make sure we find domain cookies on the same domain. 
+ EXPECT_TRUE(cm.SetCookie(url_google, "E=F; domain=.www.google.izzle")); + EXPECT_EQ(cm.GetCookies(url_google), "A=B; C=D; E=F"); + + // Test setting a domain= that doesn't start w/ a dot, should + // treat it as a domain cookie, as if there was a pre-pended dot. + EXPECT_TRUE(cm.SetCookie(url_google, "G=H; domain=www.google.izzle")); + EXPECT_EQ(cm.GetCookies(url_google), "A=B; C=D; E=F; G=H"); + + // Test domain enforcement, should fail on a sub-domain or something too deep. + EXPECT_FALSE(cm.SetCookie(url_google, "I=J; domain=.izzle")); + EXPECT_EQ(cm.GetCookies(GURL("http://a.izzle")), ""); + EXPECT_FALSE(cm.SetCookie(url_google, "K=L; domain=.bla.www.google.izzle")); + EXPECT_EQ(cm.GetCookies(GURL("http://bla.www.google.izzle")), + "C=D; E=F; G=H"); + EXPECT_EQ(cm.GetCookies(url_google), "A=B; C=D; E=F; G=H"); +} + +// FireFox recognizes domains containing trailing periods as valid. +// IE and Safari do not. Assert the expected policy here. +TEST(CookieMonsterTest, DomainWithTrailingDotTest) { + CookieMonster cm; + GURL url_google("http://www.google.com"); + + EXPECT_FALSE(cm.SetCookie(url_google, "a=1; domain=.www.google.com.")); + EXPECT_FALSE(cm.SetCookie(url_google, "b=2; domain=.www.google.com..")); + EXPECT_EQ(cm.GetCookies(url_google), ""); +} + +// Test that cookies can bet set on higher level domains. 
+// http://b/issue?id=896491 +TEST(CookieMonsterTest, ValidSubdomainTest) { + CookieMonster cm; + GURL url_abcd("http://a.b.c.d.com"); + GURL url_bcd("http://b.c.d.com"); + GURL url_cd("http://c.d.com"); + GURL url_d("http://d.com"); + + EXPECT_TRUE(cm.SetCookie(url_abcd, "a=1; domain=.a.b.c.d.com")); + EXPECT_TRUE(cm.SetCookie(url_abcd, "b=2; domain=.b.c.d.com")); + EXPECT_TRUE(cm.SetCookie(url_abcd, "c=3; domain=.c.d.com")); + EXPECT_TRUE(cm.SetCookie(url_abcd, "d=4; domain=.d.com")); + + EXPECT_EQ(cm.GetCookies(url_abcd), "a=1; b=2; c=3; d=4"); + EXPECT_EQ(cm.GetCookies(url_bcd), "b=2; c=3; d=4"); + EXPECT_EQ(cm.GetCookies(url_cd), "c=3; d=4"); + EXPECT_EQ(cm.GetCookies(url_d), "d=4"); + + // Check that the same cookie can exist on different sub-domains. + EXPECT_TRUE(cm.SetCookie(url_bcd, "X=bcd; domain=.b.c.d.com")); + EXPECT_TRUE(cm.SetCookie(url_bcd, "X=cd; domain=.c.d.com")); + EXPECT_EQ(cm.GetCookies(url_bcd), "b=2; c=3; d=4; X=bcd; X=cd"); + EXPECT_EQ(cm.GetCookies(url_cd), "c=3; d=4; X=cd"); +} + +// Test that setting a cookie which specifies an invalid domain has +// no side-effect. An invalid domain in this context is one which does +// not match the originating domain. +// http://b/issue?id=896472 +TEST(CookieMonsterTest, InvalidDomainTest) { + { + CookieMonster cm; + GURL url_foobar("http://foo.bar.com"); + + // More specific sub-domain than allowed. + EXPECT_FALSE(cm.SetCookie(url_foobar, "a=1; domain=.yo.foo.bar.com")); + + EXPECT_FALSE(cm.SetCookie(url_foobar, "b=2; domain=.foo.com")); + EXPECT_FALSE(cm.SetCookie(url_foobar, "c=3; domain=.bar.foo.com")); + + // Different TLD, but the rest is a substring. + EXPECT_FALSE(cm.SetCookie(url_foobar, "d=4; domain=.foo.bar.com.net")); + + // A substring that isn't really a parent domain. 
+ EXPECT_FALSE(cm.SetCookie(url_foobar, "e=5; domain=ar.com")); + + // Completely invalid domains: + EXPECT_FALSE(cm.SetCookie(url_foobar, "f=6; domain=.")); + EXPECT_FALSE(cm.SetCookie(url_foobar, "g=7; domain=/")); + EXPECT_FALSE(cm.SetCookie(url_foobar, "h=8; domain=http://foo.bar.com")); + EXPECT_FALSE(cm.SetCookie(url_foobar, "i=9; domain=..foo.bar.com")); + EXPECT_FALSE(cm.SetCookie(url_foobar, "j=10; domain=..bar.com")); + + // Make sure there isn't something quirky in the domain canonicalization + // that supports full URL semantics. + EXPECT_FALSE(cm.SetCookie(url_foobar, "k=11; domain=.foo.bar.com?blah")); + EXPECT_FALSE(cm.SetCookie(url_foobar, "l=12; domain=.foo.bar.com/blah")); + EXPECT_FALSE(cm.SetCookie(url_foobar, "m=13; domain=.foo.bar.com:80")); + EXPECT_FALSE(cm.SetCookie(url_foobar, "n=14; domain=.foo.bar.com:")); + EXPECT_FALSE(cm.SetCookie(url_foobar, "o=15; domain=.foo.bar.com#sup")); + + EXPECT_EQ(cm.GetCookies(url_foobar), ""); + } + + { + // Make sure the cookie code hasn't gotten its subdomain string handling + // reversed, missed a suffix check, etc. It's important here that the two + // hosts below have the same domain + registry. + CookieMonster cm; + GURL url_foocom("http://foo.com.com"); + EXPECT_FALSE(cm.SetCookie(url_foocom, "a=1; domain=.foo.com.com.com")); + EXPECT_EQ(cm.GetCookies(url_foocom), ""); + } +} + +// Test the behavior of omitting dot prefix from domain, should +// function the same as FireFox. +// http://b/issue?id=889898 +TEST(CookieMonsterTest, DomainWithoutLeadingDotTest) { + { // The omission of dot results in setting a domain cookie. + CookieMonster cm; + GURL url_hosted("http://manage.hosted.filefront.com"); + GURL url_filefront("http://www.filefront.com"); + EXPECT_TRUE(cm.SetCookie(url_hosted, "sawAd=1; domain=filefront.com")); + EXPECT_EQ(cm.GetCookies(url_hosted), "sawAd=1"); + EXPECT_EQ(cm.GetCookies(url_filefront), "sawAd=1"); + } + + { // Even when the domains match exactly, don't consider it host cookie. 
+ CookieMonster cm; + GURL url("http://www.google.com"); + EXPECT_TRUE(cm.SetCookie(url, "a=1; domain=www.google.com")); + EXPECT_EQ(cm.GetCookies(url), "a=1"); + EXPECT_EQ(cm.GetCookies(GURL("http://sub.www.google.com")), "a=1"); + EXPECT_EQ(cm.GetCookies(GURL("http://something-else.com")), ""); + } +} + +// Test that the domain specified in cookie string is treated case-insensitive +// http://b/issue?id=896475. +TEST(CookieMonsterTest, CaseInsensitiveDomainTest) { + CookieMonster cm; + GURL url_google("http://www.google.com"); + EXPECT_TRUE(cm.SetCookie(url_google, "a=1; domain=.GOOGLE.COM")); + EXPECT_TRUE(cm.SetCookie(url_google, "b=2; domain=.wWw.gOOgLE.coM")); + EXPECT_EQ(cm.GetCookies(url_google), "a=1; b=2"); +} + +TEST(CookieMonsterTest, TestIpAddress) { + GURL url_ip("http://1.2.3.4/weee"); + { + CookieMonster cm; + EXPECT_TRUE(cm.SetCookie(url_ip, kValidCookieLine)); + EXPECT_EQ(cm.GetCookies(url_ip), "A=B"); + } + + { // IP addresses should not be able to set domain cookies. + CookieMonster cm; + EXPECT_FALSE(cm.SetCookie(url_ip, "b=2; domain=.1.2.3.4")); + EXPECT_FALSE(cm.SetCookie(url_ip, "c=3; domain=.3.4")); + EXPECT_EQ(cm.GetCookies(url_ip), ""); + } +} + +// Test host cookies, and setting of cookies on TLD. +TEST(CookieMonsterTest, TestNonDottedAndTLD) { + { + CookieMonster cm; + GURL url("http://com/"); + // Allow setting on "com", (but only as a host cookie). + EXPECT_TRUE(cm.SetCookie(url, "a=1")); + EXPECT_FALSE(cm.SetCookie(url, "b=2; domain=.com")); + EXPECT_FALSE(cm.SetCookie(url, "c=3; domain=com")); + EXPECT_EQ(cm.GetCookies(url), "a=1"); + // Make sure it doesn't show up for a normal .com, it should be a host + // not a domain cookie. + EXPECT_EQ(cm.GetCookies(GURL("http://hopefully-no-cookies.com/")), ""); + EXPECT_EQ(cm.GetCookies(GURL("http://.com/")), ""); + } + + { // http://com. should be treated the same as http://com. 
+ CookieMonster cm; + GURL url("http://com./index.html"); + EXPECT_TRUE(cm.SetCookie(url, "a=1")); + EXPECT_EQ(cm.GetCookies(url), "a=1"); + EXPECT_EQ(cm.GetCookies(GURL("http://hopefully-no-cookies.com./")), ""); + } + + { // Should not be able to set host cookie from a subdomain. + CookieMonster cm; + GURL url("http://a.b"); + EXPECT_FALSE(cm.SetCookie(url, "a=1; domain=.b")); + EXPECT_FALSE(cm.SetCookie(url, "b=2; domain=b")); + EXPECT_EQ(cm.GetCookies(url), ""); + } + + { // Same test as above, but explicitly on a known TLD (com). + CookieMonster cm; + GURL url("http://google.com"); + EXPECT_FALSE(cm.SetCookie(url, "a=1; domain=.com")); + EXPECT_FALSE(cm.SetCookie(url, "b=2; domain=com")); + EXPECT_EQ(cm.GetCookies(url), ""); + } + + { // Make sure can't set cookie on TLD which is dotted. + CookieMonster cm; + GURL url("http://google.co.uk"); + EXPECT_FALSE(cm.SetCookie(url, "a=1; domain=.co.uk")); + EXPECT_FALSE(cm.SetCookie(url, "b=2; domain=.uk")); + EXPECT_EQ(cm.GetCookies(url), ""); + EXPECT_EQ(cm.GetCookies(GURL("http://something-else.co.uk")), ""); + EXPECT_EQ(cm.GetCookies(GURL("http://something-else.uk")), ""); + } + + { // Intranet URLs should only be able to set host cookies. + CookieMonster cm; + GURL url("http://b"); + EXPECT_TRUE(cm.SetCookie(url, "a=1")); + EXPECT_FALSE(cm.SetCookie(url, "b=2; domain=.b")); + EXPECT_FALSE(cm.SetCookie(url, "c=3; domain=b")); + EXPECT_EQ(cm.GetCookies(url), "a=1"); + } +} + +// Test reading/writing cookies when the domain ends with a period, +// as in "www.google.com." +TEST(CookieMonsterTest, TestHostEndsWithDot) { + CookieMonster cm; + GURL url("http://www.google.com"); + GURL url_with_dot("http://www.google.com."); + EXPECT_TRUE(cm.SetCookie(url, "a=1")); + EXPECT_EQ(cm.GetCookies(url), "a=1"); + + // Do not share cookie space with the dot version of domain. + // Note: this is not what FireFox does, but it _is_ what IE+Safari do. 
+ EXPECT_FALSE(cm.SetCookie(url, "b=2; domain=.www.google.com.")); + EXPECT_EQ(cm.GetCookies(url), "a=1"); + + EXPECT_TRUE(cm.SetCookie(url_with_dot, "b=2; domain=.google.com.")); + EXPECT_EQ(cm.GetCookies(url_with_dot), "b=2"); + + // Make sure there weren't any side effects. + EXPECT_EQ(cm.GetCookies(GURL("http://hopefully-no-cookies.com/")), ""); + EXPECT_EQ(cm.GetCookies(GURL("http://.com/")), ""); +} + +TEST(CookieMonsterTest, InvalidScheme) { + CookieMonster cm; + EXPECT_FALSE(cm.SetCookie(GURL(kUrlFtp), kValidCookieLine)); +} + +TEST(CookieMonsterTest, InvalidScheme_Read) { + CookieMonster cm; + EXPECT_TRUE(cm.SetCookie(GURL(kUrlGoogle), kValidDomainCookieLine)); + EXPECT_EQ(cm.GetCookies(GURL(kUrlFtp)), ""); +} + +TEST(CookieMonsterTest, PathTest) { + std::string url("http://www.google.izzle"); + CookieMonster cm; + EXPECT_TRUE(cm.SetCookie(GURL(url), "A=B; path=/wee")); + EXPECT_EQ(cm.GetCookies(GURL(url + "/wee")), "A=B"); + EXPECT_EQ(cm.GetCookies(GURL(url + "/wee/")), "A=B"); + EXPECT_EQ(cm.GetCookies(GURL(url + "/wee/war")), "A=B"); + EXPECT_EQ(cm.GetCookies(GURL(url + "/wee/war/more/more")), "A=B"); + EXPECT_EQ(cm.GetCookies(GURL(url + "/weehee")), ""); + EXPECT_EQ(cm.GetCookies(GURL(url + "/")), ""); + + // If we add a 0 length path, it should default to / + EXPECT_TRUE(cm.SetCookie(GURL(url), "A=C; path=")); + EXPECT_EQ(cm.GetCookies(GURL(url + "/wee")), "A=B; A=C"); + EXPECT_EQ(cm.GetCookies(GURL(url + "/")), "A=C"); +} + +TEST(CookieMonsterTest, HttpOnlyTest) { + GURL url_google(kUrlGoogle); + CookieMonster cm; + EXPECT_TRUE(cm.SetCookie(url_google, "A=B; httponly")); + EXPECT_EQ(cm.GetCookies(url_google), ""); + EXPECT_EQ(cm.GetCookiesWithOptions(url_google, + CookieMonster::INCLUDE_HTTPONLY), "A=B"); +} + +// From: http://support.microsoft.com/kb/167296. 
+static void UnixTimeToFileTime(time_t t, LPFILETIME pft) {
+  // Adds the fixed offset 116444736000000000 to |t| scaled by 10000000 and
+  // stores the 64-bit result as the low/high halves of |pft|.  The multiply
+  // is done directly in 64 bits: Int32x32To64 takes 32-bit operands, so a
+  // 64-bit time_t would be truncated before the multiplication.
+  const uint64 ticks =
+      static_cast<uint64>(t) * 10000000 + 116444736000000000;
+  pft->dwLowDateTime = static_cast<DWORD>(ticks);
+  pft->dwHighDateTime = static_cast<DWORD>(ticks >> 32);
+}
+
+// Returns the FILETIME value for |t| as a single 64-bit integer.
+static uint64 UnixTimeToUTC(time_t t) {
+  FILETIME ftime;
+  UnixTimeToFileTime(t, &ftime);
+  LARGE_INTEGER li;
+  li.LowPart = ftime.dwLowDateTime;
+  li.HighPart = ftime.dwHighDateTime;
+  return li.QuadPart;
+}
+
+// Runs CookieMonster::ParseCookieTime over a table of date strings.  Each row
+// says whether the string must parse and, if so, the time_t it must yield.
+TEST(CookieMonsterTest, TestCookieDateParsing) {
+  const struct {
+    const char* str;
+    const bool valid;
+    const time_t epoch;
+  } tests[] = {
+    { "Sat, 15-Apr-17 21:01:22 GMT", true, 1492290082 },
+    { "Thu, 19-Apr-2007 16:00:00 GMT", true, 1176998400 },
+    { "Wed, 25 Apr 2007 21:02:13 GMT", true, 1177534933 },
+    { "Thu, 19/Apr\\2007 16:00:00 GMT", true, 1176998400 },
+    { "Fri, 1 Jan 2010 01:01:50 GMT", true, 1262307710 },
+    { "Wednesday, 1-Jan-2003 00:00:00 GMT", true, 1041379200 },
+    { ", 1-Jan-2003 00:00:00 GMT", true, 1041379200 },
+    { " 1-Jan-2003 00:00:00 GMT", true, 1041379200 },
+    { "1-Jan-2003 00:00:00 GMT", true, 1041379200 },
+    { "Wed,18-Apr-07 22:50:12 GMT", true, 1176936612 },
+    { "WillyWonka , 18-Apr-07 22:50:12 GMT", true, 1176936612 },
+    { "WillyWonka , 18-Apr-07 22:50:12", true, 1176936612 },
+    { "WillyWonka , 18-apr-07 22:50:12", true, 1176936612 },
+    { "Mon, 18-Apr-1977 22:50:13 GMT", true, 230251813 },
+    { "Mon, 18-Apr-77 22:50:13 GMT", true, 230251813 },
+    // If the cookie came in with the expiration quoted (which in terms of
+    // the RFC you shouldn't do), we will get string quoted. Bug 1261605.
+    { "\"Sat, 15-Apr-17\\\"21:01:22\\\"GMT\"", true, 1492290082 },
+    // Test with full month names and partial names.
+    { "Partyday, 18- April-07 22:50:12", true, 1176936612 },
+    { "Partyday, 18 - Apri-07 22:50:12", true, 1176936612 },
+    { "Wednes, 1-Januar-2003 00:00:00 GMT", true, 1041379200 },
+    // Test that we always take GMT even with other time zones or bogus
+    // values. The RFC says everything should be GMT, and in the worst case
+    // we are 24 hours off because of zone issues.
+    { "Sat, 15-Apr-17 21:01:22", true, 1492290082 },
+    { "Sat, 15-Apr-17 21:01:22 GMT-2", true, 1492290082 },
+    { "Sat, 15-Apr-17 21:01:22 GMT BLAH", true, 1492290082 },
+    { "Sat, 15-Apr-17 21:01:22 GMT-0400", true, 1492290082 },
+    { "Sat, 15-Apr-17 21:01:22 GMT-0400 (EDT)",true, 1492290082 },
+    { "Sat, 15-Apr-17 21:01:22 DST", true, 1492290082 },
+    { "Sat, 15-Apr-17 21:01:22 -0400", true, 1492290082 },
+    { "Sat, 15-Apr-17 21:01:22 (hello there)", true, 1492290082 },
+    // Test that if we encounter multiple : fields, that we take the first
+    // that correctly parses.
+    { "Sat, 15-Apr-17 21:01:22 11:22:33", true, 1492290082 },
+    { "Sat, 15-Apr-17 ::00 21:01:22", true, 1492290082 },
+    { "Sat, 15-Apr-17 boink:z 21:01:22", true, 1492290082 },
+    // We take the first, which in this case is invalid.
+    { "Sat, 15-Apr-17 91:22:33 21:01:22", false, 0 },
+    // amazon.com formats their cookie expiration like this.
+    { "Thu Apr 18 22:50:12 2007 GMT", true, 1176936612 },
+    // Test that hh:mm:ss can occur anywhere.
+    { "22:50:12 Thu Apr 18 2007 GMT", true, 1176936612 },
+    { "Thu 22:50:12 Apr 18 2007 GMT", true, 1176936612 },
+    { "Thu Apr 22:50:12 18 2007 GMT", true, 1176936612 },
+    { "Thu Apr 18 22:50:12 2007 GMT", true, 1176936612 },
+    { "Thu Apr 18 2007 22:50:12 GMT", true, 1176936612 },
+    { "Thu Apr 18 2007 GMT 22:50:12", true, 1176936612 },
+    // Test that the day and year can be anywhere if they are unambiguous.
+    { "Sat, 15-Apr-17 21:01:22 GMT", true, 1492290082 },
+    { "15-Sat, Apr-17 21:01:22 GMT", true, 1492290082 },
+    { "15-Sat, Apr 21:01:22 GMT 17", true, 1492290082 },
+    { "15-Sat, Apr 21:01:22 GMT 2017", true, 1492290082 },
+    { "15 Apr 21:01:22 2017", true, 1492290082 },
+    { "15 17 Apr 21:01:22", true, 1492290082 },
+    { "Apr 15 17 21:01:22", true, 1492290082 },
+    { "Apr 15 21:01:22 17", true, 1492290082 },
+    { "2017 April 15 21:01:22", true, 1492290082 },
+    { "15 April 2017 21:01:22", true, 1492290082 },
+    // Some invalid dates
+    { "98 April 17 21:01:22", false, 0 },
+    { "Thu, 012-Aug-2008 20:49:07 GMT", false, 0 },
+    { "Thu, 12-Aug-31841 20:49:07 GMT", false, 0 },
+    { "Thu, 12-Aug-9999999999 20:49:07 GMT", false, 0 },
+    { "Thu, 999999999999-Aug-2007 20:49:07 GMT", false, 0 },
+    { "Thu, 12-Aug-2007 20:61:99999999999 GMT", false, 0 },
+    { "IAintNoDateFool", false, 0 },
+  };
+
+  // arraysize() is unsigned, so the index is a size_t to avoid a
+  // signed/unsigned comparison.
+  for (size_t i = 0; i < arraysize(tests); ++i) {
+    Time parsed_time = CookieMonster::ParseCookieTime(tests[i].str);
+    if (!tests[i].valid) {
+      // An unparseable date must come back as the null Time.
+      EXPECT_TRUE(parsed_time.is_null()) << tests[i].str;
+      continue;
+    }
+    EXPECT_FALSE(parsed_time.is_null()) << tests[i].str;
+    EXPECT_EQ(parsed_time.ToTimeT(), tests[i].epoch) << tests[i].str;
+  }
+}
+
+TEST(CookieMonsterTest, TestCookieDeletion) {
+  GURL url_google(kUrlGoogle);
+  CookieMonster cm;
+
+  // Create a session cookie.
+  EXPECT_TRUE(cm.SetCookie(url_google, kValidCookieLine));
+  EXPECT_EQ(cm.GetCookies(url_google), "A=B");
+  // Delete it via Max-Age.
+  EXPECT_TRUE(cm.SetCookie(url_google,
+                           std::string(kValidCookieLine) + "; max-age=0"));
+  EXPECT_EQ(cm.GetCookies(url_google), "");
+
+  // Create a session cookie.
+  EXPECT_TRUE(cm.SetCookie(url_google, kValidCookieLine));
+  EXPECT_EQ(cm.GetCookies(url_google), "A=B");
+  // Delete it via Expires.
+ EXPECT_TRUE(cm.SetCookie(url_google, + std::string(kValidCookieLine) + + "; expires=Mon, 18-Apr-1977 22:50:13 GMT")); + EXPECT_EQ(cm.GetCookies(url_google), ""); + + // Create a persistent cookie. + EXPECT_TRUE(cm.SetCookie(url_google, + std::string(kValidCookieLine) + + "; expires=Mon, 18-Apr-22 22:50:13 GMT")); + EXPECT_EQ(cm.GetCookies(url_google), "A=B"); + // Delete it via Max-Age. + EXPECT_TRUE(cm.SetCookie(url_google, + std::string(kValidCookieLine) + "; max-age=0")); + EXPECT_EQ(cm.GetCookies(url_google), ""); + + // Create a persistent cookie. + EXPECT_TRUE(cm.SetCookie(url_google, + std::string(kValidCookieLine) + + "; expires=Mon, 18-Apr-22 22:50:13 GMT")); + EXPECT_EQ(cm.GetCookies(url_google), "A=B"); + // Delete it via Expires. + EXPECT_TRUE(cm.SetCookie(url_google, + std::string(kValidCookieLine) + + "; expires=Mon, 18-Apr-1977 22:50:13 GMT")); + EXPECT_EQ(cm.GetCookies(url_google), ""); +} + +TEST(CookieMonsterTest, TestCookieDeleteAll) { + GURL url_google(kUrlGoogle); + CookieMonster cm; + + EXPECT_TRUE(cm.SetCookie(url_google, kValidCookieLine)); + EXPECT_EQ(cm.GetCookies(url_google), "A=B"); + + EXPECT_TRUE(cm.SetCookie(url_google, "C=D")); + EXPECT_EQ(cm.GetCookies(url_google), "A=B; C=D"); + + EXPECT_EQ(cm.DeleteAll(false), 2); + EXPECT_EQ(cm.GetCookies(url_google), ""); +} + +TEST(CookieMonsterTest, TestCookieDeleteAllCreatedAfterTimestamp) { + GURL url_google(kUrlGoogle); + CookieMonster cm; + Time now = Time::Now(); + + // Nothing has been added so nothing should be deleted. + EXPECT_EQ(0, cm.DeleteAllCreatedAfter(now - TimeDelta::FromDays(99), false)); + + // Create 3 cookies with creation date of today, yesterday and the day before. 
+ EXPECT_TRUE(cm.SetCookieWithCreationTime(url_google, "T-0=Now", now)); + EXPECT_TRUE(cm.SetCookieWithCreationTime(url_google, "T-1=Yesterday", + now - TimeDelta::FromDays(1))); + EXPECT_TRUE(cm.SetCookieWithCreationTime(url_google, "T-2=DayBefore", + now - TimeDelta::FromDays(2))); + + // Try to delete everything from now onwards. + EXPECT_EQ(1, cm.DeleteAllCreatedAfter(now, false)); + // Now delete the one cookie created in the last day. + EXPECT_EQ(1, cm.DeleteAllCreatedAfter(now - TimeDelta::FromDays(1), false)); + // Now effectively delete all cookies just created (1 is remaining). + EXPECT_EQ(1, cm.DeleteAllCreatedAfter(now - TimeDelta::FromDays(99), false)); + + // Make sure everything is gone. + EXPECT_EQ(0, cm.DeleteAllCreatedAfter(Time(), false)); + // Really make sure everything is gone. + EXPECT_EQ(0, cm.DeleteAll(false)); +} + +TEST(CookieMonsterTest, TestCookieDeleteAllCreatedBetweenTimestamps) { + GURL url_google(kUrlGoogle); + CookieMonster cm; + Time now = Time::Now(); + + // Nothing has been added so nothing should be deleted. + EXPECT_EQ(0, cm.DeleteAllCreatedAfter(now - TimeDelta::FromDays(99), false)); + + // Create 3 cookies with creation date of today, yesterday and the day before. + EXPECT_TRUE(cm.SetCookieWithCreationTime(url_google, "T-0=Now", now)); + EXPECT_TRUE(cm.SetCookieWithCreationTime(url_google, "T-1=Yesterday", + now - TimeDelta::FromDays(1))); + EXPECT_TRUE(cm.SetCookieWithCreationTime(url_google, "T-2=DayBefore", + now - TimeDelta::FromDays(2))); + EXPECT_TRUE(cm.SetCookieWithCreationTime(url_google, "T-3=ThreeDays", + now - TimeDelta::FromDays(3))); + EXPECT_TRUE(cm.SetCookieWithCreationTime(url_google, "T-7=LastWeek", + now - TimeDelta::FromDays(7))); + + // Try to delete threedays and the daybefore. + EXPECT_EQ(2, cm.DeleteAllCreatedBetween(now - TimeDelta::FromDays(3), + now - TimeDelta::FromDays(1), + false)); + + // Try to delete yesterday, also make sure that delete_end is not + // inclusive. 
+ EXPECT_EQ(1, cm.DeleteAllCreatedBetween(now - TimeDelta::FromDays(2), + now, + false)); + + // Make sure the delete_begin is inclusive. + EXPECT_EQ(1, cm.DeleteAllCreatedBetween(now - TimeDelta::FromDays(7), + now, + false)); + + // Delete the last (now) item. + EXPECT_EQ(1, cm.DeleteAllCreatedAfter(Time(), false)); + + // Really make sure everything is gone. + EXPECT_EQ(0, cm.DeleteAll(false)); +} + +TEST(CookieMonsterTest, TestSecure) { + GURL url_google(kUrlGoogle); + GURL url_google_secure(kUrlGoogleSecure); + CookieMonster cm; + + EXPECT_TRUE(cm.SetCookie(url_google, "A=B")); + EXPECT_EQ(cm.GetCookies(url_google), "A=B"); + EXPECT_EQ(cm.GetCookies(url_google_secure), "A=B"); + + EXPECT_TRUE(cm.SetCookie(url_google_secure, "A=B; secure")); + // The secure should overwrite the non-secure. + EXPECT_EQ(cm.GetCookies(url_google), ""); + EXPECT_EQ(cm.GetCookies(url_google_secure), "A=B"); + + EXPECT_TRUE(cm.SetCookie(url_google_secure, "D=E; secure")); + EXPECT_EQ(cm.GetCookies(url_google), ""); + EXPECT_EQ(cm.GetCookies(url_google_secure), "A=B; D=E"); + + EXPECT_TRUE(cm.SetCookie(url_google_secure, "A=B")); + // The non-secure should overwrite the secure. + EXPECT_EQ(cm.GetCookies(url_google), "A=B"); + EXPECT_EQ(cm.GetCookies(url_google_secure), "D=E; A=B"); +} + +static int CountInString(const std::string& str, char c) { + int count = 0; + for (std::string::const_iterator it = str.begin(); + it != str.end(); ++it) { + if (*it == c) + ++count; + } + return count; +} + +TEST(CookieMonsterTest, TestHostGarbageCollection) { + GURL url_google(kUrlGoogle); + CookieMonster cm; + // Add a bunch of cookies on a single host, should purge them. + for (int i = 0; i < 101; i++) { + std::string cookie = StringPrintf("a%03d=b", i); + EXPECT_TRUE(cm.SetCookie(url_google, cookie)); + std::string cookies = cm.GetCookies(url_google); + // Make sure we find it in the cookies. + EXPECT_TRUE(cookies.find(cookie) != std::string::npos); + // Count the number of cookies. 
+ EXPECT_LE(CountInString(cookies, '='), 70); + } +} + +TEST(CookieMonsterTest, TestTotalGarbageCollection) { + CookieMonster cm; + // Add a bunch of cookies on a bunch of host, some should get purged. + for (int i = 0; i < 2000; ++i) { + GURL url(StringPrintf("http://a%04d.izzle", i)); + EXPECT_TRUE(cm.SetCookie(url, "a=b")); + EXPECT_EQ(cm.GetCookies(url), "a=b"); + } + + // Check that cookies that still exist. + for (int i = 0; i < 2000; ++i) { + GURL url(StringPrintf("http://a%04d.izzle", i)); + if (i < 900) { + // Cookies should have gotten purged. + EXPECT_TRUE(cm.GetCookies(url).empty()); + } else if (i > 1100) { + // Cookies should still be around. + EXPECT_FALSE(cm.GetCookies(url).empty()); + } + } +} + +// Formerly NetUtilTest.CookieTest back when we used wininet's cookie handling. +TEST(CookieMonsterTest, NetUtilCookieTest) { + const GURL test_url(L"http://mojo.jojo.google.izzle/"); + + CookieMonster cm; + + EXPECT_TRUE(cm.SetCookie(test_url, "foo=bar")); + std::string value = cm.GetCookies(test_url); + EXPECT_EQ("foo=bar", value); + + // test that we can retrieve all cookies: + EXPECT_TRUE(cm.SetCookie(test_url, "x=1")); + EXPECT_TRUE(cm.SetCookie(test_url, "y=2")); + + std::string result = cm.GetCookies(test_url); + EXPECT_FALSE(result.empty()); + EXPECT_TRUE(result.find("x=1") != std::string::npos) << result; + EXPECT_TRUE(result.find("y=2") != std::string::npos) << result; +} + +static bool FindAndDeleteCookie(CookieMonster& cm, const std::string& domain, + const std::string& name) { + CookieMonster::CookieList cookies = cm.GetAllCookies(); + for (CookieMonster::CookieList::iterator it = cookies.begin(); + it != cookies.end(); ++it) + if (it->first == domain && it->second.Name() == name) + return cm.DeleteCookie(domain, it->second, false); + return false; +} + +TEST(CookieMonsterTest, TestDeleteSingleCookie) { + GURL url_google(kUrlGoogle); + + CookieMonster cm; + EXPECT_TRUE(cm.SetCookie(url_google, "A=B")); + EXPECT_TRUE(cm.SetCookie(url_google, 
"C=D")); + EXPECT_TRUE(cm.SetCookie(url_google, "E=F")); + EXPECT_EQ("A=B; C=D; E=F", cm.GetCookies(url_google)); + + EXPECT_TRUE(FindAndDeleteCookie(cm, url_google.host(), "C")); + EXPECT_EQ("A=B; E=F", cm.GetCookies(url_google)); + + EXPECT_FALSE(FindAndDeleteCookie(cm, "random.host", "E")); + EXPECT_EQ("A=B; E=F", cm.GetCookies(url_google)); +} + +// TODO test overwrite cookie diff --git a/net/base/cookie_policy.cc b/net/base/cookie_policy.cc new file mode 100644 index 0000000..f0b2199e --- /dev/null +++ b/net/base/cookie_policy.cc @@ -0,0 +1,66 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "base/logging.h" +#include "net/base/cookie_policy.h" +#include "net/base/registry_controlled_domain.h" + +bool CookiePolicy::CanGetCookies(const GURL& url, const GURL& policy_url) { + switch (type_) { + case CookiePolicy::ALLOW_ALL_COOKIES: + return true; + case CookiePolicy::BLOCK_THIRD_PARTY_COOKIES: + return true; + case CookiePolicy::BLOCK_ALL_COOKIES: + return false; + default: + NOTREACHED(); + return false; + } +} + +bool CookiePolicy::CanSetCookie(const GURL& url, const GURL& policy_url) { + switch (type_) { + case CookiePolicy::ALLOW_ALL_COOKIES: + return true; + case CookiePolicy::BLOCK_THIRD_PARTY_COOKIES: + if (policy_url.is_empty()) + return true; // Empty policy URL should indicate a first-party request + + return RegistryControlledDomainService::SameDomainOrHost(url, policy_url); + case CookiePolicy::BLOCK_ALL_COOKIES: + return false; + default: + NOTREACHED(); + return false; + } +} + +CookiePolicy::CookiePolicy() + : type_(CookiePolicy::ALLOW_ALL_COOKIES) { }
\ No newline at end of file diff --git a/net/base/cookie_policy.h b/net/base/cookie_policy.h new file mode 100644 index 0000000..0523c95 --- /dev/null +++ b/net/base/cookie_policy.h @@ -0,0 +1,72 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef NET_BASE_COOKIE_POLICY_H__ +#define NET_BASE_COOKIE_POLICY_H__ + +#include "googleurl/src/gurl.h" + +// The CookiePolicy class implements third-party cookie blocking. 
+class CookiePolicy { + public: + // Consult the user's third-party cookie blocking preferences to determine + // whether the URL's cookies can be read if the top-level window is policy_url + bool CanGetCookies(const GURL& url, const GURL& policy_url); + + // Consult the user's third-party cookie blocking preferences to determine + // whether the URL's cookies can be set if the top-level window is policy_url + bool CanSetCookie(const GURL& url, const GURL& policy_url); + + enum Type { + ALLOW_ALL_COOKIES = 0, // do not perform any cookie blocking + BLOCK_THIRD_PARTY_COOKIES, // prevent third-party cookies from being sent + BLOCK_ALL_COOKIES // disable cookies + }; + + static bool ValidType(int32 type) { + return type >= ALLOW_ALL_COOKIES && type <= BLOCK_ALL_COOKIES; + } + + static Type FromInt(int32 type) { + return static_cast<Type>(type); + } + + // Sets the current policy to enforce. This should be called when the user's + // preferences change. + void SetType(Type type) { type_ = type; } + + CookiePolicy(); + + private: + Type type_; + + DISALLOW_EVIL_CONSTRUCTORS(CookiePolicy); +}; + +#endif // NET_BASE_COOKIE_POLICY_H__ diff --git a/net/base/cookie_policy_unittest.cc b/net/base/cookie_policy_unittest.cc new file mode 100644 index 0000000..e63ec24 --- /dev/null +++ b/net/base/cookie_policy_unittest.cc @@ -0,0 +1,115 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "net/base/cookie_policy.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace { + +class CookiePolicyTest : public testing::Test { + public: + CookiePolicyTest() + : url_google_("http://www.google.izzle"), + url_google_secure_("https://www.google.izzle"), + url_google_mail_("http://mail.google.izzle"), + url_google_analytics_("http://www.googleanalytics.izzle") { } + protected: + GURL url_google_; + GURL url_google_secure_; + GURL url_google_mail_; + GURL url_google_analytics_; +}; + +} // namespace + +TEST_F(CookiePolicyTest, DefaultPolicyTest) { + CookiePolicy cp; + EXPECT_TRUE(cp.CanGetCookies(url_google_, url_google_)); + EXPECT_TRUE(cp.CanGetCookies(url_google_, url_google_secure_)); + EXPECT_TRUE(cp.CanGetCookies(url_google_, url_google_mail_)); + EXPECT_TRUE(cp.CanGetCookies(url_google_, url_google_analytics_)); + EXPECT_TRUE(cp.CanGetCookies(url_google_, GURL())); + + EXPECT_TRUE(cp.CanSetCookie(url_google_, url_google_)); + EXPECT_TRUE(cp.CanSetCookie(url_google_, url_google_secure_)); + 
EXPECT_TRUE(cp.CanSetCookie(url_google_, url_google_mail_)); + EXPECT_TRUE(cp.CanSetCookie(url_google_, url_google_analytics_)); + EXPECT_TRUE(cp.CanSetCookie(url_google_, GURL())); +} + +TEST_F(CookiePolicyTest, AllowAllCookiesTest) { + CookiePolicy cp; + cp.SetType(CookiePolicy::ALLOW_ALL_COOKIES); + + EXPECT_TRUE(cp.CanGetCookies(url_google_, url_google_)); + EXPECT_TRUE(cp.CanGetCookies(url_google_, url_google_secure_)); + EXPECT_TRUE(cp.CanGetCookies(url_google_, url_google_mail_)); + EXPECT_TRUE(cp.CanGetCookies(url_google_, url_google_analytics_)); + EXPECT_TRUE(cp.CanGetCookies(url_google_, GURL())); + + EXPECT_TRUE(cp.CanSetCookie(url_google_, url_google_)); + EXPECT_TRUE(cp.CanSetCookie(url_google_, url_google_secure_)); + EXPECT_TRUE(cp.CanSetCookie(url_google_, url_google_mail_)); + EXPECT_TRUE(cp.CanSetCookie(url_google_, url_google_analytics_)); + EXPECT_TRUE(cp.CanSetCookie(url_google_, GURL())); +} + +TEST_F(CookiePolicyTest, BlockThirdPartyCookiesTest) { + CookiePolicy cp; + cp.SetType(CookiePolicy::BLOCK_THIRD_PARTY_COOKIES); + + EXPECT_TRUE(cp.CanGetCookies(url_google_, url_google_)); + EXPECT_TRUE(cp.CanGetCookies(url_google_, url_google_secure_)); + EXPECT_TRUE(cp.CanGetCookies(url_google_, url_google_mail_)); + EXPECT_TRUE(cp.CanGetCookies(url_google_, url_google_analytics_)); + EXPECT_TRUE(cp.CanGetCookies(url_google_, GURL())); + + EXPECT_TRUE(cp.CanSetCookie(url_google_, url_google_)); + EXPECT_TRUE(cp.CanSetCookie(url_google_, url_google_secure_)); + EXPECT_TRUE(cp.CanSetCookie(url_google_, url_google_mail_)); + EXPECT_FALSE(cp.CanSetCookie(url_google_, url_google_analytics_)); + EXPECT_TRUE(cp.CanSetCookie(url_google_, GURL())); +} + +TEST_F(CookiePolicyTest, BlockAllCookiesTest) { + CookiePolicy cp; + cp.SetType(CookiePolicy::BLOCK_ALL_COOKIES); + + EXPECT_FALSE(cp.CanGetCookies(url_google_, url_google_)); + EXPECT_FALSE(cp.CanGetCookies(url_google_, url_google_secure_)); + EXPECT_FALSE(cp.CanGetCookies(url_google_, url_google_mail_)); + 
EXPECT_FALSE(cp.CanGetCookies(url_google_, url_google_analytics_)); + EXPECT_FALSE(cp.CanGetCookies(url_google_, GURL())); + + EXPECT_FALSE(cp.CanSetCookie(url_google_, url_google_)); + EXPECT_FALSE(cp.CanSetCookie(url_google_, url_google_secure_)); + EXPECT_FALSE(cp.CanSetCookie(url_google_, url_google_mail_)); + EXPECT_FALSE(cp.CanSetCookie(url_google_, url_google_analytics_)); + EXPECT_FALSE(cp.CanSetCookie(url_google_, GURL())); +}
\ No newline at end of file diff --git a/net/base/data_url.cc b/net/base/data_url.cc new file mode 100644 index 0000000..cf8e239 --- /dev/null +++ b/net/base/data_url.cc @@ -0,0 +1,121 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// NOTE: based loosely on mozilla's nsDataChannel.cpp + +#include <algorithm> + +#include "net/base/data_url.h" + +#include "base/string_util.h" +#include "googleurl/src/gurl.h" +#include "net/base/base64.h" +#include "net/base/escape.h" + +/*static*/ +bool DataURL::Parse(const GURL& url, std::string* mime_type, + std::string* charset, std::string* data) { + std::string::const_iterator begin = url.spec().begin(); + std::string::const_iterator end = url.spec().end(); + + std::string::const_iterator after_colon = std::find(begin, end, ':'); + if (after_colon == end) + return false; + ++after_colon; + + // first, find the start of the data + std::string::const_iterator comma = std::find(after_colon, end, ','); + if (comma == end) + return false; + + const char kBase64Tag[] = ";base64"; + std::string::const_iterator it = + std::search(after_colon, comma, kBase64Tag, + kBase64Tag + sizeof(kBase64Tag)-1); + + bool base64_encoded = (it != comma); + + if (comma != after_colon) { + // everything else is content type + std::string::const_iterator semi_colon = std::find(after_colon, comma, ';'); + if (semi_colon != after_colon) { + mime_type->assign(after_colon, semi_colon); + StringToLowerASCII(mime_type); + } + if (semi_colon != comma) { + const char kCharsetTag[] = "charset="; + it = std::search(semi_colon + 1, comma, kCharsetTag, + kCharsetTag + sizeof(kCharsetTag)-1); + if (it != comma) + charset->assign(it + sizeof(kCharsetTag)-1, comma); + } + } + + // fallback to defaults if nothing specified in the URL: + if (mime_type->empty()) + mime_type->assign("text/plain"); + if (charset->empty()) + charset->assign("US-ASCII"); + + // Preserve spaces if dealing with text or xml input, same as mozilla: + // https://bugzilla.mozilla.org/show_bug.cgi?id=138052 + // but strip them otherwise: + // https://bugzilla.mozilla.org/show_bug.cgi?id=37200 + // (Spaces in a data URL should be escaped, which is handled below, so any + // spaces now are wrong. 
People expect to be able to enter them in the URL + // bar for text, and it can't hurt, so we allow it.) + std::string temp_data = std::string(comma + 1, end); + + // For base64, we may have url-escaped whitespace which is not part + // of the data, and should be stripped. Otherwise, the escaped whitespace + // could be part of the payload, so don't strip it. + if (base64_encoded) { + temp_data = UnescapeURLComponent(temp_data, + UnescapeRule::SPACES | UnescapeRule::PERCENTS); + } + + // Strip whitespace. + if (base64_encoded || !(mime_type->compare(0, 5, "text/") == 0 || + mime_type->find("xml") != std::string::npos)) { + temp_data.erase(std::remove_if(temp_data.begin(), temp_data.end(), + IsAsciiWhitespace<wchar_t>), + temp_data.end()); + } + + if (!base64_encoded) { + temp_data = UnescapeURLComponent(temp_data, + UnescapeRule::SPACES | UnescapeRule::PERCENTS); + } + + if (base64_encoded) + return Base64Decode(temp_data, data); + + temp_data.swap(*data); + return true; +} diff --git a/net/base/data_url.h b/net/base/data_url.h new file mode 100644 index 0000000..06c3ab1 --- /dev/null +++ b/net/base/data_url.h @@ -0,0 +1,63 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <string> + +class GURL; + +// See RFC 2397 for a complete description of the 'data' URL scheme. +// +// Briefly, a 'data' URL has the form: +// +// data:[<mediatype>][;base64],<data> +// +// The <mediatype> is an Internet media type specification (with optional +// parameters.) The appearance of ";base64" means that the data is encoded as +// base64. Without ";base64", the data (as a sequence of octets) is represented +// using ASCII encoding for octets inside the range of safe URL characters and +// using the standard %xx hex encoding of URLs for octets outside that range. +// If <mediatype> is omitted, it defaults to text/plain;charset=US-ASCII. As a +// shorthand, "text/plain" can be omitted but the charset parameter supplied. +// +class DataURL { + public: + // This method can be used to parse a 'data' URL into its component pieces. + // + // The resulting mime_type is normalized to lowercase. The data is the + // decoded data (e.g.., if the data URL specifies base64 encoding, then the + // returned data is base64 decoded, and any %-escaped bytes are unescaped). 
+ // + // If the URL is malformed, then this method will return false, and its + // output variables will remain unchanged. On success, true is returned. + // + static bool Parse(const GURL& url, + std::string* mime_type, + std::string* charset, + std::string* data); +}; diff --git a/net/base/data_url_unittest.cc b/net/base/data_url_unittest.cc new file mode 100644 index 0000000..99865df --- /dev/null +++ b/net/base/data_url_unittest.cc @@ -0,0 +1,176 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "base/basictypes.h" +#include "googleurl/src/gurl.h" +#include "net/base/data_url.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace { + class DataURLTest : public testing::Test { + }; +} + +TEST(DataURLTest, Parse) { + const struct { + const char* url; + bool is_valid; + const char* mime_type; + const char* charset; + const char* data; + } tests[] = { + { "data:", + false, + "", + "", + "" }, + + { "data:,", + true, + "text/plain", + "US-ASCII", + "" }, + + { "data:;base64,", + true, + "text/plain", + "US-ASCII", + "" }, + + { "data:;charset=,test", + true, + "text/plain", + "US-ASCII", + "test" }, + + { "data:TeXt/HtMl,<b>x</b>", + true, + "text/html", + "US-ASCII", + "<b>x</b>" }, + + { "data:,foo", + true, + "text/plain", + "US-ASCII", + "foo" }, + + { "data:;base64,aGVsbG8gd29ybGQ=", + true, + "text/plain", + "US-ASCII", + "hello world" }, + + { "data:foo/bar;baz=1;charset=kk,boo", + true, + "foo/bar", + "kk", + "boo" }, + + { "data:text/html,%3Chtml%3E%3Cbody%3E%3Cb%3Ehello%20world%3C%2Fb%3E%3C%2Fbody%3E%3C%2Fhtml%3E", + true, + "text/html", + "US-ASCII", + "<html><body><b>hello world</b></body></html>" }, + + { "data:text/html,<html><body><b>hello world</b></body></html>", + true, + "text/html", + "US-ASCII", + "<html><body><b>hello world</b></body></html>" }, + + // the comma cannot be url-escaped! 
+ { "data:%2Cblah", + false, + "", + "", + "" }, + + // invalid base64 content + { "data:;base64,aGVs_-_-", + false, + "", + "", + "" }, + + // Spaces should be removed from non-text data URLs (we already tested + // spaces above). + { "data:image/fractal,a b c d e f g", + true, + "image/fractal", + "US-ASCII", + "abcdefg" }, + + // Spaces should also be removed from anything base-64 encoded + { "data:;base64,aGVs bG8gd2 9ybGQ=", + true, + "text/plain", + "US-ASCII", + "hello world" }, + + // Other whitespace should also be removed from anything base-64 encoded. + { "data:;base64,aGVs bG8gd2 \n9ybGQ=", + true, + "text/plain", + "US-ASCII", + "hello world" }, + + // In base64 encoding, escaped whitespace should be stripped. + // (This test was taken from acid3) + // http://b/1054495 + { "data:text/javascript;base64,%20ZD%20Qg%0D%0APS%20An%20Zm91cic%0D%0A%207%20", + true, + "text/javascript", + "US-ASCII", + "d4 = 'four';" }, + + // Only unescaped whitespace should be stripped in non-base64. 
+ // http://b/1157796 + { "data:img/png,A B %20 %0A C", + true, + "img/png", + "US-ASCII", + "AB \nC" }, + + // TODO(darin): add more interesting tests + }; + + for (size_t i = 0; i < arraysize(tests); ++i) { + std::string mime_type; + std::string charset; + std::string data; + bool ok = DataURL::Parse(GURL(tests[i].url), &mime_type, &charset, &data); + EXPECT_EQ(ok, tests[i].is_valid); + if (tests[i].is_valid) { + EXPECT_EQ(tests[i].mime_type, mime_type); + EXPECT_EQ(tests[i].charset, charset); + EXPECT_EQ(tests[i].data, data); + } + } +} diff --git a/net/base/dir_header.html b/net/base/dir_header.html new file mode 100644 index 0000000..ec4eb3a --- /dev/null +++ b/net/base/dir_header.html @@ -0,0 +1,69 @@ +<html id="t"> +<head> + +<script> +function addRow(name, url, isdir, size, date_modified) { + if (name == ".") + return; + + var table = document.getElementById("table"); + var row = document.createElement("tr"); + var file_cell = document.createElement("td"); + var link = document.createElement("a"); + if (name == "..") { + link.href = ".."; + link.innerText = document.getElementById("parentDirText").innerText; + size = ""; + date_modified = ""; + } else { + if (isdir) { + name = name + "/"; + url = url + "/"; + size = ""; + } + link.innerText = name; + link.href = "./" + url; + } + file_cell.appendChild(link); + + row.appendChild(file_cell); + row.appendChild(createCell(size)); + row.appendChild(createCell(date_modified)); + + table.appendChild(row); +} + +function createCell(text) { + var cell = document.createElement("td"); + cell.setAttribute("class", "sizeColumn"); + cell.innerText = text; + return cell; +} + +function start(location) { + var header = document.getElementById("header"); + header.innerText = header.innerText.replace("LOCATION", location); + + document.getElementById("title").innerText = header.innerText; +} +</script> + +<title id="title"></title> +<style> + h1 { white-space: nowrap; } + td.sizeColumn { text-align: right; padding-left: 
30px; } +</style> +</head> +<body> +<span id="parentDirText" style="display:none" jscontent="parentDirText"></span> +<h1 id="header" jscontent="header"></h1> +<hr/> +<table id="table"> +<tr style="font-weight: bold"> + <td jscontent="headerName"></td> + <td class="sizeColumn" jscontent="headerSize"></td> + <td class="sizeColumn" jscontent="headerDateModified"></td> +</tr> +</table> +</body> +</html> diff --git a/net/base/directory_lister.cc b/net/base/directory_lister.cc new file mode 100644 index 0000000..44dd251 --- /dev/null +++ b/net/base/directory_lister.cc @@ -0,0 +1,163 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <process.h> + +#include "net/base/directory_lister.h" + +#include "base/message_loop.h" + +static const int kFilesPerEvent = 8; + +class DirectoryDataEvent : public Task { + public: + explicit DirectoryDataEvent(DirectoryLister* d) + : lister(d), count(0), error(0) { + } + + void Run() { + if (count) { + lister->OnReceivedData(data, count); + } else { + lister->OnDone(error); + } + } + + scoped_refptr<DirectoryLister> lister; + WIN32_FIND_DATA data[kFilesPerEvent]; + int count; + DWORD error; +}; + +/*static*/ +unsigned __stdcall DirectoryLister::ThreadFunc(void* param) { + DirectoryLister* self = reinterpret_cast<DirectoryLister*>(param); + + std::wstring pattern = self->directory(); + if (pattern[pattern.size()-1] != '\\') { + pattern.append(L"\\*"); + } else { + pattern.append(L"*"); + } + + DirectoryDataEvent* e = new DirectoryDataEvent(self); + + HANDLE handle = FindFirstFile(pattern.c_str(), &e->data[e->count]); + if (handle == INVALID_HANDLE_VALUE) { + e->error = GetLastError(); + self->message_loop_->PostTask(FROM_HERE, e); + e = NULL; + } else { + do { + if (++e->count == kFilesPerEvent) { + self->message_loop_->PostTask(FROM_HERE, e); + e = new DirectoryDataEvent(self); + } + } while (!self->was_canceled() && FindNextFile(handle, &e->data[e->count])); + + FindClose(handle); + + if (e->count > 0) { + self->message_loop_->PostTask(FROM_HERE, e); + e = NULL; + } + + // Notify done + e = new 
DirectoryDataEvent(self); + self->message_loop_->PostTask(FROM_HERE, e); + } + + self->Release(); + return 0; +} + +DirectoryLister::DirectoryLister(const std::wstring& dir, Delegate* delegate) + : dir_(dir), + message_loop_(NULL), + delegate_(delegate), + thread_(NULL), + canceled_(false) { + DCHECK(!dir.empty()); +} + +DirectoryLister::~DirectoryLister() { + if (thread_) + CloseHandle(thread_); +} + +bool DirectoryLister::Start() { + // spawn a thread to enumerate the specified directory + + // pass events back to the current thread + message_loop_ = MessageLoop::current(); + DCHECK(message_loop_) << "calling thread must have a message loop"; + + AddRef(); // the thread will release us when it is done + + unsigned thread_id; + thread_ = reinterpret_cast<HANDLE>( + _beginthreadex(NULL, 0, DirectoryLister::ThreadFunc, this, 0, + &thread_id)); + + if (!thread_) { + Release(); + return false; + } + + return true; +} + +void DirectoryLister::Cancel() { + canceled_ = true; + + if (thread_) { + WaitForSingleObject(thread_, INFINITE); + CloseHandle(thread_); + thread_ = NULL; + } +} + +void DirectoryLister::OnReceivedData(const WIN32_FIND_DATA* data, int count) { + // Since the delegate can clear itself during the OnListFile callback, we + // need to null check it during each iteration of the loop. Similarly, it is + // necessary to check the canceled_ flag to avoid sending data to a delegate + // who doesn't want anymore. + for (int i = 0; !canceled_ && delegate_ && i < count; ++i) + delegate_->OnListFile(data[i]); +} + +void DirectoryLister::OnDone(int error) { + // If canceled, we need to report some kind of error, but don't overwrite the + // error condition if it is already set. 
+ if (!error && canceled_) + error = ERROR_OPERATION_ABORTED; + + if (delegate_) + delegate_->OnListDone(error); +} diff --git a/net/base/directory_lister.h b/net/base/directory_lister.h new file mode 100644 index 0000000..b3b0949 --- /dev/null +++ b/net/base/directory_lister.h @@ -0,0 +1,92 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#ifndef NET_BASE_DIRECTORY_LISTER_H__ +#define NET_BASE_DIRECTORY_LISTER_H__ + +#include <windows.h> +#include <string> + +#include "base/ref_counted.h" + +class MessageLoop; + +// +// This class provides an API for listing the contents of a directory on the +// filesystem asynchronously. It spawns a background thread, and enumerates +// the specified directory on that thread. It marshalls WIN32_FIND_DATA +// structs over to the main application thread. The consumer of this class +// is insulated from any of the multi-threading details. +// +class DirectoryLister : public base::RefCountedThreadSafe<DirectoryLister> { + public: + // Implement this class to receive directory entries. + class Delegate { + public: + virtual void OnListFile(const WIN32_FIND_DATA& data) = 0; + virtual void OnListDone(int error) = 0; + }; + + DirectoryLister(const std::wstring& dir, Delegate* delegate); + ~DirectoryLister(); + + // Call this method to start the directory enumeration thread. + bool Start(); + + // Call this method to asynchronously stop directory enumeration. The + // delegate will receive the OnListDone notification with an error code of + // ERROR_OPERATION_ABORTED. + void Cancel(); + + // The delegate pointer may be modified at any time. + Delegate* delegate() const { return delegate_; } + void set_delegate(Delegate* d) { delegate_ = d; } + + // Returns the directory being enumerated. + const std::wstring& directory() const { return dir_; } + + // Returns true if the directory enumeration was canceled. 
+ bool was_canceled() const { return canceled_; } + + private: + friend class DirectoryDataEvent; + + void OnReceivedData(const WIN32_FIND_DATA* data, int count); + void OnDone(int error); + + static unsigned __stdcall ThreadFunc(void* param); + + std::wstring dir_; + Delegate* delegate_; + MessageLoop* message_loop_; + HANDLE thread_; + bool canceled_; +}; + +#endif // NET_BASE_DIRECTORY_LISTER_H__ diff --git a/net/base/directory_lister_unittest.cc b/net/base/directory_lister_unittest.cc new file mode 100644 index 0000000..b1cbc4a --- /dev/null +++ b/net/base/directory_lister_unittest.cc @@ -0,0 +1,85 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "base/message_loop.h" +#include "base/path_service.h" +#include "net/base/directory_lister.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace { + class DirectoryListerTest : public testing::Test { + }; +} + +class DirectoryListerDelegate : public DirectoryLister::Delegate { + public: + DirectoryListerDelegate() : error_(-1) { + } + void OnListFile(const WIN32_FIND_DATA& data) { + } + void OnListDone(int error) { + error_ = error; + MessageLoop::current()->Quit(); + } + int error() const { return error_; } + private: + int error_; +}; + +TEST(DirectoryListerTest, BigDirTest) { + std::wstring windows_path; + ASSERT_TRUE(PathService::Get(base::DIR_WINDOWS, &windows_path)); + + DirectoryListerDelegate delegate; + scoped_refptr<DirectoryLister> lister = + new DirectoryLister(windows_path, &delegate); + + lister->Start(); + + MessageLoop::current()->Run(); + + EXPECT_EQ(delegate.error(), ERROR_SUCCESS); +} + +TEST(DirectoryListerTest, CancelTest) { + std::wstring windows_path; + ASSERT_TRUE(PathService::Get(base::DIR_WINDOWS, &windows_path)); + + DirectoryListerDelegate delegate; + scoped_refptr<DirectoryLister> lister = + new DirectoryLister(windows_path, &delegate); + + lister->Start(); + lister->Cancel(); + + MessageLoop::current()->Run(); + + EXPECT_EQ(delegate.error(), ERROR_OPERATION_ABORTED); + EXPECT_EQ(lister->was_canceled(), true); +} diff --git a/net/base/dns_resolution_observer.cc 
b/net/base/dns_resolution_observer.cc new file mode 100644 index 0000000..c66be8f --- /dev/null +++ b/net/base/dns_resolution_observer.cc @@ -0,0 +1,85 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// This file supports network stack independent notification of progress +// towards resolving a hostname. 
+ +#include "net/base/dns_resolution_observer.h" + +#include <string> + +#include "base/atomic.h" +#include "base/logging.h" + +namespace net { + +static DnsResolutionObserver* dns_resolution_observer; + +void AddDnsResolutionObserver(DnsResolutionObserver* new_observer) { + if (new_observer == dns_resolution_observer) + return; // Facilitate unit tests that init/teardown repeatedly. + DCHECK(!dns_resolution_observer); + if (InterlockedCompareExchangePointer( + reinterpret_cast<PVOID*>(&dns_resolution_observer), + new_observer, NULL)) + DCHECK(0) << "Second attempt to setup observer"; +} + +DnsResolutionObserver* RemoveDnsResolutionObserver() { + // We really need to check that the entire network subsystem is shutting down, + // and hence no additional calls can even *possibly* still be lingering in the + // notification path that includes our observer. Until we have a way to + // really assert that fact, we will outlaw the calling of this function. + // Darin suggested that the caller use a static initializer for the observer, + // so that it can safely be destroyed after process termination, and without + // inducing a memory leak. + // Bottom line: Don't call this function! You will crash for now. + CHECK(0); + DnsResolutionObserver* old_observer = dns_resolution_observer; + dns_resolution_observer = NULL; + return old_observer; +} + +// Locking access to dns_resolution_observer is not really critical... but we +// should test the value of dns_resolution_observer that we use. +// Worst case, we'll get an "out of date" value... which is no big deal for the +// DNS prefetching system (the most common (only?) observer). 
+void DidStartDnsResolution(const std::string& name, void* context) { + DnsResolutionObserver* current_observer = dns_resolution_observer; + if (current_observer) + current_observer->OnStartResolution(name, context); +} + +void DidFinishDnsResolutionWithStatus(bool was_resolved, void* context) { + DnsResolutionObserver* current_observer = dns_resolution_observer; + if (current_observer) { + current_observer->OnFinishResolutionWithStatus(was_resolved, context); + } +} +} // namspace net diff --git a/net/base/dns_resolution_observer.h b/net/base/dns_resolution_observer.h new file mode 100644 index 0000000..08ef6bf --- /dev/null +++ b/net/base/dns_resolution_observer.h @@ -0,0 +1,80 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// This file supports network stack independent notification of progress +// towards resolving a hostname. + +// The observer class supports exactly one active (Add'ed) instance, and in +// typical usage, that observer will be Add'ed during process startup, and +// Remove'd during process termination. + + +#ifndef NET_BASE_DNS_RESOLUTION_OBSERVER_H__ +#define NET_BASE_DNS_RESOLUTION_OBSERVER_H__ + +#include <string> + +namespace net { + +class DnsResolutionObserver { + public: + // For each OnStartResolution() notification, there should be a later + // OnFinishResolutionWithStatus() indicating completion of the resolution + // activity. + // Related pairs of notification will arrive with matching context values. + // A caller may use the context values to match up these asynchronous calls + // from among a larger call stream. + // Once a matching pair of notifications has been provided (i.e., a pair with + // identical context values), and the notification methods (below) have + // returned, the context values *might* be reused. + virtual void OnStartResolution(const std::string& name, void* context) = 0; + virtual void OnFinishResolutionWithStatus(bool was_resolved, + void* context) = 0; +}; + + +// Note that *exactly* one observer is currently supported, and any attempt to +// add a second observer via AddDnsResolutionObserver() before removing the +// first DnsResolutionObserver will induce a DCHECK() assertion. 
+void AddDnsResolutionObserver(DnsResolutionObserver* new_observer); + +// Note that the RemoveDnsResolutionObserver() will NOT perform any delete +// operations, and it is the responsibility of the code that called +// AddDnsResolutionObserver() to make a corresponding call to +// RemoveDnsResolutionObserver() and then delete the returned +// DnsResolutionObserver instance. +DnsResolutionObserver* RemoveDnsResolutionObserver(); + +// The following functions are expected to be called only by network stack +// implementations. This above observer class will relay the notifications +// to any registered observer. +void DidStartDnsResolution(const std::string& name, void* context); +void DidFinishDnsResolutionWithStatus(bool was_resolved, void* context); +} // namspace net +#endif // NET_BASE_DNS_RESOLUTION_OBSERVER_H__ diff --git a/net/base/effective_tld_names.dat b/net/base/effective_tld_names.dat new file mode 100644 index 0000000..578a30c --- /dev/null +++ b/net/base/effective_tld_names.dat @@ -0,0 +1,3124 @@ +// Google note: this is Mozilla's version 1.4 of this file, retrieved using +// cvs -d :pserver:anonymous@cvs-mirror.mozilla.org:/cvsroot co \ +// mozilla/netwerk/dns/src/effective_tld_names.dat +// The file itself contains no license, but may be presumed to be covered at +// most by Mozilla's standard tri-license: see http://www.mozilla.org/MPL/ + + +// ac : http://en.wikipedia.org/wiki/.ac +ac +com.ac +edu.ac +gov.ac +net.ac +mil.ac +org.ac + +// ad : http://en.wikipedia.org/wiki/.ad +ad +nom.ad + +// ae : http://en.wikipedia.org/wiki/.ae +ae +net.ae +gov.ae +ac.ae +sch.ae +org.ae +mil.ae +pro.ae +name.ae + +// aero : see http://www.information.aero/index.php?id=66 +aero +accident-investigation.aero +accident-prevention.aero +aerobatic.aero +aeroclub.aero +aerodrome.aero +agents.aero +aircraft.aero +airline.aero +airport.aero +air-surveillance.aero +airtraffic.aero +air-traffic-control.aero +ambulance.aero +amusement.aero +association.aero +author.aero 
+ballooning.aero +broker.aero +caa.aero +cargo.aero +catering.aero +certification.aero +championship.aero +charter.aero +civilaviation.aero +club.aero +conference.aero +consultant.aero +consulting.aero +control.aero +council.aero +crew.aero +design.aero +dgca.aero +educator.aero +emergency.aero +engine.aero +engineer.aero +entertainment.aero +equipment.aero +exchange.aero +express.aero +federation.aero +flight.aero +freight.aero +fuel.aero +gliding.aero +government.aero +groundhandling.aero +group.aero +hanggliding.aero +homebuilt.aero +insurance.aero +journal.aero +journalist.aero +leasing.aero +logistics.aero +magazine.aero +maintenance.aero +marketplace.aero +media.aero +microlight.aero +modelling.aero +navigation.aero +parachuting.aero +paragliding.aero +passenger-association.aero +pilot.aero +press.aero +production.aero +recreation.aero +repbody.aero +res.aero +research.aero +rotorcraft.aero +safety.aero +scientist.aero +services.aero +show.aero +skydiving.aero +software.aero +student.aero +taxi.aero +trader.aero +trading.aero +trainer.aero +union.aero +workinggroup.aero +works.aero + +// af : http://www.nic.af/help.jsp +af +gov.af +com.af +org.af +net.af +edu.af + +// ag : http://www.nic.ag/prices.htm +ag +com.ag +org.ag +net.ag +co.ag +nom.ag + +// ai : http://nic.com.ai/ +ai +off.ai +com.ai +net.ai +org.ai + +// al : http://www.inima.al/Domains.html +gov.al +edu.al +org.al +com.al +net.al + +// am : http://en.wikipedia.org/wiki/.am +am + +// an : http://www.una.an/an_domreg/default.asp +an +com.an +net.an +org.an +edu.an + +// ao : http://en.wikipedia.org/wiki/.ao +// list of 2nd level TLDs ? 
+ao + +// aq : http://en.wikipedia.org/wiki/.aq +aq + +// ar : http://en.wikipedia.org/wiki/.ar +*.ar +!congresodelalengua3.ar +!educ.ar +!gobiernoelectronico.ar +!mecon.ar +!nacion.ar +!nic.ar +!promocion.ar +!retina.ar +!uba.ar + +// arpa : http://en.wikipedia.org/wiki/.arpa +e164.arpa +in-addr.arpa +ip6.arpa +uri.arpa +urn.arpa + +// as : http://en.wikipedia.org/wiki/.as +as + +// at : http://en.wikipedia.org/wiki/.at +at +gv.at +ac.at +co.at +or.at + +// au : http://en.wikipedia.org/wiki/.au +*.au +// au geographical names (vic.au etc... are covered above) +act.edu.au +nsw.edu.au +nt.edu.au +qld.edu.au +sa.edu.au +tas.edu.au +vic.edu.au +wa.edu.au +act.gov.au +nsw.gov.au +nt.gov.au +qld.gov.au +sa.gov.au +tas.gov.au +vic.gov.au +wa.gov.au + +// aw : http://en.wikipedia.org/wiki/.aw +aw +com.aw + +// ax : http://en.wikipedia.org/wiki/.ax +ax + +// az : http://en.wikipedia.org/wiki/.az +az +com.az +net.az +int.az +gov.az +org.az +edu.az +info.az +pp.az +mil.az +name.az +pro.az +biz.az + +// ba : http://en.wikipedia.org/wiki/.ba +ba +org.ba +net.ba +edu.ba +gov.ba +mil.ba +unsa.ba +unbi.ba +co.ba +com.ba +rs.ba + +// bb : http://en.wikipedia.org/wiki/.bb +bb +com.bb +edu.bb +gov.bb +net.bb +org.bb + +// bd : http://en.wikipedia.org/wiki/.bd +*.bd + +// be : http://en.wikipedia.org/wiki/.be +be +ac.be + +// bf : http://en.wikipedia.org/wiki/.bf +bf + +// bg : http://en.wikipedia.org/wiki/.bg +bg + +// bh : http://en.wikipedia.org/wiki/.bh +// list of 2nd level tlds ? +bh + +// bi : http://en.wikipedia.org/wiki/.bi +// list of 2nd level tlds ? +bi + +// biz : http://en.wikipedia.org/wiki/.biz +biz + +// bj : http://en.wikipedia.org/wiki/.bj +// list of 2nd level tlds ? 
+bj + +// bm : http://www.bermudanic.bm/dnr-text.txt +bm +com.bm +edu.bm +gov.bm +net.bm +org.bm + +// bn : http://en.wikipedia.org/wiki/.bn +*.bn + +// bo : http://www.nic.bo/ +bo +com.bo +edu.bo +gov.bo +gob.bo +int.bo +org.bo +net.bo +mil.bo +tv.bo + +// br : http://en.wikipedia.org/wiki/.br +// http://registro.br/info/dpn.html +br +adm.br +adv.br +agr.br +am.br +arq.br +art.br +ato.br +bio.br +blog.br +bmd.br +cim.br +cng.br +cnt.br +com.br +coop.br +ecn.br +edu.br +eng.br +esp.br +etc.br +eti.br +far.br +flog.br +fm.br +fnd.br +fot.br +fst.br +g12.br +ggf.br +gov.br +imb.br +ind.br +inf.br +jor.br +jus.br +lel.br +mat.br +med.br +mil.br +mus.br +net.br +nom.br +not.br +ntr.br +odo.br +org.br +ppg.br +pro.br +psc.br +psi.br +qsl.br +rec.br +slg.br +srv.br +tmp.br +trd.br +tur.br +tv.br +vet.br +vlog.br +wiki.br +zlg.br + +// bs : http://www.nic.bs/rules.html +bs +com.bs +net.bs +org.bs +edu.bs +gov.bs + +// bt : http://en.wikipedia.org/wiki/.bt +*.bt + +// bw : http://en.wikipedia.org/wiki/.bw +// list of 2nd level tlds ? +bw + +// by : http://en.wikipedia.org/wiki/.by +// list of 2nd level tlds ? +by + +// bz : http://en.wikipedia.org/wiki/.bz +// list of 2nd level tlds ? +bz + +// ca : http://en.wikipedia.org/wiki/.ca +ca +// ca geographical names +ab.ca +bc.ca +mb.ca +nb.ca +nf.ca +nl.ca +ns.ca +nt.ca +nu.ca +on.ca +pe.ca +qc.ca +sk.ca +yk.ca + +// cat : http://en.wikipedia.org/wiki/.cat +cat + +// cc : http://en.wikipedia.org/wiki/.cc +cc + +// cd : http://en.wikipedia.org/wiki/.cd +cd + +// cf : http://en.wikipedia.org/wiki/.cf +cf + +// cg : http://en.wikipedia.org/wiki/.cg +cg + +// ch : http://en.wikipedia.org/wiki/.ch +ch + +// ci : http://en.wikipedia.org/wiki/.ci +// list of 2nd level tlds ? 
+ci + +// ck : http://en.wikipedia.org/wiki/.ck +*.ck + +// cl : http://en.wikipedia.org/wiki/.cl +cl + +// cm : http://en.wikipedia.org/wiki/.cm +cm + +// cn : http://en.wikipedia.org/wiki/.cn +cn +ac.cn +com.cn +edu.cn +gov.cn +net.cn +org.cn +// cn geographic names +ah.cn +bj.cn +cq.cn +fj.cn +gd.cn +gs.cn +gz.cn +gx.cn +ha.cn +hb.cn +he.cn +hi.cn +hl.cn +hn.cn +jl.cn +js.cn +jx.cn +ln.cn +nm.cn +nx.cn +qh.cn +sc.cn +sd.cn +sh.cn +sn.cn +sx.cn +tj.cn +xj.cn +xz.cn +yn.cn +zj.cn + +// co : http://en.wikipedia.org/wiki/.co +*.co + +// com : http://en.wikipedia.org/wiki/.com +com + +// coop : http://en.wikipedia.org/wiki/.coop +coop + +// cr : http://en.wikipedia.org/wiki/.cr +*.cr + +// cu : http://en.wikipedia.org/wiki/.cu +cu +com.cu +edu.cu +org.cu +net.cu +gov.cu +inf.cu + +// cv : http://en.wikipedia.org/wiki/.cv +cv + +// cx : http://en.wikipedia.org/wiki/.cx +cx + +// cy : http://en.wikipedia.org/wiki/.cy +*.cy + +// cz : http://en.wikipedia.org/wiki/.cz +cz + +// de : http://en.wikipedia.org/wiki/.de +de + +// dj : http://en.wikipedia.org/wiki/.dj +dj + +// dk : http://en.wikipedia.org/wiki/.dk +dk + +// dm : http://en.wikipedia.org/wiki/.dm +dm +com.dm +net.dm +org.dm + +// do : http://en.wikipedia.org/wiki/.do +*.do + +// dz : http://en.wikipedia.org/wiki/.dz +dz +com.dz +org.dz +net.dz +gov.dz +edu.dz +asso.dz +pol.dz +art.dz + +// ec : http://www.nic.ec/reg/paso1.asp +ec +com.ec +info.ec +net.ec +fin.ec +med.ec +pro.ec +org.ec +edu.ec +gov.ec +mil.ec + +// edu : http://en.wikipedia.org/wiki/.edu +edu + +// ee : http://www3.eenet.ee/ee/application.html +ee +com.ee +org.ee +fie.ee +pri.ee + +// eg : http://en.wikipedia.org/wiki/.eg +*.eg + +// er : http://en.wikipedia.org/wiki/.er +*.er + +// es : https://www.nic.es/site_ingles/ingles/dominios/index.html +es +com.es +nom.es +org.es +gob.es +edu.es + +// et : http://en.wikipedia.org/wiki/.et +*.et + +// eu : http://en.wikipedia.org/wiki/.eu +eu + +// fi : http://en.wikipedia.org/wiki/.fi +fi + +// fj : 
http://en.wikipedia.org/wiki/.fj +*.fj + +// fk : http://en.wikipedia.org/wiki/.fk +*.fk + +// fm : http://en.wikipedia.org/wiki/.fm +fm + +// fo : http://en.wikipedia.org/wiki/.fo +fo + +// fr : http://www.afnic.fr/ +fr +// domaines descriptifs : http://www.afnic.fr/obtenir/chartes/nommage-fr/annexe-descriptifs +fr +com.fr +asso.fr +nom.fr +prd.fr +presse.fr +tm.fr +// domaines sectoriels : http://www.afnic.fr/obtenir/chartes/nommage-fr/annexe-sectoriels +aeroport.fr +assedic.fr +avocat.fr +avoues.fr +cci.fr +chambagri.fr +chirurgiens-dentistes.fr +experts-comptables.fr +geometre-expert.fr +gouv.fr +greta.fr +huissier-justice.fr +medecin.fr +notaires.fr +pharmacien.fr +port.fr +veterinaire.fr + +// ga : http://en.wikipedia.org/wiki/.ga +ga + +// gd : http://en.wikipedia.org/wiki/.gd +gd + +// ge : http://www.nic.net.ge/policy_en.pdf +ge +com.ge +edu.ge +gov.ge +org.ge +mil.ge +net.ge +pvt.ge + +// gf : http://en.wikipedia.org/wiki/.gf +gf + +// gg : http://www.channelisles.net/tandc.shtml +gg +co.gg +org.gg +net.gg +sch.gg +gov.gg + +// gh : http://www.ghana.com/domain.htm +*.gh + +// gi : http://www.nic.gi/rules.html +gi +com.gi +ltd.gi +gov.gi +mod.gi +edu.gi +org.gi + +// gl : http://en.wikipedia.org/wiki/.gl +gl + +// gm : http://www.nic.gm/htmlpages%5Cgm-policy.htm +gm + +// gn : http://psg.com/dns/gn/gn.txt +*.gn + +// gov : http://en.wikipedia.org/wiki/.gov +gov + +// gp : http://www.nic.gp/index_en.php?url=charte_en.php +gp +com.gp +net.gp +edu.gp +org.gp + +// gq : http://en.wikipedia.org/wiki/.gq +gq + +// gr : https://grweb.ics.forth.gr/english/1617-B-2002.html +gr +com.gr +edu.gr +net.gr +org.gr +gov.gr + +// gs : http://en.wikipedia.org/wiki/.gs +gs + +// gt : http://www.gt/politicas.html +*.gt + +// gu : http://gadao.gov.gu/registration.txt +*.gu + +// gw : http://en.wikipedia.org/wiki/.gw +gw + +// gy : http://en.wikipedia.org/wiki/.gy +gy + +// hk : http://en.wikipedia.org/wiki/.hk +hk +com.hk +edu.hk +gov.hk +idv.hk +net.hk +org.hk + +// hm : 
http://en.wikipedia.org/wiki/.hm +hm + +// hn : http://www.nic.hn/politicas/ps02,,05.html +hn +com.hn +edu.hn +org.hn +net.hn +mil.hn +gob.hn + +// hr : http://www.dns.hr/documents/pdf/HRTLD-regulations.pdf +hr +iz.hr +from.hr +name.hr +com.hr + +// ht : http://www.nic.ht/info/charte.cfm +ht +com.ht +shop.ht +firm.ht +info.ht +adult.ht +net.ht +pro.ht +org.ht +med.ht +art.ht +coop.ht +pol.ht +asso.ht +edu.ht +rel.ht +gouv.ht +perso.ht + +// hu : http://www.domain.hu/domain/English/sld.html +hu +co.hu +info.hu +org.hu +priv.hu +sport.hu +tm.hu +2000.hu +agrar.hu +bolt.hu +casino.hu +city.hu +erotica.hu +erotika.hu +film.hu +forum.hu +games.hu +hotel.hu +ingatlan.hu +jogasz.hu +konyvelo.hu +lakas.hu +media.hu +news.hu +reklam.hu +sex.hu +shop.hu +suli.hu +szex.hu +tozsde.hu +utazas.hu +video.hu + +// id : http://en.wikipedia.org/wiki/.id +*.id + +// ie : http://en.wikipedia.org/wiki/.ie +ie + +// il : http://en.wikipedia.org/wiki/.il +*.il + +// im : https://www.nic.im/pdfs/imfaqs.pdf +im +co.im +ltd.co.im +plc.co.im +net.im +gov.im +org.im +nic.im +ac.im + +// in : http://en.wikipedia.org/wiki/.in +in +co.in +firm.in +net.in +org.in +gen.in +ind.in +nic.in +ac.in +edu.in +res.in +gov.in +mil.in + +// info : http://en.wikipedia.org/wiki/.info +info + +// int : http://en.wikipedia.org/wiki/.int +int +eu.int + +// io : http://www.nic.io/rules.html +// list of 2nd level tlds ? 
+io + +// iq : http://en.wikipedia.org/wiki/.iq +// no registrar website found, but google shows .gov.iq and .edu.iq websites +iq +gov.iq +edu.iq + +// ir : http://www.nic.ir/ascii/Appendix1.htm +ir +ac.ir +co.ir +gov.ir +id.ir +net.ir +org.ir +sch.ir + +// is : http://www.isnic.is/domain/rules.php +is +net.is +com.is +edu.is +gov.is +org.is +int.is + +// it : http://en.wikipedia.org/wiki/.it +it +gov.it +edu.it +// geo-names found at http://www.nic.it/RA/en/domini/regole/nomi-riservati.pdf +agrigento.it +ag.it +alessandria.it +al.it +ancona.it +an.it +aosta.it +aoste.it +ao.it +arezzo.it +ar.it +ascoli-piceno.it +ascolipiceno.it +ap.it +asti.it +at.it +avellino.it +av.it +bari.it +ba.it +barlettaandriatrani.it +barletta-andria-trani.it +belluno.it +bl.it +benevento.it +bn.it +bergamo.it +bg.it +biella.it +bi.it +bologna.it +bo.it +bolzano.it +bozen.it +balsan.it +alto-adige.it +altoadige.it +suedtirol.it +bz.it +brescia.it +bs.it +brindisi.it +br.it +cagliari.it +ca.it +caltanissetta.it +cl.it +campobasso.it +cb.it +caserta.it +ce.it +catania.it +ct.it +catanzaro.it +cz.it +chieti.it +ch.it +como.it +co.it +cosenza.it +cs.it +cremona.it +cr.it +crotone.it +kr.it +cuneo.it +cn.it +enna.it +en.it +fermo.it +ferrara.it +fe.it +firenze.it +florence.it +fi.it +foggia.it +fg.it +forli-cesena.it +forlicesena.it +fc.it +frosinone.it +fr.it +genova.it +genoa.it +ge.it +gorizia.it +go.it +grosseto.it +gr.it +imperia.it +im.it +isernia.it +is.it +laquila.it +aquila.it +aq.it +la-spezia.it +laspezia.it +sp.it +latina.it +lt.it +lecce.it +le.it +lecco.it +lc.it +livorno.it +li.it +lodi.it +lo.it +lucca.it +lu.it +macerata.it +mc.it +mantova.it +mn.it +massa-carrara.it +massacarrara.it +ms.it +matera.it +mt.it +messina.it +me.it +milano.it +milan.it +mi.it +modena.it +mo.it +monza.it +napoli.it +naples.it +na.it +novara.it +no.it +nuoro.it +nu.it +oristano.it +or.it +padova.it +padua.it +pd.it +palermo.it +pa.it +parma.it +pr.it +pavia.it +pv.it +perugia.it +pg.it +pescara.it 
+pe.it +pesaro-urbino.it +pesarourbino.it +pu.it +piacenza.it +pc.it +pisa.it +pi.it +pistoia.it +pt.it +pordenone.it +pn.it +potenza.it +pz.it +prato.it +po.it +ragusa.it +rg.it +ravenna.it +ra.it +reggio-calabria.it +reggiocalabria.it +rc.it +reggio-emilia.it +reggioemilia.it +re.it +rieti.it +ri.it +rimini.it +rn.it +roma.it +rome.it +rm.it +rovigo.it +ro.it +salerno.it +sa.it +sassari.it +ss.it +savona.it +sv.it +siena.it +si.it +siracusa.it +sr.it +sondrio.it +so.it +taranto.it +ta.it +teramo.it +te.it +terni.it +tr.it +torino.it +turin.it +to.it +trapani.it +tp.it +trento.it +trentino.it +tn.it +treviso.it +tv.it +trieste.it +ts.it +udine.it +ud.it +varese.it +va.it +venezia.it +venice.it +ve.it +verbania.it +vb.it +vercelli.it +vc.it +verona.it +vr.it +vibo-valentia.it +vibovalentia.it +vv.it +vicenza.it +vi.it +viterbo.it +vt.it + +// je : http://www.channelisles.net/tandc.shtml +je +co.je +org.je +net.je +sch.je +gov.je + +// jm : http://www.com.jm/register.html +*.jm + +// jo : http://www.nis.gov.jo/dns/reg.html +jo +com.jo +org.jo +net.jo +edu.jo +gov.jo +mil.jo +myname.jo + +// jobs : http://en.wikipedia.org/wiki/.jobs +jobs + +// jp : http://en.wikipedia.org/wiki/.jp +jp +ac.jp +ad.jp +co.jp +ed.jp +go.jp +gr.jp +lg.jp +ne.jp +or.jp +// jp geographical names +// I can't find an official English explanation, but used https://bugzilla.mozilla.org/show_bug.cgi?id=252342#c31 +*.aichi.jp +*.akita.jp +*.aomori.jp +*.chiba.jp +*.ehime.jp +*.fukui.jp +*.fukuoka.jp +*.fukushima.jp +*.gifu.jp +*.gunma.jp +*.hiroshima.jp +*.hokkaido.jp +*.hyogo.jp +*.ibaraki.jp +*.ishikawa.jp +*.iwate.jp +*.kagawa.jp +*.kagoshima.jp +*.kanagawa.jp +*.kawasaki.jp +*.kitakyushu.jp +*.kobe.jp +*.kochi.jp +*.kumamoto.jp +*.kyoto.jp +*.mie.jp +*.miyagi.jp +*.miyazaki.jp +*.nagano.jp +*.nagasaki.jp +*.nagoya.jp +*.nara.jp +*.niigata.jp +*.oita.jp +*.okayama.jp +*.okinawa.jp +*.osaka.jp +*.saga.jp +*.saitama.jp +*.sapporo.jp +*.sendai.jp +*.shiga.jp +*.shimane.jp +*.shizuoka.jp 
+*.tochigi.jp +*.tokushima.jp +*.tokyo.jp +*.tottori.jp +*.toyama.jp +*.wakayama.jp +*.yamagata.jp +*.yamaguchi.jp +*.yamanashi.jp +*.yokohama.jp +!metro.tokyo.jp +!pref.aichi.jp +!pref.akita.jp +!pref.aomori.jp +!pref.chiba.jp +!pref.ehime.jp +!pref.fukui.jp +!pref.fukuoka.jp +!pref.fukushima.jp +!pref.gifu.jp +!pref.gunma.jp +!pref.hiroshima.jp +!pref.hokkaido.jp +!pref.hyogo.jp +!pref.ibaraki.jp +!pref.ishikawa.jp +!pref.iwate.jp +!pref.kagawa.jp +!pref.kagoshima.jp +!pref.kanagawa.jp +!pref.kochi.jp +!pref.kumamoto.jp +!pref.kyoto.jp +!pref.mie.jp +!pref.miyagi.jp +!pref.miyazaki.jp +!pref.nagano.jp +!pref.nagasaki.jp +!pref.nara.jp +!pref.niigata.jp +!pref.oita.jp +!pref.okayama.jp +!pref.okinawa.jp +!pref.osaka.jp +!pref.saga.jp +!pref.saitama.jp +!pref.shiga.jp +!pref.shimane.jp +!pref.shizuoka.jp +!pref.tochigi.jp +!pref.tokushima.jp +!pref.tottori.jp +!pref.toyama.jp +!pref.wakayama.jp +!pref.yamagata.jp +!pref.yamaguchi.jp +!pref.yamanashi.jp +!city.chiba.jp +!city.fukuoka.jp +!city.hiroshima.jp +!city.kawasaki.jp +!city.kitakyushu.jp +!city.kobe.jp +!city.kyoto.jp +!city.nagoya.jp +!city.osaka.jp +!city.saitama.jp +!city.sapporo.jp +!city.sendai.jp +!city.shizuoka.jp +!city.yokohama.jp + +// ke : http://www.kenic.or.ke/index.php?option=com_content&task=view&id=117&Itemid=145 +*.ke + +// kg : http://www.domain.kg/dmn_n.html +kg +org.kg +net.kg +com.kg +edu.kg +gov.kg +mil.kg + +// kh : http://www.mptc.gov.kh/dns_registration.htm +*.kh + +// ki : http://www.ki/dns/index.html +ki +edu.ki +biz.ki +net.ki +org.ki +gov.ki +info.ki +com.ki + +// km : http://en.wikipedia.org/wiki/.km +km + +// kn : http://en.wikipedia.org/wiki/.kn +kn + +// kr : http://domain.nida.or.kr/eng/structure.jsp +kr +ac.kr +co.kr +es.kr +go.kr +hs.kr +kg.kr +mil.kr +ms.kr +ne.kr +or.kr +pe.kr +re.kr +sc.kr +// kr geographical names +// http://en.wikipedia.org/wiki/.kr +busan.kr +chungbuk.kr +chungnam.kr +daegu.kr +daejeon.kr +gangwon.kr +gwangju.kr +gyeongbuk.kr +gyeonggi.kr 
+gyeongnam.kr +incheon.kr +jeju.kr +jeonbuk.kr +jeonnam.kr +seoul.kr +ulsan.kr + +// kw : http://en.wikipedia.org/wiki/.kw +*.kw + +// ky : http://www.icta.ky/da_ky_reg_dom.php +ky +edu.ky +gov.ky +com.ky +org.ky +net.ky + +// kz : http://en.wikipedia.org/wiki/.kz +kz +org.kz +edu.kz +net.kz +gov.kz +mil.kz +com.kz + +// la : http://en.wikipedia.org/wiki/.la +la + +// lb : http://en.wikipedia.org/wiki/.lb +*.lb + +// lc : http://en.wikipedia.org/wiki/.lc +lc +com.lc +org.lc +edu.lc +gov.lc + +// li : http://en.wikipedia.org/wiki/.li +li + +// lk : http://www.nic.lk/seclevpr.html +lk +gov.lk +sch.lk +net.lk +int.lk +com.lk +org.lk +edu.lk +ngo.lk +soc.lk +web.lk +ltd.lk +assn.lk +grp.lk +hotel.lk + +// lr : http://psg.com/dns/lr/lr.txt +*.lr + +// ls : http://en.wikipedia.org/wiki/.ls +ls +co.ls +org.ls + +// lt : http://en.wikipedia.org/wiki/.lt +lt + +// lu : http://www.dns.lu/en/ +lu + +// lv : http://www.nic.lv/DNS/En/generic.php +lv +com.lv +edu.lv +gov.lv +org.lv +mil.lv +id.lv +net.lv +asn.lv +conf.lv + +// ly : http://www.nic.ly/regulations.php +ly +com.ly +net.ly +gov.ly +plc.ly +edu.ly +sch.ly +med.ly +org.ly +id.ly + +// ma : http://en.wikipedia.org/wiki/.ma +// list of 2nd level tlds ? +ma +co.ma +net.ma +gov.ma +org.ma + +// mc : http://www.nic.mc/ +mc +tm.mc +asso.mc + +// md : http://en.wikipedia.org/wiki/.md +md + +// mg : http://www.nic.mg/tarif.htm +mg +org.mg +nom.mg +gov.mg +prd.mg +tm.mg +edu.mg +mil.mg +com.mg + +// mh : http://en.wikipedia.org/wiki/.mh +mh + +// mil : http://en.wikipedia.org/wiki/.mil +mil + +// mk : http://en.wikipedia.org/wiki/.mk +// list of 2nd level tlds ? 
+mk +com.mk +gov.mk +org.mk +net.mk +edu.mk + +// ml : http://www.gobin.info/domainname/ml-template.doc +*.ml + +// mm : http://en.wikipedia.org/wiki/.mm +*.mm + +// mn : http://en.wikipedia.org/wiki/.mn +mn +gov.mn +edu.mn +org.mn + +// mo : http://www.monic.net.mo/ +mo +com.mo +net.mo +org.mo +edu.mo +gov.mo + +// mobi : http://en.wikipedia.org/wiki/.mobi +mobi + +// mp : http://www.dot.mp/ +mp + +// mq : http://en.wikipedia.org/wiki/.mq +mq + +// mr : http://en.wikipedia.org/wiki/.mr +mr + +// ms : http://en.wikipedia.org/wiki/.ms +ms + +// mt : https://www.nic.org.mt/dotmt/ +*.mt + +// mu : http://en.wikipedia.org/wiki/.mu +// list of 2nd level tlds ? +mu + +// museum : http://about.museum/naming/ +// there are 2nd-level TLD's, but there's no list +museum + +// mv : http://en.wikipedia.org/wiki/.mv +*.mv + +// mw : http://www.registrar.mw/ +mw +ac.mw +biz.mw +co.mw +com.mw +coop.mw +edu.mw +gov.mw +int.mw +net.mw +org.mw + +// mx : http://www.nic.mx/ +*.mx + +// my : http://www.mynic.net.my/ +*.my + +// mz : http://www.gobin.info/domainname/mz-template.doc +*.mz + +// na : http://www.na-nic.com.na/ +// list of 2nd level tlds ? 
+na + +// name : has 2nd-level tlds, but there's no list of them +name + +// nc : http://www.cctld.nc/ +nc + +// ne : http://en.wikipedia.org/wiki/.ne +ne + +// net : http://en.wikipedia.org/wiki/.net +net + +// nf : http://en.wikipedia.org/wiki/.nf +nf +com.nf +net.nf +per.nf +rec.nf +web.nf +arts.nf +firm.nf +info.nf +other.nf +store.nf + +// ng : http://psg.com/dns/ng/ +ng + +// ni : http://www.nic.ni/dominios.htm +*.ni + +// nl : http://www.domain-registry.nl/ace.php/c,728,122,,,,Home.html +nl + +// no : http://www.norid.no/regelverk/index.en.html +no +fhs.no +vgs.no +fylkesbibl.no +folkebibl.no +museum.no +idrett.no +mil.no +stat.no +dep.no +kommune.no +herad.no +priv.no +// no geographical names : http://www.norid.no/regelverk/vedlegg-b.en.html +// counties +aa.no +ah.no +bu.no +fm.no +hl.no +hm.no +jan-mayen.no +mr.no +nl.no +nt.no +of.no +ol.no +oslo.no +rl.no +sf.no +st.no +svalbard.no +tm.no +tr.no +va.no +vf.no +// primary and lower secondary schools per county +gs.aa.no +gs.ah.no +gs.bu.no +gs.fm.no +gs.hl.no +gs.hm.no +gs.jan-mayen.no +gs.mr.no +gs.nl.no +gs.nt.no +gs.of.no +gs.ol.no +gs.oslo.no +gs.rl.no +gs.sf.no +gs.st.no +gs.svalbard.no +gs.tm.no +gs.tr.no +gs.va.no +gs.vf.no +// cities +akrehamn.no +åkrehamn.no +algard.no +ålgård.no +arna.no +brumunddal.no +bryne.no +bronnoysund.no +brønnøysund.no +drobak.no +drøbak.no +egersund.no +fetsund.no +floro.no +florø.no +fredrikstad.no +hokksund.no +honefoss.no +hønefoss.no +jessheim.no +jorpeland.no +jørpeland.no +kirkenes.no +kopervik.no +krokstadelva.no +langevag.no +langevåg.no +leirvik.no +mjondalen.no +mjøndalen.no +mo-i-rana.no +mosjoen.no +mosjøen.no +nesoddtangen.no +orkanger.no +osoyro.no +osøyro.no +raholt.no +råholt.no +sandnessjoen.no +sandnessjøen.no +skedsmokorset.no +slattum.no +spjelkavik.no +stathelle.no +stavern.no +stjordalshalsen.no +stjørdalshalsen.no +tananger.no +tranby.no +vossevangen.no +// communities +afjord.no +åfjord.no +agdenes.no +al.no +ål.no +alesund.no 
+ålesund.no +alstahaug.no +alta.no +áltá.no +alaheadju.no +álaheadju.no +alvdal.no +amli.no +åmli.no +amot.no +åmot.no +andebu.no +andoy.no +andøy.no +andasuolo.no +ardal.no +årdal.no +aremark.no +arendal.no +ås.no +aseral.no +åseral.no +asker.no +askim.no +askvoll.no +askoy.no +askøy.no +asnes.no +åsnes.no +audnedaln.no +aukra.no +aure.no +aurland.no +aurskog-holand.no +aurskog-høland.no +austevoll.no +austrheim.no +averoy.no +averøy.no +balestrand.no +ballangen.no +balat.no +bálát.no +balsfjord.no +bahccavuotna.no +báhccavuotna.no +bamble.no +bardu.no +beardu.no +beiarn.no +bajddar.no +bájddar.no +baidar.no +báidár.no +berg.no +bergen.no +berlevag.no +berlevåg.no +bearalvahki.no +bearalváhki.no +bindal.no +birkenes.no +bjarkoy.no +bjarkøy.no +bjerkreim.no +bjugn.no +bodo.no +bodø.no +badaddja.no +bådåddjå.no +budejju.no +bokn.no +bremanger.no +bronnoy.no +brønnøy.no +bygland.no +bykle.no +barum.no +bærum.no +bo.telemark.no +bø.telemark.no +bo.nordland.no +bø.nordland.no +bievat.no +bievát.no +bomlo.no +bømlo.no +batsfjord.no +båtsfjord.no +bahcavuotna.no +báhcavuotna.no +dovre.no +drammen.no +drangedal.no +dyroy.no +dyrøy.no +donna.no +dønna.no +eid.no +eidfjord.no +eidsberg.no +eidskog.no +eidsvoll.no +eigersund.no +elverum.no +enebakk.no +engerdal.no +etne.no +etnedal.no +evenes.no +evenassi.no +evenášši.no +evje-og-hornnes.no +farsund.no +fauske.no +fuossko.no +fuoisku.no +fedje.no +fet.no +finnoy.no +finnøy.no +fitjar.no +fjaler.no +fjell.no +flakstad.no +flatanger.no +flekkefjord.no +flesberg.no +flora.no +fla.no +flå.no +folldal.no +forsand.no +fosnes.no +frei.no +frogn.no +froland.no +frosta.no +frana.no +fræna.no +froya.no +frøya.no +fusa.no +fyresdal.no +forde.no +førde.no +gamvik.no +gangaviika.no +gáŋgaviika.no +gaular.no +gausdal.no +gildeskal.no +gildeskål.no +giske.no +gjemnes.no +gjerdrum.no +gjerstad.no +gjesdal.no +gjovik.no +gjøvik.no +gloppen.no +gol.no +gran.no +grane.no +granvin.no +gratangen.no +grimstad.no +grong.no 
+kraanghke.no +kråanghke.no +grue.no +gulen.no +hadsel.no +halden.no +halsa.no +hamar.no +hamaroy.no +habmer.no +hábmer.no +hapmir.no +hápmir.no +hammerfest.no +hammarfeasta.no +hámmárfeasta.no +haram.no +hareid.no +harstad.no +hasvik.no +aknoluokta.no +ákŋoluokta.no +hattfjelldal.no +aarborte.no +haugesund.no +hemne.no +hemnes.no +hemsedal.no +heroy.more-og-romsdal.no +herøy.møre-og-romsdal.no +heroy.nordland.no +herøy.nordland.no +hitra.no +hjartdal.no +hjelmeland.no +hobol.no +hobøl.no +hof.no +hol.no +hole.no +holmestrand.no +holtalen.no +holtålen.no +hornindal.no +horten.no +hurdal.no +hurum.no +hvaler.no +hyllestad.no +hagebostad.no +hægebostad.no +hoyanger.no +høyanger.no +hoylandet.no +høylandet.no +ha.no +hå.no +ibestad.no +inderoy.no +inderøy.no +iveland.no +jevnaker.no +jondal.no +jolster.no +jølster.no +karasjok.no +karasjohka.no +kárášjohka.no +karlsoy.no +galsa.no +gálsá.no +karmoy.no +karmøy.no +kautokeino.no +guovdageaidnu.no +klepp.no +klabu.no +klæbu.no +kongsberg.no +kongsvinger.no +kragero.no +kragerø.no +kristiansand.no +kristiansund.no +krodsherad.no +krødsherad.no +kvalsund.no +rahkkeravju.no +ráhkkerávju.no +kvam.no +kvinesdal.no +kvinnherad.no +kviteseid.no +kvitsoy.no +kvitsøy.no +kvafjord.no +kvæfjord.no +giehtavuoatna.no +kvanangen.no +kvænangen.no +navuotna.no +návuotna.no +kafjord.no +kåfjord.no +gaivuotna.no +gáivuotna.no +larvik.no +lavangen.no +lavagis.no +loabat.no +loabát.no +lebesby.no +davvesiida.no +leikanger.no +leirfjord.no +leka.no +leksvik.no +lenvik.no +leangaviika.no +leaŋgaviika.no +lesja.no +levanger.no +lier.no +lierne.no +lillehammer.no +lillesand.no +lindesnes.no +lindas.no +lindås.no +lom.no +loppa.no +lahppi.no +láhppi.no +lund.no +lunner.no +luroy.no +lurøy.no +luster.no +lyngdal.no +lyngen.no +ivgu.no +lardal.no +lerdal.no +lærdal.no +lodingen.no +lødingen.no +lorenskog.no +lørenskog.no +loten.no +løten.no +malvik.no +masoy.no +måsøy.no +muosat.no +muosát.no +mandal.no +marker.no +marnardal.no 
+masfjorden.no +meland.no +meldal.no +melhus.no +meloy.no +meløy.no +meraker.no +meråker.no +moareke.no +moåreke.no +midsund.no +midtre-gauldal.no +modalen.no +modum.no +molde.no +moskenes.no +moss.no +mosvik.no +malselv.no +målselv.no +malatvuopmi.no +málatvuopmi.no +namdalseid.no +aejrie.no +namsos.no +namsskogan.no +naamesjevuemie.no +nååmesjevuemie.no +laakesvuemie.no +nannestad.no +narvik.no +narviika.no +naustdal.no +nedre-eiker.no +nes.akershus.no +nes.buskerud.no +nesna.no +nesodden.no +nesseby.no +unjarga.no +unjárga.no +nesset.no +nissedal.no +nittedal.no +nord-aurdal.no +nord-fron.no +nord-odal.no +norddal.no +nordkapp.no +davvenjarga.no +davvenjárga.no +nordre-land.no +nordreisa.no +raisa.no +ráisa.no +nore-og-uvdal.no +notodden.no +naroy.no +nærøy.no +notteroy.no +nøtterøy.no +odda.no +oksnes.no +øksnes.no +oppdal.no +oppegard.no +oppegård.no +orkdal.no +orland.no +ørland.no +orskog.no +ørskog.no +orsta.no +ørsta.no +os.hedmark.no +os.hordaland.no +osen.no +osteroy.no +osterøy.no +ostre-toten.no +østre-toten.no +overhalla.no +ovre-eiker.no +øvre-eiker.no +oyer.no +øyer.no +oygarden.no +øygarden.no +oystre-slidre.no +øystre-slidre.no +porsanger.no +porsangu.no +porsáŋgu.no +porsgrunn.no +radoy.no +radøy.no +rakkestad.no +rana.no +ruovat.no +randaberg.no +rauma.no +rendalen.no +rennebu.no +rennesoy.no +rennesøy.no +rindal.no +ringebu.no +ringerike.no +ringsaker.no +rissa.no +risor.no +risør.no +roan.no +rollag.no +rygge.no +ralingen.no +rælingen.no +rodoy.no +rødøy.no +romskog.no +rømskog.no +roros.no +røros.no +rost.no +røst.no +royken.no +røyken.no +royrvik.no +røyrvik.no +rade.no +råde.no +salangen.no +siellak.no +saltdal.no +sálát.no +sálat.no +samnanger.no +sande.more-og-romsdal.no +sande.møre-og-romsdal.no +sande.vestfold.no +sandefjord.no +sandnes.no +sandoy.no +sandøy.no +sarpsborg.no +sauda.no +sauherad.no +sel.no +selbu.no +selje.no +seljord.no +sigdal.no +siljan.no +sirdal.no +skaun.no +skedsmo.no +ski.no +skien.no +skiptvet.no 
+skjervoy.no +skjervøy.no +skierva.no +skiervá.no +skjak.no +skjåk.no +skodje.no +skanland.no +skånland.no +skanit.no +skánit.no +smola.no +smøla.no +snillfjord.no +snasa.no +snåsa.no +snoasa.no +snaase.no +snåase.no +sogndal.no +sokndal.no +sola.no +solund.no +songdalen.no +sortland.no +spydeberg.no +stange.no +stavanger.no +steigen.no +steinkjer.no +stjordal.no +stjørdal.no +stokke.no +stor-elvdal.no +stord.no +stordal.no +storfjord.no +omasvuotna.no +strand.no +stranda.no +stryn.no +sula.no +suldal.no +sund.no +sunndal.no +surnadal.no +sveio.no +svelvik.no +sykkylven.no +sogne.no +søgne.no +somna.no +sømna.no +sondre-land.no +søndre-land.no +sor-aurdal.no +sør-aurdal.no +sor-fron.no +sør-fron.no +sor-odal.no +sør-odal.no +sor-varanger.no +sør-varanger.no +matta-varjjat.no +mátta-várjjat.no +sorfold.no +sørfold.no +sorreisa.no +sørreisa.no +sorum.no +sørum.no +tana.no +deatnu.no +time.no +tingvoll.no +tinn.no +tjeldsund.no +dielddanuorri.no +tjome.no +tjøme.no +tokke.no +tolga.no +torsken.no +tranoy.no +tranøy.no +tromso.no +tromsø.no +tromsa.no +romsa.no +trondheim.no +troandin.no +trysil.no +trana.no +træna.no +trogstad.no +trøgstad.no +tvedestrand.no +tydal.no +tynset.no +tysfjord.no +divtasvuodna.no +divttasvuotna.no +tysnes.no +tysvar.no +tysvær.no +tonsberg.no +tønsberg.no +ullensaker.no +ullensvang.no +ulvik.no +utsira.no +vadso.no +vadsø.no +cahcesuolo.no +cáhcesuolo.no +vaksdal.no +valle.no +vang.no +vanylven.no +vardo.no +vardø.no +varggat.no +várggát.no +vefsn.no +vaapste.no +vega.no +vegarshei.no +vegårshei.no +vennesla.no +verdal.no +verran.no +vestby.no +vestnes.no +vestre-slidre.no +vestre-toten.no +vestvagoy.no +vestvågøy.no +vevelstad.no +vik.no +vikna.no +vindafjord.no +volda.no +voss.no +varoy.no +værøy.no +vagan.no +vågan.no +voagat.no +vagsoy.no +vågsøy.no +vaga.no +vågå.no +valer.ostfold.no +våler.østfold.no +valer.hedmark.no +våler.hedmark.no + +// np : http://www.mos.com.np/register.html +*.np + +// nr : 
http://cenpac.net.nr/dns/index.html +nr +biz.nr +info.nr +gov.nr +edu.nr +org.nr +net.nr +com.nr + +// nu : http://en.wikipedia.org/wiki/.nu +nu + +// nz : http://en.wikipedia.org/wiki/.nz +*.nz + +// om : http://en.wikipedia.org/wiki/.om +*.om + +// org : http://en.wikipedia.org/wiki/.org +org + +// pa : http://www.nic.pa/ +*.pa + +// pe : http://www.nic.pe/normas-proced-i.htm +*.pe + +// pf : http://www.gobin.info/domainname/formulaire-pf.pdf +pf +com.pf +org.pf +edu.pf + +// pg : http://en.wikipedia.org/wiki/.pg +*.pg + +// ph : http://www.domains.ph/FAQ2.asp +// list of 2nd level tlds ? +ph +com.ph +net.ph +org.ph +gov.ph +edu.ph +ngo.ph +mil.ph + +// pk : http://pk5.pknic.net.pk/pk5/msgNamepk.PK +pk +com.pk +net.pk +edu.pk +org.pk +fam.pk +biz.pk +web.pk +gov.pk +gob.pk +gok.pk +gon.pk +gop.pk +gos.pk +goa.pk +info.pk + +// pl : http://www.dns.pl/english/ +pl +// NASK functional domains (nask.pl / dns.pl) : http://www.dns.pl/english/dns-funk.html +aid.pl +agro.pl +atm.pl +auto.pl +biz.pl +com.pl +edu.pl +gmina.pl +gsm.pl +info.pl +mail.pl +miasta.pl +media.pl +mil.pl +net.pl +nieruchomosci.pl +nom.pl +org.pl +pc.pl +powiat.pl +priv.pl +realestate.pl +rel.pl +sex.pl +shop.pl +sklep.pl +sos.pl +szkola.pl +targi.pl +tm.pl +tourism.pl +travel.pl +turystyka.pl +// ICM functional domains (icm.edu.pl) +6bone.pl +art.pl +mbone.pl +// Government domains (administered by ippt.gov.pl) +gov.pl +uw.gov.pl +um.gov.pl +ug.gov.pl +upow.gov.pl +starostwo.gov.pl +so.gov.pl +sr.gov.pl +po.gov.pl +pa.gov.pl +// other functional domains +med.pl +ngo.pl +irc.pl +usenet.pl +// NASK geographical domains : http://www.dns.pl/english/dns-regiony.html +augustow.pl +babia-gora.pl +bedzin.pl +beskidy.pl +bialowieza.pl +bialystok.pl +bielawa.pl +bieszczady.pl +boleslawiec.pl +bydgoszcz.pl +bytom.pl +cieszyn.pl +czeladz.pl +czest.pl +dlugoleka.pl +elblag.pl +elk.pl +glogow.pl +gniezno.pl +gorlice.pl +grajewo.pl +ilawa.pl +jaworzno.pl +jelenia-gora.pl +jgora.pl +kalisz.pl +kazimierz-dolny.pl 
+karpacz.pl +kartuzy.pl +kaszuby.pl +katowice.pl +kepno.pl +ketrzyn.pl +klodzko.pl +kobierzyce.pl +kolobrzeg.pl +konin.pl +konskowola.pl +kutno.pl +lapy.pl +lebork.pl +legnica.pl +lezajsk.pl +limanowa.pl +lomza.pl +lowicz.pl +lubin.pl +lukow.pl +malbork.pl +malopolska.pl +mazowsze.pl +mazury.pl +mielec.pl +mielno.pl +mragowo.pl +naklo.pl +nowaruda.pl +nysa.pl +olawa.pl +olecko.pl +olkusz.pl +olsztyn.pl +opoczno.pl +opole.pl +ostroda.pl +ostroleka.pl +ostrowiec.pl +ostrowwlkp.pl +pila.pl +pisz.pl +podhale.pl +podlasie.pl +polkowice.pl +pomorze.pl +pomorskie.pl +prochowice.pl +pruszkow.pl +przeworsk.pl +pulawy.pl +radom.pl +rawa-maz.pl +rybnik.pl +rzeszow.pl +sanok.pl +sejny.pl +slask.pl +slupsk.pl +sosnowiec.pl +stalowa-wola.pl +skoczow.pl +starachowice.pl +stargard.pl +suwalki.pl +swidnica.pl +swiebodzin.pl +swinoujscie.pl +szczecin.pl +szczytno.pl +tarnobrzeg.pl +tgory.pl +turek.pl +tychy.pl +ustka.pl +walbrzych.pl +warmia.pl +warszawa.pl +waw.pl +wegrow.pl +wielun.pl +wlocl.pl +wloclawek.pl +wodzislaw.pl +wolomin.pl +wroclaw.pl +zachpomor.pl +zagan.pl +zarow.pl +zgora.pl +zgorzelec.pl +// TASK geographical domains (www.task.gda.pl/uslugi/dns) +gda.pl +gdansk.pl +gdynia.pl +sopot.pl +// other geographical domains +gliwice.pl +krakow.pl +poznan.pl +wroc.pl +zakopane.pl + +// pn : http://www.government.pn/PnRegistry/policies.htm +pn +gov.pn +co.pn +org.pn +edu.pn +net.pn + +// pr : http://www.nic.pr/index.asp?f=1 +pr +com.pr +net.pr +org.pr +gov.pr +edu.pr +isla.pr +pro.pr +biz.pr +info.pr +name.pr +// these aren't mentioned on nic.pr, but on http://en.wikipedia.org/wiki/.pr +est.pr +prof.pr +ac.pr + +// pro : http://www.nic.pro/support_faq.htm +pro +aca.pro +bar.pro +cpa.pro +jur.pro +law.pro +med.pro +eng.pro + +// ps : http://en.wikipedia.org/wiki/.ps +// list of 2nd level tlds ? 
+ps +edu.ps +gov.ps +sec.ps +plo.ps +com.ps +org.ps +net.ps + +// pt : http://online.dns.pt/dns/start_dns +pt +net.pt +gov.pt +org.pt +edu.pt +int.pt +publ.pt +com.pt +nome.pt + +// pw : http://en.wikipedia.org/wiki/.pw +*.pw + +// py : http://www.nic.py/faq_a.html#faq_b +*.py + +// qa : http://www.qatar.net.qa/services/virtual.htm +*.qa + +// re : http://www.afnic.re/obtenir/chartes/nommage-re/annexe-descriptifs +re +com.re +asso.re +nom.re + +// ro : http://www.rotld.ro/ +ro +com.ro +org.ro +tm.ro +nt.ro +nom.ro +info.ro +rec.ro +arts.ro +firm.ro +store.ro +www.ro + +// ru : http://en.wikipedia.org/wiki/.ru +ru +com.ru +net.ru +org.ru +pp.ru +int.ru +// there should be geo-names like msk.ru, but I didn't find a list + +// rw : http://www.nic.rw/cgi-bin/policy.pl +rw +gov.rw +net.rw +edu.rw +ac.rw +com.rw +co.rw +int.rw +mil.rw +gouv.rw + +// sa : http://www.saudinic.net.sa/page.php?page=1&lang=1 +*.sa + +// sb : http://www.sbnic.net.sb/ +*.sb + +// sc : http://www.nic.sc/ +sc +com.sc +gov.sc +net.sc +org.sc +edu.sc + +// sd : http://www.isoc.sd/sudanic.isoc.sd/billing_pricing.htm +sd +com.sd +net.sd +org.sd +edu.sd +med.sd +tv.sd +gov.sd +info.sd + +// se : http://en.wikipedia.org/wiki/.se +se +org.se +pp.se +tm.se +parti.se +press.se +mil.se +// se geographical names +ab.se +c.se +d.se +e.se +f.se +g.se +h.se +i.se +k.se +m.se +n.se +o.se +s.se +t.se +u.se +w.se +x.se +y.se +z.se +ac.se +bd.se + +// sg : http://www.nic.net.sg/sub_policies_agreement/2ld.html +sg +com.sg +net.sg +org.sg +gov.sg +edu.sg +per.sg + +// sh : http://www.nic.sh/rules.html +// list of 2nd level domains ? +sh + +// si : http://en.wikipedia.org/wiki/.si +si + +// sk : http://en.wikipedia.org/wiki/.sk +sk + +// sl : http://en.wikipedia.org/wiki/.sl +// list of 2nd level domains ? +sl + +// sm : http://en.wikipedia.org/wiki/.sm +sm + +// sn : http://en.wikipedia.org/wiki/.sn +// list of 2nd level domains ? 
+sn + +// sr : http://en.wikipedia.org/wiki/.sr +sr + +// st : http://www.nic.st/html/policyrules/ +st + +// su : http://en.wikipedia.org/wiki/.su +su + +// sv : http://www.svnet.org.sv/svpolicy.html +*.sv + +// sy : http://www.gobin.info/domainname/sy.doc +*.sy + +// sz : http://en.wikipedia.org/wiki/.sz +// list of 2nd level domains ? +sz + +// tc : http://en.wikipedia.org/wiki/.tc +tc + +// td : http://en.wikipedia.org/wiki/.td +td + +// tf : http://en.wikipedia.org/wiki/.tf +tf + +// tg : http://en.wikipedia.org/wiki/.tg +// list of 2nd level domains ? +tg + +// th : http://en.wikipedia.org/wiki/.th +*.th + +// tj : http://www.nic.tj/policy.htm +tj +ac.tj +biz.tj +com.tj +co.tj +edu.tj +int.tj +name.tj +net.tj +org.tj +web.tj +gov.tj +go.tj +mil.tj + +// tk : http://en.wikipedia.org/wiki/.tk +tk + +// tl : http://en.wikipedia.org/wiki/.tl +// list of 2nd level tlds ? +tl + +// tm : http://www.nic.tm/rules.html +// list of 2nd level tlds ? +tm + +// tn : http://en.wikipedia.org/wiki/.tn +// list of 2nd level tlds ? +tn + +// to : http://en.wikipedia.org/wiki/.to +// list of 2nd level tlds ? +to + +// tr : http://en.wikipedia.org/wiki/.tr +*.tr + +// travel : http://en.wikipedia.org/wiki/.travel +travel + +// tt : http://www.nic.tt/ +tt +co.tt +com.tt +org.tt +net.tt +biz.tt +info.tt +pro.tt +int.tt +coop.tt +jobs.tt +mobi.tt +travel.tt +museum.tt +aero.tt +name.tt +gov.tt +edu.tt + +// tv : http://en.wikipedia.org/wiki/.tv +// list of 2nd level tlds ? 
+tv + +// tw : http://en.wikipedia.org/wiki/.tw +tw +edu.tw +gov.tw +mil.tw +com.tw +net.tw +org.tw +idv.tw +game.tw +ebiz.tw +club.tw +網路.tw +組織.tw +商業.tw + +// tz : http://en.wikipedia.org/wiki/.tz +*.tz + +// ua : http://www.nic.net.ua/ +ua +com.ua +edu.ua +gov.ua +net.ua +org.ua +// ua geo-names +cherkassy.ua +chernigov.ua +chernovtsy.ua +ck.ua +cn.ua +crimea.ua +cv.ua +dn.ua +dnepropetrovsk.ua +donetsk.ua +dp.ua +if.ua +ivano-frankivsk.ua +kh.ua +kharkov.ua +kherson.ua +kiev.ua +kirovograd.ua +km.ua +kr.ua +ks.ua +lg.ua +lugansk.ua +lutsk.ua +lviv.ua +mk.ua +nikolaev.ua +od.ua +odessa.ua +pl.ua +poltava.ua +rovno.ua +rv.ua +sebastopol.ua +sumy.ua +te.ua +ternopil.ua +vinnica.ua +vn.ua +zaporizhzhe.ua +zp.ua +uz.ua +uzhgorod.ua +zhitomir.ua +zt.ua + +// ug : http://www.registry.co.ug/ +ug +co.ug +ac.ug +sc.ug +go.ug +ne.ug +or.ug + +// uk : http://en.wikipedia.org/wiki/.uk +*.uk +*.sch.uk +!bl.uk +!british-library.uk +!icnet.uk +!jet.uk +!nel.uk +!nls.uk +!national-library-scotland.uk +!parliament.uk + +// us : http://en.wikipedia.org/wiki/.us +us +dni.us +fed.us +isa.us +kids.us +nsn.us +// us geographic names +ak.us +al.us +ar.us +as.us +az.us +ca.us +co.us +ct.us +dc.us +de.us +fl.us +ga.us +gu.us +hi.us +ia.us +id.us +il.us +in.us +ks.us +ky.us +la.us +ma.us +md.us +me.us +mi.us +mn.us +mo.us +ms.us +mt.us +nc.us +nd.us +ne.us +nh.us +nj.us +nm.us +nv.us +ny.us +oh.us +ok.us +or.us +pa.us +pr.us +ri.us +sc.us +sd.us +tn.us +tx.us +ut.us +vi.us +vt.us +va.us +wa.us +wi.us +wv.us +wy.us +// the following rules would be only valid under the geo-name, but we can't express that +// *.*.us cities, counties, parishes, and townships (locality.state.us) +// !ci.*.*.us city government agencies (subdomain under locality) +// !town.*.*.us town government agencies (subdomain under locality) +// !co.*.*.us county government agencies (subdomain under locality) +// k12.*.us public school districts +// pvt.k12.*.us private schools +// cc.*.us community colleges +// 
tec.*.us technical and vocational schools +// lib.*.us state, regional, city, and county libraries +// state.*.us state government agencies +// gen.*.us general independent entities (groups not fitting into the above categories) + +// uy : http://www.antel.com.uy/ +*.uy + +// uz : http://www.reg.uz/registerr.html +// are there other 2nd level tlds ? +uz +com.uz +co.uz + +// va : http://en.wikipedia.org/wiki/.va +va + +// vc : http://en.wikipedia.org/wiki/.vc +// list of 2nd level tlds ? +vc + +// ve : http://registro.nic.ve/nicve/registro/index.html +*.ve + +// vg : http://en.wikipedia.org/wiki/.vg +vg + +// vi : http://www.nic.vi/Domain_Rules/body_domain_rules.html +vi +com.vi +org.vi +edu.vi +gov.vi + +// vn : https://www.dot.vn/vnnic/vnnic/domainregistration.jsp +vn +com.vn +net.vn +org.vn +edu.vn +gov.vn +int.vn +ac.vn +biz.vn +info.vn +name.vn +pro.vn +health.vn + +// vu : http://en.wikipedia.org/wiki/.vu +// list of 2nd level tlds ? +vu + +// ws : http://en.wikipedia.org/wiki/.ws +ws + +// ye : http://www.y.net.ye/services/domain_name.htm +*.ye + +// yu : http://www.nic.yu/pravilnik-e.html +*.yu + +// za : http://www.zadna.org.za/slds.html +*.za + +// zm : http://en.wikipedia.org/wiki/.zm +*.zm + +// zw : http://en.wikipedia.org/wiki/.zw +*.zw + diff --git a/net/base/escape.cc b/net/base/escape.cc new file mode 100644 index 0000000..bd4aa95 --- /dev/null +++ b/net/base/escape.cc @@ -0,0 +1,272 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+//    * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//    * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <algorithm>
+
+#include "net/base/escape.h"
+
+#include "base/logging.h"
+#include "base/string_util.h"
+
+namespace {
+
+// Returns true if |ch| is one of the ASCII hex digits 0-9, A-F or a-f.
+template <class char_type>
+inline bool IsHex(char_type ch) {
+  return (ch >= '0' && ch <= '9') ||
+         (ch >= 'A' && ch <= 'F') ||
+         (ch >= 'a' && ch <= 'f');
+}
+
+// Returns the numeric value (0-15) of the hex digit |ch|, accepting both
+// upper and lower case letters.  Any other character hits NOTREACHED() and
+// yields 0, so callers are expected to check IsHex() first.
+template <class char_type>
+inline char_type HexToInt(char_type ch) {
+  if (ch >= '0' && ch <= '9')
+    return ch - '0';
+  if (ch >= 'A' && ch <= 'F')
+    return ch - 'A' + 10;
+  if (ch >= 'a' && ch <= 'f')
+    return ch - 'a' + 10;
+  NOTREACHED();
+  return 0;
+}
+
+// Lookup table for IntToHex(); digits above 9 are upper case.
+static const char* const kHexString = "0123456789ABCDEF";
+// Returns the upper-case hex digit for |i|.  DCHECKs that |i| is in [0, 15].
+inline char IntToHex(int i) {
+  DCHECK(i >= 0 && i <= 15) << i << " not a hex value";
+  return kHexString[i];
+}
+
+// A fast bit-vector map for ascii characters.
+//
+// Internally stores 256 bits in an array of 8 ints.
+// Does quick bit-flicking to lookup needed characters.
+class Charmap {
+ public:
+  // |b0| holds the bits for characters 0-31, |b1| for 32-63, and so on up to
+  // |b7| for 224-255.  Within a word, bit N stands for character (base + N).
+  Charmap(uint32 b0, uint32 b1, uint32 b2, uint32 b3,
+          uint32 b4, uint32 b5, uint32 b6, uint32 b7) {
+    map_[0] = b0; map_[1] = b1; map_[2] = b2; map_[3] = b3;
+    map_[4] = b4; map_[5] = b5; map_[6] = b6; map_[7] = b7;
+  }
+
+  // Returns true if the bit for |c| is set in the map.
+  bool Contains(unsigned char c) const {
+    // Shift an unsigned 1: (c & 31) can be 31, and shifting a signed int
+    // into its sign bit is not well defined.
+    return (map_[c >> 5] & (1u << (c & 31))) != 0;
+  }
+
+ private:
+  uint32 map_[8];
+};
+
+
+// Given text to escape and a Charmap defining which values to escape,
+// return an escaped string.  If use_plus is true, spaces are converted
+// to +, otherwise, if spaces are in the charmap, they are converted to
+// %20.
+const std::string Escape(const std::string& text, const Charmap& charmap,
+                         bool use_plus) {
+  std::string escaped;
+  // Worst case every input byte expands to the three characters "%XX".
+  escaped.reserve(text.length() * 3);
+  for (size_t i = 0; i < text.length(); ++i) {
+    const unsigned char c = static_cast<unsigned char>(text[i]);
+    if (use_plus && ' ' == c) {
+      escaped.push_back('+');
+    } else if (charmap.Contains(c)) {
+      escaped.push_back('%');
+      escaped.push_back(IntToHex(c >> 4));
+      escaped.push_back(IntToHex(c & 0xf));
+    } else {
+      escaped.push_back(c);
+    }
+  }
+  return escaped;
+}
+
+// Replaces every "%XX" sequence (X being a hex digit) in |escaped_text| with
+// the byte it encodes, subject to |rules|:
+//  - "%25" is only turned into '%' when UnescapeRule::PERCENTS is set;
+//  - "%20" is only turned into ' ' when UnescapeRule::SPACES is set;
+//  - '+' is turned into ' ' when UnescapeRule::REPLACE_PLUS_WITH_SPACE is set.
+// A '%' that is not followed by two hex digits is copied through unchanged.
+std::string UnescapeURLImpl(const std::string& escaped_text,
+                            UnescapeRule::Type rules) {
+  // The output of the unescaping is never longer than the input, so we can
+  // reserve the input size to make sure we have enough buffer and don't have
+  // to allocate in the loop below.
+  std::string result;
+  result.reserve(escaped_text.length());
+
+  const size_t max = escaped_text.size();
+  for (size_t i = 0; i < max; ++i) {
+    // A full escape needs two more characters after the '%'.  This is written
+    // as (max - i > 2) because i < max always holds here, so the subtraction
+    // cannot wrap; computing "max - 2" up front wraps around for a one
+    // character input such as "%" and leads to reads past the end.
+    if (escaped_text[i] == '%' && max - i > 2) {
+      const std::string::value_type most_sig_digit(escaped_text[i + 1]);
+      const std::string::value_type least_sig_digit(escaped_text[i + 2]);
+      if (IsHex(most_sig_digit) && IsHex(least_sig_digit)) {
+        unsigned char value = HexToInt(most_sig_digit) * 16 +
+                              HexToInt(least_sig_digit);
+        if (((rules & UnescapeRule::PERCENTS) || value != '%') &&
+            ((rules & UnescapeRule::SPACES) || value != ' ')) {
+          // Use the unescaped version of the character.
+          result.push_back(value);
+          i += 2;
+        } else {
+          // Leave the sequence escaped: emit the '%' now, the two digits are
+          // copied by the following iterations.
+          result.push_back('%');
+        }
+      } else {
+        result.push_back('%');
+      }
+    } else if ((rules & UnescapeRule::REPLACE_PLUS_WITH_SPACE) &&
+               escaped_text[i] == '+') {
+      result.push_back(' ');
+    } else {
+      result.push_back(escaped_text[i]);
+    }
+  }
+
+  return result;
+}
+
+}  // namespace
+
+// Everything except alphanumerics and !'()*-._~
+// See RFC 2396 for the list of reserved characters.
+static const Charmap kQueryCharmap(
+  0xffffffffL, 0xfc00987dL, 0x78000001L, 0xb8000001L,
+  0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL);
+
+// Escapes |text| with kQueryCharmap; spaces are written as '+'.
+std::string EscapeQueryParamValue(const std::string& text) {
+  return Escape(text, kQueryCharmap, true);
+}
+
+// Convert the string to a sequence of bytes and then % escape anything
+// except alphanumerics and !'()*-._~
+std::wstring EscapeQueryParamValueUTF8(const std::wstring& text) {
+  return UTF8ToWide(Escape(WideToUTF8(text), kQueryCharmap, true));
+}
+
+// non-printable, non-7bit, and (including space)  "#%:<>?[\]^`{|}
+static const Charmap kPathCharmap(
+  0xffffffffL, 0xd400002dL, 0x78000000L, 0xb8000001L,
+  0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL);
+
+// Escapes |path| with kPathCharmap; spaces become "%20", not '+'.
+std::string EscapePath(const std::string& path) {
+  return Escape(path, kPathCharmap, false);
+}
+
+// non-7bit
+static const Charmap kNonASCIICharmap(
+  0x00000000L, 0x00000000L, 0x00000000L, 0x00000000L,
+  0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL);
+
+// Escapes only the bytes 0x80-0xFF of |input|.
+std::string EscapeNonASCII(const std::string& input) {
+  return Escape(input, kNonASCIICharmap, false);
+}
+
+// Everything except alphanumerics, the reserved characters(;/?:@&=+$,) and
+// !'()*-._~%
+// NOTE(review): the bits below do not match that list exactly -- bit 11 of
+// the second word ('+') is set, so '+' is escaped, and bit 28 of the third
+// word ('\') is clear, so '\' is not.  Confirm which of the two is intended
+// before changing either the table or the comment.
+static const Charmap kExternalHandlerCharmap(
+  0xffffffffL, 0x5000080dL, 0x68000000L, 0xb8000001L,
+  0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL);
+
+// Escapes |text| with kExternalHandlerCharmap; spaces become "%20".
+std::string EscapeExternalHandlerValue(const std::string& text) {
+  return Escape(text, kExternalHandlerCharmap, false);
+}
+
+// Converts |text| to |codepage|, escapes it as a query parameter value and
+// stores the widened result in |escaped|.  Returns false, leaving |escaped|
+// untouched, when the codepage conversion fails.
+bool EscapeQueryParamValue(const std::wstring& text, const char* codepage,
+                           std::wstring* escaped) {
+  // TODO(brettw) bug 1201094: this function should be removed, this "SKIP"
+  // behavior is wrong when the character can't be encoded properly.
+  std::string encoded;
+  if (!WideToCodepage(text, codepage,
+                      OnStringUtilConversionError::SKIP, &encoded))
+    return false;
+
+  // It's safe to use UTF8ToWide here because Escape should only return
+  // alphanumerics and !'()*-._~
+  escaped->assign(UTF8ToWide(Escape(encoded, kQueryCharmap, true)));
+  return true;
+}
+
+// Unescapes |text| according to |rules| and decodes the result from
+// |codepage|.  If the unescaped bytes are not valid in that codepage, the
+// original (still escaped) |text| is widened and returned instead.
+std::wstring UnescapeAndDecodeURLComponent(const std::string& text,
+                                           const char* codepage,
+                                           UnescapeRule::Type rules) {
+  std::wstring result;
+  if (CodepageToWide(UnescapeURLImpl(text, rules), codepage,
+                     OnStringUtilConversionError::FAIL, &result))
+    return result;           // Character set looks like it's valid.
+  return UTF8ToWide(text);   // Return the escaped version when it's not.
+}
+
+// Public entry point for UnescapeURLImpl(); see escape.h for the rules.
+std::string UnescapeURLComponent(const std::string& escaped_text,
+                                 UnescapeRule::Type rules) {
+  return UnescapeURLImpl(escaped_text, rules);
+}
+
+// Appends |c| to |output|, replacing the five characters that can act as
+// HTML delimiters (< > & " ') with the corresponding character reference.
+// Works for both std::string and std::wstring because the replacements are
+// pure ASCII and are appended one character at a time.
+template <class str>
+void AppendEscapedCharForHTMLImpl(typename str::value_type c, str* output) {
+  static const struct {
+    char key;
+    const char *replacement;
+  } kCharsToEscape[] = {
+    { '<', "&lt;" },
+    { '>', "&gt;" },
+    { '&', "&amp;" },
+    { '"', "&quot;" },
+    { '\'', "&#39;" },
+  };
+  size_t k;
+  for (k = 0; k < arraysize(kCharsToEscape); ++k) {
+    if (c == kCharsToEscape[k].key) {
+      const char* p = kCharsToEscape[k].replacement;
+      while (*p)
+        output->push_back(*p++);
+      break;
+    }
+  }
+  // The loop ran to completion, so |c| needs no escaping.
+  if (k == arraysize(kCharsToEscape))
+    output->push_back(c);
+}
+
+void AppendEscapedCharForHTML(char c, std::string* output) {
+  AppendEscapedCharForHTMLImpl(c, output);
+}
+
+void AppendEscapedCharForHTML(wchar_t c, std::wstring* output) {
+  AppendEscapedCharForHTMLImpl(c, output);
+}
+
+// Returns a copy of |input| with every character passed through
+// AppendEscapedCharForHTMLImpl().
+template <class str>
+str EscapeForHTMLImpl(const str& input) {
+  str result;
+  result.reserve(input.size());  // optimize for no escaping
+
+  // const_iterator is a dependent type, hence the "typename".
+  for (typename str::const_iterator it = input.begin(); it != input.end();
+       ++it)
+    AppendEscapedCharForHTMLImpl(*it, &result);
+
+  return result;
+}
+
+std::string EscapeForHTML(const std::string& input) {
+  return EscapeForHTMLImpl(input);
+}
+
+std::wstring EscapeForHTML(const std::wstring& input) {
+  return EscapeForHTMLImpl(input);
+}
diff --git a/net/base/escape.h b/net/base/escape.h
new file mode 100644
index 0000000..220eebc
--- /dev/null
+++ b/net/base/escape.h
@@ -0,0 +1,141 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//    * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//    * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef NET_BASE_ESCAPE_H__
+#define NET_BASE_ESCAPE_H__
+
+#include <string>
+
+#include "base/basictypes.h"
+
+// Escaping --------------------------------------------------------------------
+
+// Escape a file or url path.  This includes:
+// non-printable, non-7bit, and (including space)  "#%:<>?[\]^`{|}
+std::string EscapePath(const std::string& path);
+
+// Escape all non-ASCII input.
+std::string EscapeNonASCII(const std::string& input);
+
+// Escapes characters in text suitable for use as an external protocol handler
+// command.
+// We %XX everything except alphanumerics and %-_.!~*'() and the restricted
+// characters (;/?:@&=+$,).
+std::string EscapeExternalHandlerValue(const std::string& text);
+
+// Append the given character to the output string, escaping the character if
+// the character would be interpreted as an HTML delimiter.
+void AppendEscapedCharForHTML(char c, std::string* output);
+
+// Escape chars that might cause this text to be interpreted as HTML tags.
+std::string EscapeForHTML(const std::string& text);
+
+// Unescaping ------------------------------------------------------------------
+
+class UnescapeRule {
+ public:
+  // A combination of the following flags that is passed to the unescaping
+  // functions.
+  typedef uint32 Type;
+
+  enum {
+    // Don't unescape anything special, but all normal unescaping will happen.
+    // This is a placeholder and can't be combined with other flags (since it's
+    // just the absence of them). Things like escaped letters, digits, and most
+    // symbols will get unescaped with this mode.
+    NORMAL = 0,
+
+    // Convert %20 to spaces. In some places where we're showing URLs, we may
+    // want this. In places where the URL may be copied and pasted out, then
+    // you wouldn't want this since it might not be interpreted in one piece
+    // by other applications.
+    SPACES = 1,
+
+    // Unescapes "%25" to "%". This must not be used when the resulting string
+    // will need to be interpreted as a URL again, since we won't know what
+    // should be escaped and what shouldn't. For example, "%2520" would be
+    // converted to "%20" which would have different meaning than the original.
+    // This flag is used when generating final output like filenames for URLs
+    // where we won't be interpreting as a URL and want to do as much unescaping
+    // as possible.
+    PERCENTS = 2,
+
+    // URL queries use "+" for space. This flag controls that replacement.
+    REPLACE_PLUS_WITH_SPACE = 4,
+  };
+};
+
+// Unescapes |escaped_text| and returns the result.
+// Unescaping consists of looking for the exact pattern "%XX", where each X is
+// a hex digit, and converting to the character with the numerical value of
+// those digits. Thus "i%20=%203%3b" unescapes to "i = 3;".
+//
+// Watch out: this doesn't necessarily result in the correct final result,
+// because the encoding may be unknown. For example, the input might be ASCII,
+// which, after unescaping, is supposed to be interpreted as UTF-8, and then
+// converted into full wide chars. This function won't tell you if any
+// conversions need to take place, it only unescapes.
+std::string UnescapeURLComponent(const std::string& escaped_text,
+                                 UnescapeRule::Type rules);
+
+// Unescapes the given substring as a URL, and then tries to interpret the
+// result as being encoded in the given code page. If the result is convertible
+// into the code page, it will be returned as converted. If it is not, the
+// original escaped string will be converted into a wide string and returned.
+std::wstring UnescapeAndDecodeURLComponent(const std::string& text,
+                                           const char* codepage,
+                                           UnescapeRule::Type rules);
+inline std::wstring UnescapeAndDecodeUTF8URLComponent(
+    const std::string& text,
+    UnescapeRule::Type rules) {
+  return UnescapeAndDecodeURLComponent(text, "UTF-8", rules);
+}
+
+// Deprecated ------------------------------------------------------------------
+
+// Escapes characters in text suitable for use as a query parameter value.
+// We %XX everything except alphanumerics and -_.!~*'()
+// This is basically the same as encodeURIComponent in javascript.
+// For the wstring version, we do a conversion to charset before encoding the
+// string.  If the charset doesn't exist, we return false.
+//
+// TODO(brettw) bug 1201094: This function should be removed. See the bug for
+// why and what callers should do instead.
+std::string EscapeQueryParamValue(const std::string& text);
+bool EscapeQueryParamValue(const std::wstring& text, const char* codepage,
+                           std::wstring* escaped);
+
+// A specialized version of EscapeQueryParamValue for wide strings that
+// assumes the codepage is UTF8.  This is provided as a convenience.
+//
+// TODO(brettw) bug 1201094: This function should be removed. See the bug for
+// why and what callers should do instead.
+std::wstring EscapeQueryParamValueUTF8(const std::wstring& text);
+
+#endif  // #ifndef NET_BASE_ESCAPE_H__
diff --git a/net/base/escape_unittest.cc b/net/base/escape_unittest.cc
new file mode 100644
index 0000000..d2d0288
--- /dev/null
+++ b/net/base/escape_unittest.cc
@@ -0,0 +1,229 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//    * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//    * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <string>
+
+#include "net/base/escape.h"
+
+#include "base/basictypes.h"
+#include "base/logging.h"
+#include "base/string_util.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+struct unescape_case {
+  const char* input;
+  const char* output;
+};
+
+// Query-parameter escaping: spaces become '+', everything outside the
+// unreserved set becomes %XX, and the UTF-8 convenience wrapper agrees with
+// the codepage-taking overload.
+TEST(Escape, EscapeTextForFormSubmission) {
+  struct escape_case {
+    const wchar_t* input;
+    const wchar_t* output;
+  } escape_cases[] = {
+    {L"foo", L"foo"},
+    {L"foo bar", L"foo+bar"},
+    {L"foo++", L"foo%2B%2B"}
+  };
+  for (int i = 0; i < arraysize(escape_cases); ++i) {
+    escape_case value = escape_cases[i];
+    EXPECT_EQ(value.output, EscapeQueryParamValueUTF8(value.input));
+  }
+
+  // Test all the values we're supposed to be escaping.
+  const std::string no_escape(
+    "abcdefghijklmnopqrstuvwxyz"
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+    "0123456789"
+    "!'()*-._~");
+  for (int i = 0; i < 256; ++i) {
+    std::string in;
+    in.push_back(i);
+    std::string out = EscapeQueryParamValue(in);
+    if (0 == i) {
+      EXPECT_EQ(out, std::string("%00"));
+    } else if (32 == i) {
+      // Spaces are plus escaped like web forms.
+      EXPECT_EQ(out, std::string("+"));
+    } else if (no_escape.find(in) == std::string::npos) {
+      // Check %hex escaping
+      char buf[4];
+      sprintf_s(buf, 4, "%%%02X", i);
+      EXPECT_EQ(std::string(buf), out);
+    } else {
+      // No change for things in the no_escape list.
+      EXPECT_EQ(out, in);
+    }
+  }
+
+  // Check to see if EscapeQueryParamValueUTF8 is the same as
+  // EscapeQueryParamValue(..., kCodepageUTF8,)
+  std::wstring test_str;
+  test_str.reserve(5000);
+  for (int i = 1; i < 5000; ++i) {
+    test_str.push_back(i);
+  }
+  std::wstring wide;
+  EXPECT_TRUE(EscapeQueryParamValue(test_str, kCodepageUTF8, &wide));
+  EXPECT_EQ(wide, EscapeQueryParamValueUTF8(test_str));
+}
+
+// Path escaping over (most of) the byte range.
+TEST(Escape, EscapePath) {
+  ASSERT_EQ(
+    // Most of the character space we care about, un-escaped
+    EscapePath(
+      "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;"
+      "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+      "[\\]^_`abcdefghijklmnopqrstuvwxyz"
+      "{|}~\x7f\x80\xff"),
+    // Escaped
+    "%02%0A%1D%20!%22%23$%25&'()*+,-./0123456789%3A;"
+    "%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+    "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz"
+    "%7B%7C%7D~%7F%80%FF");
+}
+
+// Byte-level unescaping under each combination of SPACES / PERCENTS.
+TEST(Escape, UnescapeURLComponent) {
+  struct UnescapeCase {
+    const char* input;
+    UnescapeRule::Type rules;
+    const char* output;
+  } unescape_cases[] = {
+    {"", UnescapeRule::NORMAL, ""},
+    {"%2", UnescapeRule::NORMAL, "%2"},
+    {"%%%%%%", UnescapeRule::NORMAL, "%%%%%%"},
+    {"Don't escape anything", UnescapeRule::NORMAL, "Don't escape anything"},
+    {"Invalid %escape %2", UnescapeRule::NORMAL, "Invalid %escape %2"},
+    {"Some%20random text %25%3bOK", UnescapeRule::NORMAL, "Some%20random text %25;OK"},
+    {"Some%20random text %25%3bOK", UnescapeRule::SPACES, "Some random text %25;OK"},
+    {"Some%20random text %25%3bOK", UnescapeRule::PERCENTS, "Some%20random text %;OK"},
+    {"Some%20random text %25%3bOK", UnescapeRule::SPACES | UnescapeRule::PERCENTS, "Some random text %;OK"},
+    {"%01%02%03%04%05%06%07%08%09", UnescapeRule::NORMAL, "\x01\x02\x03\x04\x05\x06\x07\x08\x09"},
+    {"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, "\xA0\xB1\xC2\xD3\xE4\xF5"},
+    {"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, "\xAa\xBb\xCc\xDd\xEe\xFf"}
+  };
+
+  for (int i = 0; i < arraysize(unescape_cases); i++) {
+    std::string str(unescape_cases[i].input);
+    EXPECT_EQ(std::string(unescape_cases[i].output),
+              UnescapeURLComponent(str, unescape_cases[i].rules));
+  }
+
+  // test the NULL character escaping (which wouldn't work above since those
+  // are just char pointers)
+  std::string input("Null");
+  input.push_back(0);  // Also have a NULL in the input.
+  input.append("%00%39Test");
+
+  std::string expected("Null");
+  expected.push_back(0);
+  expected.push_back(0);
+  expected.append("9Test");
+
+  EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL));
+}
+
+// Unescaping followed by decoding from a named codepage.
+TEST(Escape, UnescapeAndDecodeURLComponent) {
+  struct UnescapeCase {
+    const char* encoding;
+    const char* input;
+
+    // The expected output when run through UnescapeURL.
+    const char* url_unescaped;
+
+    // The expected output when run through UnescapeQuery.
+    const char* query_unescaped;
+
+    // The expected output when run through UnescapeAndDecodeURLComponent.
+    const wchar_t* decoded;
+  } unescape_cases[] = {
+    {"UTF8", "+", "+", " ", L"+"},
+    {"UTF8", "%2+", "%2+", "%2 ", L"%2+"},
+    {"UTF8", "+%%%+%%%", "+%%%+%%%", " %%% %%%", L"+%%%+%%%"},
+    {"UTF8", "Don't escape anything",
+             "Don't escape anything",
+             "Don't escape anything",
+             L"Don't escape anything"},
+    {"UTF8", "+Invalid %escape %2+",
+             "+Invalid %escape %2+",
+             " Invalid %escape %2 ",
+             L"+Invalid %escape %2+"},
+    {"UTF8", "Some random text %25%3bOK",
+             "Some random text %25;OK",
+             "Some random text %25;OK",
+             L"Some random text %25;OK"},
+    {"UTF8", "%01%02%03%04%05%06%07%08%09",
+             "\x01\x02\x03\x04\x05\x06\x07\x08\x09",
+             "\x01\x02\x03\x04\x05\x06\x07\x08\x09",
+             L"\x01\x02\x03\x04\x05\x06\x07\x08\x09"},
+    {"UTF8", "%E4%BD%A0+%E5%A5%BD",
+             "\xE4\xBD\xA0+\xE5\xA5\xBD",
+             "\xE4\xBD\xA0 \xE5\xA5\xBD",
+             L"\x4f60+\x597d"},
+    {"BIG5", "%A7A%A6n",
+             "\xA7\x41\xA6n",
+             "\xA7\x41\xA6n",
+             L"\x4f60\x597d"},
+    {"UTF8", "%ED%ED",  // Invalid UTF-8.
+             "\xED\xED",
+             "\xED\xED",
+             L"%ED%ED"},  // Invalid UTF-8 -> kept unescaped.
+  };
+
+  for (int i = 0; i < arraysize(unescape_cases); i++) {
+    std::string unescaped = UnescapeURLComponent(unescape_cases[i].input,
+                                                 UnescapeRule::NORMAL);
+    EXPECT_EQ(std::string(unescape_cases[i].url_unescaped), unescaped);
+
+    unescaped = UnescapeURLComponent(unescape_cases[i].input,
+                                     UnescapeRule::REPLACE_PLUS_WITH_SPACE);
+    EXPECT_EQ(std::string(unescape_cases[i].query_unescaped), unescaped);
+
+    // TODO: Need to test unescape_spaces and unescape_percent.
+    std::wstring decoded = UnescapeAndDecodeURLComponent(
+        unescape_cases[i].input, unescape_cases[i].encoding,
+        UnescapeRule::NORMAL);
+    EXPECT_EQ(std::wstring(unescape_cases[i].decoded), decoded);
+  }
+}
+
+// HTML escaping must turn delimiters into character references; the expected
+// strings therefore contain the references, not the raw characters.
+TEST(Escape, EscapeForHTML) {
+  static const struct {
+    const char* input;
+    const char* expected_output;
+  } tests[] = {
+    { "hello", "hello" },
+    { "<hello>", "&lt;hello&gt;" },
+    { "don\'t mess with me", "don&#39;t mess with me" },
+  };
+  for (size_t i = 0; i < arraysize(tests); ++i) {
+    std::string result = EscapeForHTML(std::string(tests[i].input));
+    EXPECT_EQ(std::string(tests[i].expected_output), result);
+  }
+}
+
diff --git a/net/base/ev_root_ca_metadata.cc b/net/base/ev_root_ca_metadata.cc
new file mode 100644
index 0000000..062e29e
--- /dev/null
+++ b/net/base/ev_root_ca_metadata.cc
@@ -0,0 +1,205 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//    * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//    * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "net/base/ev_root_ca_metadata.h" + +namespace net { + +// Raw metadata. +struct EVMetadata { + // The SHA-1 fingerprint of the root CA certificate, used as a unique + // identifier for a root CA certificate. + X509Certificate::Fingerprint fingerprint; + + // The EV policy OID of the root CA. + // Note: a root CA may have multiple EV policies. When that actually + // happens, we'll need to support that. + const char* policy_oid; +}; + +static const EVMetadata ev_root_ca_metadata[] = { + // COMODO Certification Authority + // https://secure.comodo.com/ + { { 0x66, 0x31, 0xbf, 0x9e, 0xf7, 0x4f, 0x9e, 0xb6, 0xc9, 0xd5, + 0xa6, 0x0c, 0xba, 0x6a, 0xbe, 0xd1, 0xf7, 0xbd, 0xef, 0x7b }, + "1.3.6.1.4.1.6449.1.2.1.5.1" + }, + // DigiCert High Assurance EV Root CA + // https://www.digicert.com + { { 0x5f, 0xb7, 0xee, 0x06, 0x33, 0xe2, 0x59, 0xdb, 0xad, 0x0c, + 0x4c, 0x9a, 0xe6, 0xd3, 0x8f, 0x1a, 0x61, 0xc7, 0xdc, 0x25 }, + "2.16.840.1.114412.2.1" + }, + // Entrust.net Secure Server Certification Authority + // https://www.entrust.net/ + { { 0x99, 0xa6, 0x9b, 0xe6, 0x1a, 0xfe, 0x88, 0x6b, 0x4d, 0x2b, + 0x82, 0x00, 0x7c, 0xb8, 0x54, 0xfc, 0x31, 0x7e, 0x15, 0x39 }, + "2.16.840.1.114028.10.1.2" + }, + // Entrust Root Certification Authority + // https://www.entrust.net/ + { { 0xb3, 0x1e, 0xb1, 0xb7, 0x40, 0xe3, 0x6c, 0x84, 0x02, 0xda, + 0xdc, 0x37, 0xd4, 0x4d, 0xf5, 0xd4, 0x67, 0x49, 0x52, 0xf9 }, + "2.16.840.1.114028.10.1.2" + }, + // 
Equifax Secure Certificate Authority (GeoTrust) + // https://www.geotrust.com/ + { { 0xd2, 0x32, 0x09, 0xad, 0x23, 0xd3, 0x14, 0x23, 0x21, 0x74, + 0xe4, 0x0d, 0x7f, 0x9d, 0x62, 0x13, 0x97, 0x86, 0x63, 0x3a }, + "1.3.6.1.4.1.14370.1.6" + }, + // GeoTrust Primary Certification Authority + // https://www.geotrust.com/ + { { 0x32, 0x3c, 0x11, 0x8e, 0x1b, 0xf7, 0xb8, 0xb6, 0x52, 0x54, + 0xe2, 0xe2, 0x10, 0x0d, 0xd6, 0x02, 0x90, 0x37, 0xf0, 0x96 }, + "1.3.6.1.4.1.14370.1.6" + }, + // Go Daddy Class 2 Certification Authority + // https://www.godaddy.com/ + { { 0x27, 0x96, 0xba, 0xe6, 0x3f, 0x18, 0x01, 0xe2, 0x77, 0x26, + 0x1b, 0xa0, 0xd7, 0x77, 0x70, 0x02, 0x8f, 0x20, 0xee, 0xe4 }, + "2.16.840.1.114413.1.7.23.3" + }, + // Network Solutions Certificate Authority + // https://www.networksolutions.com/website-packages/index.jsp + { { 0x74, 0xf8, 0xa3, 0xc3, 0xef, 0xe7, 0xb3, 0x90, 0x06, 0x4b, + 0x83, 0x90, 0x3c, 0x21, 0x64, 0x60, 0x20, 0xe5, 0xdf, 0xce }, + "1.3.6.1.4.1.782.1.2.1.8.1" + }, + // QuoVadis Root CA 2 + // https://www.quovadis.bm/ + { { 0xca, 0x3a, 0xfb, 0xcf, 0x12, 0x40, 0x36, 0x4b, 0x44, 0xb2, + 0x16, 0x20, 0x88, 0x80, 0x48, 0x39, 0x19, 0x93, 0x7c, 0xf7 }, + "1.3.6.1.4.1.8024.0.2.100.1.2" + }, + // SecureTrust CA, SecureTrust Corporation + // https://www.securetrust.com + // https://www.trustwave.com/ + { { 0x87, 0x82, 0xc6, 0xc3, 0x04, 0x35, 0x3b, 0xcf, 0xd2, 0x96, + 0x92, 0xd2, 0x59, 0x3e, 0x7d, 0x44, 0xd9, 0x34, 0xff, 0x11 }, + "2.16.840.1.114404.1.1.2.4.1" + }, + // Secure Global CA, SecureTrust Corporation + { { 0x3a, 0x44, 0x73, 0x5a, 0xe5, 0x81, 0x90, 0x1f, 0x24, 0x86, + 0x61, 0x46, 0x1e, 0x3b, 0x9c, 0xc4, 0x5f, 0xf5, 0x3a, 0x1b }, + "2.16.840.1.114404.1.1.2.4.1" + }, + // Starfield Class 2 Certification Authority + // https://www.starfieldtech.com/ + { { 0xad, 0x7e, 0x1c, 0x28, 0xb0, 0x64, 0xef, 0x8f, 0x60, 0x03, + 0x40, 0x20, 0x14, 0xc3, 0xd0, 0xe3, 0x37, 0x0e, 0xb5, 0x8a }, + "2.16.840.1.114414.1.7.23.3" + }, + // Thawte Premium Server CA + // 
https://www.thawte.com/ + { { 0x62, 0x7f, 0x8d, 0x78, 0x27, 0x65, 0x63, 0x99, 0xd2, 0x7d, + 0x7f, 0x90, 0x44, 0xc9, 0xfe, 0xb3, 0xf3, 0x3e, 0xfa, 0x9a }, + "2.16.840.1.113733.1.7.48.1" + }, + // thawte Primary Root CA + // https://www.thawte.com/ + { { 0x91, 0xc6, 0xd6, 0xee, 0x3e, 0x8a, 0xc8, 0x63, 0x84, 0xe5, + 0x48, 0xc2, 0x99, 0x29, 0x5c, 0x75, 0x6c, 0x81, 0x7b, 0x81 }, + "2.16.840.1.113733.1.7.48.1" + }, + // UTN - DATACorp SGC + { { 0x58, 0x11, 0x9f, 0x0e, 0x12, 0x82, 0x87, 0xea, 0x50, 0xfd, + 0xd9, 0x87, 0x45, 0x6f, 0x4f, 0x78, 0xdc, 0xfa, 0xd6, 0xd4 }, + "1.3.6.1.4.1.6449.1.2.1.5.1" + }, + // UTN-USERFirst-Hardware + { { 0x04, 0x83, 0xed, 0x33, 0x99, 0xac, 0x36, 0x08, 0x05, 0x87, + 0x22, 0xed, 0xbc, 0x5e, 0x46, 0x00, 0xe3, 0xbe, 0xf9, 0xd7 }, + "1.3.6.1.4.1.6449.1.2.1.5.1" + }, + // ValiCert Class 2 Policy Validation Authority + // TODO(wtc): bug 1165107: this CA has another policy OID + // "2.16.840.1.114414.1.7.23.3". + { { 0x31, 0x7a, 0x2a, 0xd0, 0x7f, 0x2b, 0x33, 0x5e, 0xf5, 0xa1, + 0xc3, 0x4e, 0x4b, 0x57, 0xe8, 0xb7, 0xd8, 0xf1, 0xfc, 0xa6 }, + "2.16.840.1.114413.1.7.23.3" + }, + // VeriSign Class 3 Public Primary Certification Authority + // https://www.verisign.com/ + { { 0x74, 0x2c, 0x31, 0x92, 0xe6, 0x07, 0xe4, 0x24, 0xeb, 0x45, + 0x49, 0x54, 0x2b, 0xe1, 0xbb, 0xc5, 0x3e, 0x61, 0x74, 0xe2 }, + "2.16.840.1.113733.1.7.23.6" + }, + // VeriSign Class 3 Public Primary Certification Authority - G5 + // https://www.verisign.com/ + { { 0x4e, 0xb6, 0xd5, 0x78, 0x49, 0x9b, 0x1c, 0xcf, 0x5f, 0x58, + 0x1e, 0xad, 0x56, 0xbe, 0x3d, 0x9b, 0x67, 0x44, 0xa5, 0xe5 }, + "2.16.840.1.113733.1.7.23.6" + }, + // XRamp Global Certification Authority + { { 0xb8, 0x01, 0x86, 0xd1, 0xeb, 0x9c, 0x86, 0xa5, 0x41, 0x04, + 0xcf, 0x30, 0x54, 0xf3, 0x4c, 0x52, 0xb7, 0xe5, 0x58, 0xc6 }, + "2.16.840.1.114404.1.1.2.4.1" + }, +}; + +// static +EVRootCAMetadata* EVRootCAMetadata::instance_; + +// static +EVRootCAMetadata* EVRootCAMetadata::GetInstance() { + if (!instance_) { + 
EVRootCAMetadata* new_instance = new EVRootCAMetadata; + if (InterlockedCompareExchangePointer( + reinterpret_cast<PVOID*>(&instance_), new_instance, NULL)) + delete new_instance; + } + return instance_; +} + +bool EVRootCAMetadata::GetPolicyOID( + const X509Certificate::Fingerprint& fingerprint, + std::string* policy_oid) const { + StringMap::const_iterator iter = ev_policy_.find(fingerprint); + if (iter == ev_policy_.end()) + return false; + *policy_oid = iter->second; + return true; +} + +EVRootCAMetadata::EVRootCAMetadata() { + // Constructs the object from the raw metadata in ev_root_ca_metadata: + // fills the fingerprint -> policy OID map (ev_policy_) and the flat + // policy_oids_ array. + num_policy_oids_ = arraysize(ev_root_ca_metadata); + policy_oids_.reset(new const char*[num_policy_oids_]); + // Loop over the same count that sized policy_oids_, so the index and the + // bound have the same (signed) type and cannot drift from the allocation. + for (int i = 0; i < num_policy_oids_; i++) { + const EVMetadata& metadata = ev_root_ca_metadata[i]; + ev_policy_[metadata.fingerprint] = metadata.policy_oid; + // Multiple root CA certs may use the same EV policy OID. Having + // duplicates in the policy_oids_ array does no harm, so we don't + // bother detecting duplicates. + policy_oids_[i] = metadata.policy_oid; + } +} + +} // namespace net diff --git a/net/base/ev_root_ca_metadata.h b/net/base/ev_root_ca_metadata.h new file mode 100644 index 0000000..a849a92 --- /dev/null +++ b/net/base/ev_root_ca_metadata.h @@ -0,0 +1,76 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef NET_BASE_EV_ROOT_CA_METADATA_H__ +#define NET_BASE_EV_ROOT_CA_METADATA_H__ + +#include <map> + +#include "net/base/x509_certificate.h" + +namespace net { + +// A singleton. This class stores the meta data of the root CAs that issue +// extended-validation (EV) certificates. +class EVRootCAMetadata { + public: + static EVRootCAMetadata* GetInstance(); + + // If the root CA cert has an EV policy OID, returns true and stores the + // policy OID in *policy_oid. Otherwise, returns false. + bool GetPolicyOID(const X509Certificate::Fingerprint& fingerprint, + std::string* policy_oid) const; + + const char* const* GetPolicyOIDs() const { return policy_oids_.get(); } + int NumPolicyOIDs() const { return num_policy_oids_; } + + private: + EVRootCAMetadata(); + ~EVRootCAMetadata() { } + + static EVRootCAMetadata* instance_; + + typedef std::map<X509Certificate::Fingerprint, std::string, + X509Certificate::FingerprintLessThan> StringMap; + + // Maps an EV root CA cert's SHA-1 fingerprint to its EV policy OID. 
+ StringMap ev_policy_; + + // Contains dotted-decimal OID strings (in ASCII). This is a C array of + // C strings so that it can be passed directly to Windows CryptoAPI as + // LPSTR*. + scoped_array<const char*> policy_oids_; + int num_policy_oids_; + + DISALLOW_EVIL_CONSTRUCTORS(EVRootCAMetadata); +}; + +} // namespace net + +#endif // NET_BASE_EV_ROOT_CA_METADATA_H__
\ No newline at end of file diff --git a/net/base/filter.cc b/net/base/filter.cc new file mode 100644 index 0000000..508e2d4 --- /dev/null +++ b/net/base/filter.cc @@ -0,0 +1,182 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include "net/base/filter.h" + +#include "base/string_util.h" +#include "net/base/gzip_filter.h" +#include "net/base/bzip2_filter.h" + +namespace { + +// Filter types: +const char kDeflate[] = "deflate"; +const char kGZip[] = "gzip"; +const char kXGZip[] = "x-gzip"; +const char kBZip2[] = "bzip2"; +const char kXBZip2[] = "x-bzip2"; +// compress and x-compress are currently not supported. If we decide to support +// them, we'll need the same mime type compatibility hack we have for gzip. For +// more information, see Firefox's nsHttpChannel::ProcessNormal. +const char kCompress[] = "compress"; +const char kXCompress[] = "x-compress"; +const char kIdentity[] = "identity"; +const char kUncompressed[] = "uncompressed"; + +// Mime types: +const char kApplicationXGzip[] = "application/x-gzip"; +const char kApplicationGzip[] = "application/gzip"; +const char kApplicationXGunzip[] = "application/x-gunzip"; +const char kApplicationXCompress[] = "application/x-compress"; +const char kApplicationCompress[] = "application/compress"; + +} // namespace + +Filter* Filter::Factory(const std::string& filter_type, + const std::string& mime_type, + int buffer_size) { + if (filter_type.empty() || buffer_size < 0) + return NULL; + + FilterType type_id; + if (LowerCaseEqualsASCII(filter_type, kDeflate)) { + type_id = FILTER_TYPE_DEFLATE; + } else if (LowerCaseEqualsASCII(filter_type, kGZip) || + LowerCaseEqualsASCII(filter_type, kXGZip)) { + if (LowerCaseEqualsASCII(mime_type, kApplicationXGzip) || + LowerCaseEqualsASCII(mime_type, kApplicationGzip) || + LowerCaseEqualsASCII(mime_type, kApplicationXGunzip)) { + // The server has told us that it sent us gziped content with a gzip + // content encoding. Sadly, Apache mistakenly sets these headers for all + // .gz files. We match Firefox's nsHttpChannel::ProcessNormal and ignore + // the Content-Encoding here. 
+ type_id = FILTER_TYPE_UNSUPPORTED; + } else { + type_id = FILTER_TYPE_GZIP; + } + } else if (LowerCaseEqualsASCII(filter_type, kBZip2) || + LowerCaseEqualsASCII(filter_type, kXBZip2)) { + type_id = FILTER_TYPE_BZIP2; + } else { + // Note we also consider "identity" and "uncompressed" UNSUPPORTED as + // filter should be disabled in such cases. + type_id = FILTER_TYPE_UNSUPPORTED; + } + + switch (type_id) { + case FILTER_TYPE_DEFLATE: + case FILTER_TYPE_GZIP: { + scoped_ptr<GZipFilter> gz_filter(new GZipFilter()); + if (gz_filter->InitBuffer(buffer_size)) { + if (gz_filter->InitDecoding(type_id)) { + return gz_filter.release(); + } + } + break; + } + case FILTER_TYPE_BZIP2: { + scoped_ptr<BZip2Filter> bzip2_filter(new BZip2Filter()); + if (bzip2_filter->InitBuffer(buffer_size)) { + if (bzip2_filter->InitDecoding(false)) { + return bzip2_filter.release(); + } + } + break; + } + default: { + break; + } + } + + return NULL; +} + +Filter::Filter() + : stream_buffer_(NULL), + stream_buffer_size_(0), + next_stream_data_(NULL), + stream_data_len_(0) { +} + +Filter::~Filter() {} + +bool Filter::InitBuffer(int buffer_size) { + if (buffer_size < 0 || stream_buffer()) + return false; + + stream_buffer_.reset(new char[buffer_size]); + + if (stream_buffer()) { + stream_buffer_size_ = buffer_size; + return true; + } + + return false; +} + + +// Copies up to *dest_len chars of pending pre-filter data into dest_buffer +// without decoding. On return *dest_len holds the number of chars copied. +// Returns FILTER_NEED_MORE_DATA when nothing is pending or the pending data +// was fully drained, FILTER_OK when pending data remains, and FILTER_ERROR +// for a NULL argument or a negative capacity. +Filter::FilterStatus Filter::CopyOut(char* dest_buffer, int* dest_len) { + if (!dest_len) + return Filter::FILTER_ERROR; + + int input_len = *dest_len; + *dest_len = 0; + + if (0 == stream_data_len_) + return Filter::FILTER_NEED_MORE_DATA; + + // A negative length would be converted to a huge size_t by memcpy, so + // reject it (and a missing destination) before copying. + if (!dest_buffer || input_len < 0) + return Filter::FILTER_ERROR; + + int out_len = std::min(input_len, stream_data_len_); + memcpy(dest_buffer, next_stream_data_, out_len); + *dest_len += out_len; + stream_data_len_ -= out_len; + if (0 == stream_data_len_) { + next_stream_data_ = NULL; + return Filter::FILTER_NEED_MORE_DATA; + } else { + next_stream_data_ += out_len; + return Filter::FILTER_OK; + } +} + + +Filter::FilterStatus Filter::ReadFilteredData(char* dest_buffer, + int* dest_len) { + return 
Filter::FILTER_ERROR; +} + +bool Filter::FlushStreamBuffer(int stream_data_len) { + if (stream_data_len <= 0 || stream_data_len > stream_buffer_size_) + return false; + + // bail out if there are more data in the stream buffer to be filtered. + if (!stream_buffer() || stream_data_len_) + return false; + + next_stream_data_ = stream_buffer(); + stream_data_len_ = stream_data_len; + return true; +} diff --git a/net/base/filter.h b/net/base/filter.h new file mode 100644 index 0000000..01be1ed --- /dev/null +++ b/net/base/filter.h @@ -0,0 +1,167 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Filter performs filtering on data streams. Sample usage: +// +// IStream* pre_filter_source; +// ... +// Filter* filter = Filter::Factory(filter_type, mime_type, size); +// int pre_filter_data_len = filter->stream_buffer_size(); +// pre_filter_source->read(filter->stream_buffer(), pre_filter_data_len); +// +// filter->FlushStreamBuffer(pre_filter_data_len); +// +// char post_filter_buf[kBufferSize]; +// int post_filter_data_len = kBufferSize; +// filter->ReadFilteredData(post_filter_buf, &post_filter_data_len); +// +// To filter a data stream, the caller first gets filter's stream_buffer_ +// through its accessor and fills in stream_buffer_ with pre-filter data, next +// calls FlushStreamBuffer to notify Filter, then calls ReadFilteredData +// repeatedly to get all the filtered data. After all data have been filtered +// and read out, the caller may fill in stream_buffer_ again. This +// WriteBuffer-Flush-Read cycle is repeated until reaching the end of data +// stream. +// +// The lifetime of a Filter instance is completely controlled by its caller. + +#ifndef NET_BASE_FILTER_H__ +#define NET_BASE_FILTER_H__ + +#include <string> + +#include "base/basictypes.h" +#include "base/scoped_ptr.h" + +class Filter { + public: + // Creates a Filter object. 
+ // Parameters: Filter_type specifies the type of filter created; Mime_type + // is the MIME type of the content (when filter_type is gzip/x-gzip and the + // MIME type is itself a gzip type, no filter is created and NULL is + // returned); Buffer_size + // specifies the size (in number of chars) of the buffer the filter should + // allocate to hold pre-filter data. + // If success, the function returns the pointer to the Filter object created. + // If failed or a filter is not needed, the function returns NULL. + static Filter* Factory(const std::string& filter_type, + const std::string& mime_type, + int buffer_size); + + virtual ~Filter(); + + // Return values of function ReadFilteredData. + enum FilterStatus { + // Read filtered data successfully + FILTER_OK, + // Read filtered data successfully, and the data in the buffer has been + // consumed by the filter, but more data is needed in order to continue + // filtering. At this point, the caller is free to reuse the filter + // buffer to provide more data. + FILTER_NEED_MORE_DATA, + // Read filtered data successfully, and filter reaches the end of the data + // stream. + FILTER_DONE, + // There is an error during filtering. + FILTER_ERROR + }; + + // Filters the data stored in stream_buffer_ and writes the output into the + // dest_buffer passed in. + // + // Upon entry, *dest_len is the total size (in number of chars) of the + // destination buffer. Upon exit, *dest_len is the actual number of chars + // written into the destination buffer. + // + // This function will fail if there is no pre-filter data in the + // stream_buffer_. On the other hand, *dest_len can be 0 upon successful + // return. For example, a decoding filter may process some pre-filter data + // but not produce output yet. + virtual FilterStatus ReadFilteredData(char* dest_buffer, int* dest_len); + + // Returns a pointer to the beginning of stream_buffer_. + char* stream_buffer() const { return stream_buffer_.get(); } + + // Returns the maximum size of stream_buffer_ in number of chars. 
+ int stream_buffer_size() const { return stream_buffer_size_; } + + // Returns the total number of chars remaining in stream_buffer_ to be + // filtered. + // + // If the function returns 0 then all data have been filtered and the caller + // is safe to copy new data into stream_buffer_. + int stream_data_len() const { return stream_data_len_; } + + // Flushes stream_buffer_ for next round of filtering. After copying data to + // stream_buffer_, the caller should call this function to notify Filter to + // start filtering. Then after this function is called, the caller can get + // post-filtered data using ReadFilteredData. The caller must not write to + // stream_buffer_ and call this function again before stream_buffer_ is empty + // out by ReadFilteredData. + // + // The input stream_data_len is the length (in number of chars) of valid + // data in stream_buffer_. It can not be greater than stream_buffer_size_. + // The function returns true if success, and false otherwise. + bool FlushStreamBuffer(int stream_data_len); + + protected: + Filter(); + + // Copy pre-filter data directly to destination buffer without decoding. + FilterStatus CopyOut(char* dest_buffer, int* dest_len); + + // Specifies type of filters that can be created. + enum FilterType { + FILTER_TYPE_DEFLATE, + FILTER_TYPE_GZIP, + FILTER_TYPE_BZIP2, + FILTER_TYPE_UNSUPPORTED + }; + + // Allocates and initializes stream_buffer_. + // Buffer_size is the maximum size of stream_buffer_ in number of chars. + bool InitBuffer(int buffer_size); + + // Buffer to hold the data to be filtered. + scoped_array<char> stream_buffer_; + + // Maximum size of stream_buffer_ in number of chars. + int stream_buffer_size_; + + // Pointer to the next data in stream_buffer_ to be filtered. + char* next_stream_data_; + + // Total number of remaining chars in stream_buffer_ to be filtered. 
+ int stream_data_len_; + + // Filter can be chained + // TODO (huanr) + // Filter* next_filter_; + + DISALLOW_EVIL_CONSTRUCTORS(Filter); +}; + +#endif // NET_BASE_FILTER_H__ diff --git a/net/base/gzip_filter.cc b/net/base/gzip_filter.cc new file mode 100644 index 0000000..6c009ac --- /dev/null +++ b/net/base/gzip_filter.cc @@ -0,0 +1,303 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include <minmax.h> + +#include "net/base/gzip_filter.h" + +#include "base/logging.h" +#include "net/base/gzip_header.h" +#include "third_party/zlib/zlib.h" + +GZipFilter::GZipFilter() + : decoding_status_(DECODING_UNINITIALIZED), + decoding_mode_(DECODE_MODE_UNKNOWN), + gzip_header_status_(GZIP_CHECK_HEADER_IN_PROGRESS), + zlib_header_added_(false), + gzip_footer_bytes_(0) { +} + +GZipFilter::~GZipFilter() { + if (decoding_status_ != DECODING_UNINITIALIZED) { + MOZ_Z_inflateEnd(zlib_stream_.get()); + } +} + +bool GZipFilter::InitDecoding(Filter::FilterType filter_type) { + if (decoding_status_ != DECODING_UNINITIALIZED) + return false; + + // Initialize zlib control block + zlib_stream_.reset(new z_stream); + if (!zlib_stream_.get()) + return false; + memset(zlib_stream_.get(), 0, sizeof(z_stream)); + + // Set decoding mode + switch (filter_type) { + case Filter::FILTER_TYPE_DEFLATE: { + if (inflateInit(zlib_stream_.get()) != Z_OK) + return false; + decoding_mode_ = DECODE_MODE_DEFLATE; + break; + } + case Filter::FILTER_TYPE_GZIP: { + gzip_header_.reset(new GZipHeader()); + if (!gzip_header_.get()) + return false; + if (inflateInit2(zlib_stream_.get(), -MAX_WBITS) != Z_OK) + return false; + decoding_mode_ = DECODE_MODE_GZIP; + break; + } + default: { + return false; + } + } + + decoding_status_ = DECODING_IN_PROGRESS; + return true; +} + +Filter::FilterStatus GZipFilter::ReadFilteredData(char* dest_buffer, + int* dest_len) { + if (!dest_buffer || !dest_len || *dest_len <= 0) + return Filter::FILTER_ERROR; + + if (decoding_status_ == DECODING_DONE) { + // Some server might send extra data after the gzip footer. We just copy + // them out. Mozilla does this too. 
+ SkipGZipFooter(); + return CopyOut(dest_buffer, dest_len); + } + + if (decoding_status_ != DECODING_IN_PROGRESS) + return Filter::FILTER_ERROR; + + Filter::FilterStatus status; + + if (decoding_mode_ == DECODE_MODE_GZIP && + gzip_header_status_ == GZIP_CHECK_HEADER_IN_PROGRESS) { + // With gzip encoding the content is wrapped with a gzip header. + // We need to parse and verify the header first. + status = CheckGZipHeader(); + switch (status) { + case Filter::FILTER_NEED_MORE_DATA: { + // We have consumed all input data, either getting a complete header or + // a partial header. Return now to get more data. + *dest_len = 0; + return status; + } + case Filter::FILTER_OK: { + // The header checking succeeds, and there are more data in the input. + // We must have got a complete header here. + DCHECK_EQ(gzip_header_status_, GZIP_GET_COMPLETE_HEADER); + break; + } + case Filter::FILTER_ERROR: { + decoding_status_ = DECODING_ERROR; + return status; + } + default: { + status = Filter::FILTER_ERROR; // Unexpected. + decoding_status_ = DECODING_ERROR; + return status; + } + } + } + + int dest_orig_size = *dest_len; + status = DoInflate(dest_buffer, dest_len); + + if (decoding_mode_ == DECODE_MODE_DEFLATE && status == Filter::FILTER_ERROR) { + // As noted in Mozilla implementation, some servers such as Apache with + // mod_deflate don't generate zlib headers. + // See 677409 for instances where this work around is needed. + // Insert a dummy zlib header and try again. + if (InsertZlibHeader()) { + *dest_len = dest_orig_size; + status = DoInflate(dest_buffer, dest_len); + } + } + + if (status == Filter::FILTER_DONE) { + decoding_status_ = DECODING_DONE; + } else if (status == Filter::FILTER_ERROR) { + decoding_status_ = DECODING_ERROR; + } + + return status; +} + +Filter::FilterStatus GZipFilter::CheckGZipHeader() { + DCHECK_EQ(gzip_header_status_, GZIP_CHECK_HEADER_IN_PROGRESS); + + // Check input data in pre-filter buffer. 
+ if (!next_stream_data_ || stream_data_len_ <= 0) + return Filter::FILTER_ERROR; + + const char* header_end = NULL; + GZipHeader::Status header_status; + header_status = gzip_header_->ReadMore(next_stream_data_, stream_data_len_, + &header_end); + + switch (header_status) { + case GZipHeader::INCOMPLETE_HEADER: { + // We read all the data but only got a partial header. + next_stream_data_ = NULL; + stream_data_len_ = 0; + return Filter::FILTER_NEED_MORE_DATA; + } + case GZipHeader::COMPLETE_HEADER: { + // We have a complete header. Check whether there are more data. + int num_chars_left = static_cast<int>(stream_data_len_ - + (header_end - next_stream_data_)); + gzip_header_status_ = GZIP_GET_COMPLETE_HEADER; + + if (num_chars_left > 0) { + next_stream_data_ = const_cast<char*>(header_end); + stream_data_len_ = num_chars_left; + return Filter::FILTER_OK; + } else { + next_stream_data_ = NULL; + stream_data_len_ = 0; + return Filter::FILTER_NEED_MORE_DATA; + } + } + case GZipHeader::INVALID_HEADER: { + gzip_header_status_ = GZIP_GET_INVALID_HEADER; + return Filter::FILTER_ERROR; + } + default: { + break; + } + } + + return Filter::FILTER_ERROR; +} + +Filter::FilterStatus GZipFilter::DoInflate(char* dest_buffer, int* dest_len) { + // Make sure we have both valid input data and output buffer. 
+ if (!dest_buffer || !dest_len || *dest_len <= 0) // output + return Filter::FILTER_ERROR; + + if (!next_stream_data_ || stream_data_len_ <= 0) // input + return Filter::FILTER_ERROR; + + // Fill in zlib control block + zlib_stream_.get()->next_in = bit_cast<Bytef*>(next_stream_data_); + zlib_stream_.get()->avail_in = stream_data_len_; + zlib_stream_.get()->next_out = bit_cast<Bytef*>(dest_buffer); + zlib_stream_.get()->avail_out = *dest_len; + + int inflate_code = MOZ_Z_inflate(zlib_stream_.get(), Z_NO_FLUSH); + int bytesWritten = *dest_len - zlib_stream_.get()->avail_out; + + Filter::FilterStatus status; + + switch (inflate_code) { + case Z_STREAM_END: { + *dest_len = bytesWritten; + + stream_data_len_ = zlib_stream_.get()->avail_in; + next_stream_data_ = bit_cast<char*>(zlib_stream_.get()->next_in); + + SkipGZipFooter(); + + status = Filter::FILTER_DONE; + break; + } + case Z_BUF_ERROR: { + // According to zlib documentation, when calling inflate with Z_NO_FLUSH, + // getting Z_BUF_ERROR means no progress is possible. Neither processing + // more input nor producing more output can be done. + // Since we have checked both input data and output buffer before calling + // inflate, this result is unexpected. + status = Filter::FILTER_ERROR; + break; + } + case Z_OK: { + // Some progress has been made (more input processed or more output + // produced). + *dest_len = bytesWritten; + + // Check whether we have consumed all input data. + stream_data_len_ = zlib_stream_.get()->avail_in; + if (stream_data_len_ == 0) { + next_stream_data_ = NULL; + status = Filter::FILTER_NEED_MORE_DATA; + } else { + next_stream_data_ = bit_cast<char*>(zlib_stream_.get()->next_in); + status = Filter::FILTER_OK; + } + break; + } + default: { + status = Filter::FILTER_ERROR; + break; + } + } + + return status; +} + +bool GZipFilter::InsertZlibHeader() { + static char dummy_head[2] = { 0x78, 0x1 }; + + char dummy_output[4]; + + // We only try add additional header once. 
+ if (zlib_header_added_) + return false; + + MOZ_Z_inflateReset(zlib_stream_.get()); + zlib_stream_.get()->next_in = bit_cast<Bytef*>(&dummy_head[0]); + zlib_stream_.get()->avail_in = sizeof(dummy_head); + zlib_stream_.get()->next_out = bit_cast<Bytef*>(&dummy_output[0]); + zlib_stream_.get()->avail_out = sizeof(dummy_output); + + int code = MOZ_Z_inflate(zlib_stream_.get(), Z_NO_FLUSH); + zlib_header_added_ = true; + + return (code == Z_OK); +} + + +void GZipFilter::SkipGZipFooter() { + int footer_bytes_expected = kGZipFooterSize - gzip_footer_bytes_; + if (footer_bytes_expected > 0) { + int footer_byte_avail = min(footer_bytes_expected, stream_data_len_); + stream_data_len_ -= footer_byte_avail; + next_stream_data_ += footer_byte_avail; + gzip_footer_bytes_ += footer_byte_avail; + + if (stream_data_len_ == 0) + next_stream_data_ = NULL; + } +} diff --git a/net/base/gzip_filter.h b/net/base/gzip_filter.h new file mode 100644 index 0000000..ae8b644 --- /dev/null +++ b/net/base/gzip_filter.h @@ -0,0 +1,158 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// GZipFilter applies gzip and deflate content decoding to a data +// stream. As specified by HTTP 1.1, with gzip encoding the content is +// wrapped with a gzip header, and with deflate encoding the content is in +// the zlib format (a zlib header followed by DEFLATE data). Some servers +// omit the zlib header; for those GZipFilter retries once with a dummy zlib +// header inserted (see InsertZlibHeader). +// +// Internally GZipFilter uses zlib inflate to do decoding. +// +// GZipFilter is a subclass of Filter. See the latter's header file filter.h +// for sample usage. + +#ifndef NET_BASE_GZIP_FILTER_H__ +#define NET_BASE_GZIP_FILTER_H__ + +#include "base/scoped_ptr.h" +#include "net/base/filter.h" + +class GZipHeader; +typedef struct z_stream_s z_stream; + +class GZipFilter : public Filter { + public: + GZipFilter(); + + virtual ~GZipFilter(); + + // Initializes filter decoding mode and internal control blocks. + // Parameter filter_type specifies the type of filter, which corresponds to + // either gzip or deflate decoding. The function returns true if success and + // false otherwise. + // The filter can only be initialized once. + bool InitDecoding(Filter::FilterType filter_type); + + // Decodes the pre-filter data and writes the output into the dest_buffer + // passed in. + // The function returns FilterStatus. 
See filter.h for its description. + // + // Upon entry, *dest_len is the total size (in number of chars) of the + // destination buffer. Upon exit, *dest_len is the actual number of chars + // written into the destination buffer. + // + // This function will fail if there is no pre-filter data in the + // stream_buffer_. On the other hand, *dest_len can be 0 upon successful + // return. For example, the internal zlib may process some pre-filter data + // but not produce output yet. + virtual FilterStatus ReadFilteredData(char* dest_buffer, int* dest_len); + + private: + enum DecodingStatus { + DECODING_UNINITIALIZED, + DECODING_IN_PROGRESS, + DECODING_DONE, + DECODING_ERROR + }; + + enum DecodingMode { + DECODE_MODE_GZIP, + DECODE_MODE_DEFLATE, + DECODE_MODE_UNKNOWN + }; + + enum GZipCheckHeaderState { + GZIP_CHECK_HEADER_IN_PROGRESS, + GZIP_GET_COMPLETE_HEADER, + GZIP_GET_INVALID_HEADER + }; + + static const int kGZipFooterSize = 8; + + // Parses and verifies the GZip header. + // Upon exit, the function updates gzip_header_status_ accordingly. + // + // The function returns Filter::FILTER_OK if it gets a complete header and + // there are more data in the pre-filter buffer. + // The function returns Filter::FILTER_NEED_MORE_DATA if it parses all data + // in the pre-filter buffer, either getting a complete header or a partial + // header. The caller needs to check gzip_header_status_ and call this + // function again for partial header. + // The function returns Filter::FILTER_ERROR if error occurs. + FilterStatus CheckGZipHeader(); + + // Internal function to decode the pre-filter data and writes the output into + // the dest_buffer passed in. + // + // This is the internal version of ReadFilteredData. See the latter's + // comments for the use of function. + FilterStatus DoInflate(char* dest_buffer, int* dest_len); + + // Inserts a zlib header to the data stream before calling zlib inflate. + // This is used to work around server bugs. 
See more comments at the place + // it is called in gzip_filter.cc. + // The function returns true on success and false otherwise. + bool InsertZlibHeader(); + + // Skip the 8 byte GZip footer after z_stream_end + void SkipGZipFooter(); + + // Tracks the status of decoding. + // This variable is initialized by InitDecoding and updated only by + // ReadFilteredData. + DecodingStatus decoding_status_; + + // Indicates the type of content decoding the GZipFilter is performing. + // This variable is set only once by InitDecoding. + DecodingMode decoding_mode_; + + // Used to parse the gzip header in gzip stream. + // It is used when the decoding_mode_ is DECODE_MODE_GZIP. + scoped_ptr<GZipHeader> gzip_header_; + + // Tracks the progress of parsing gzip header. + // This variable is maintained by gzip_header_. + GZipCheckHeaderState gzip_header_status_; + + // A flag used by InsertZlibHeader to record whether we've successfully added + // a zlib header to this stream. + bool zlib_header_added_; + + // Tracks how many bytes of gzip footer have been received. + int gzip_footer_bytes_; + + // The control block of zlib which actually does the decoding. + // This data structure is initialized by InitDecoding and updated only by + // DoInflate, with InsertZlibHeader being the exception as a workaround. + scoped_ptr<z_stream> zlib_stream_; + + DISALLOW_EVIL_CONSTRUCTORS(GZipFilter); +}; + +#endif // NET_BASE_GZIP_FILTER_H__ diff --git a/net/base/gzip_filter_unittest.cc b/net/base/gzip_filter_unittest.cc new file mode 100644 index 0000000..911bd4e --- /dev/null +++ b/net/base/gzip_filter_unittest.cc @@ -0,0 +1,417 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include "minmax.h" + +#include <fstream> +#include <iostream> + +#include "base/file_util.h" +#include "base/path_service.h" +#include "base/scoped_ptr.h" +#include "net/base/gzip_filter.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "third_party/zlib/zlib.h" + +namespace { + +const int kDefaultBufferSize = 4096; +const int kSmallBufferSize = 128; +const int kMaxBufferSize = 1048576; // 1048576 == 2^20 == 1 MB + +const char kApplicationOctetStream[] = "application/octet-stream"; +const char kApplicationXGzip[] = "application/x-gzip"; +const char kApplicationGzip[] = "application/gzip"; +const char kApplicationXGunzip[] = "application/x-gunzip"; + +// The GZIP header (see RFC 1952): +// +---+---+---+---+---+---+---+---+---+---+ +// |ID1|ID2|CM |FLG| MTIME |XFL|OS | +// +---+---+---+---+---+---+---+---+---+---+ +// ID1 \037 +// ID2 \213 +// CM \010 (compression method == DEFLATE) +// FLG \000 (special flags that we do not support) +// MTIME Unix format modification time (0 means not available) +// XFL 2-4? DEFLATE flags +// OS ???? Operating system indicator (255 means unknown) +// +// Header value we generate: +const char kGZipHeader[] = { '\037', '\213', '\010', '\000', '\000', + '\000', '\000', '\000', '\002', '\377' }; + +enum EncodeMode { + ENCODE_GZIP, // Wrap the deflate with a GZip header. + ENCODE_DEFLATE // Raw deflate. +}; + +class GZipUnitTest : public testing::Test { + protected: + virtual void SetUp() { + deflate_encode_buffer_ = NULL; + gzip_encode_buffer_ = NULL; + + // Get the path of source data file. + std::wstring file_path; + PathService::Get(base::DIR_SOURCE_ROOT, &file_path); + file_util::AppendToPath(&file_path, L"net"); + file_util::AppendToPath(&file_path, L"data"); + file_util::AppendToPath(&file_path, L"filter_unittests"); + file_util::AppendToPath(&file_path, L"google.txt"); + + // Read data from the file into buffer. 
+ file_util::ReadFileToString(file_path, &source_buffer_); + + // Encode the data with deflate + deflate_encode_buffer_ = new char[kDefaultBufferSize]; + ASSERT_TRUE(deflate_encode_buffer_ != NULL); + + deflate_encode_len_ = kDefaultBufferSize; + int code = CompressAll(ENCODE_DEFLATE , source_buffer(), source_len(), + deflate_encode_buffer_, &deflate_encode_len_); + ASSERT_TRUE(code == Z_STREAM_END); + ASSERT_TRUE(deflate_encode_len_ > 0); + ASSERT_TRUE(deflate_encode_len_ <= kDefaultBufferSize); + + // Encode the data with gzip + gzip_encode_buffer_ = new char[kDefaultBufferSize]; + ASSERT_TRUE(gzip_encode_buffer_ != NULL); + + gzip_encode_len_ = kDefaultBufferSize; + code = CompressAll(ENCODE_GZIP, source_buffer(), source_len(), + gzip_encode_buffer_, &gzip_encode_len_); + ASSERT_TRUE(code == Z_STREAM_END); + ASSERT_TRUE(gzip_encode_len_ > 0); + ASSERT_TRUE(gzip_encode_len_ <= kDefaultBufferSize); + } + + virtual void TearDown() { + delete[] deflate_encode_buffer_; + deflate_encode_buffer_ = NULL; + + delete[] gzip_encode_buffer_; + gzip_encode_buffer_ = NULL; + } + + // Compress the data in source with deflate encoding and write output to the + // buffer provided by dest. The function returns Z_OK if success, and returns + // other zlib error code if fail. + // The parameter mode specifies the encoding mechanism. + // The dest buffer should be large enough to hold all the output data. 
+ int CompressAll(EncodeMode mode, const char* source, int source_size, + char* dest, int* dest_len) { + z_stream zlib_stream; + memset(&zlib_stream, 0, sizeof(zlib_stream)); + int code; + + // Initialize zlib + if (mode == ENCODE_GZIP) { + code = deflateInit2(&zlib_stream, Z_DEFAULT_COMPRESSION, Z_DEFLATED, + -MAX_WBITS, + 8, // DEF_MEM_LEVEL + Z_DEFAULT_STRATEGY); + } else { + code = deflateInit(&zlib_stream, Z_DEFAULT_COMPRESSION); + } + + if (code != Z_OK) + return code; + + // Fill in zlib control block + zlib_stream.next_in = bit_cast<Bytef*>(source); + zlib_stream.avail_in = source_size; + zlib_stream.next_out = bit_cast<Bytef*>(dest); + zlib_stream.avail_out = *dest_len; + + // Write header if needed + if (mode == ENCODE_GZIP) { + if (zlib_stream.avail_out < sizeof(kGZipHeader)) + return Z_BUF_ERROR; + memcpy(zlib_stream.next_out, kGZipHeader, sizeof(kGZipHeader)); + zlib_stream.next_out += sizeof(kGZipHeader); + zlib_stream.avail_out -= sizeof(kGZipHeader); + } + + // Do deflate + code = MOZ_Z_deflate(&zlib_stream, Z_FINISH); + *dest_len = *dest_len - zlib_stream.avail_out; + + MOZ_Z_deflateEnd(&zlib_stream); + return code; + } + + // Use filter to decode compressed data, and compare the decoding result with + // the orginal Data. + // Parameters: Source and source_len are original data and its size. + // Encoded_source and encoded_source_len are compressed data and its size. + // Output_buffer_size specifies the size of buffer to read out data from + // filter. + void DecodeAndCompareWithFilter(Filter* filter, + const char* source, + int source_len, + const char* encoded_source, + int encoded_source_len, + int output_buffer_size) { + // Make sure we have enough space to hold the decoding output. 
+ ASSERT_TRUE(source_len <= kDefaultBufferSize); + ASSERT_TRUE(output_buffer_size <= kDefaultBufferSize); + + char decode_buffer[kDefaultBufferSize]; + char* decode_next = decode_buffer; + int decode_avail_size = kDefaultBufferSize; + + const char* encode_next = encoded_source; + int encode_avail_size = encoded_source_len; + + int code = Filter::FILTER_OK; + while (code != Filter::FILTER_DONE) { + int encode_data_len; + encode_data_len = min(encode_avail_size, filter->stream_buffer_size()); + memcpy(filter->stream_buffer(), encode_next, encode_data_len); + filter->FlushStreamBuffer(encode_data_len); + encode_next += encode_data_len; + encode_avail_size -= encode_data_len; + + while (1) { + int decode_data_len = min(decode_avail_size, output_buffer_size); + + code = filter->ReadFilteredData(decode_next, &decode_data_len); + decode_next += decode_data_len; + decode_avail_size -= decode_data_len; + + ASSERT_TRUE(code != Filter::FILTER_ERROR); + + if (code == Filter::FILTER_NEED_MORE_DATA || + code == Filter::FILTER_DONE) { + break; + } + } + } + + // Compare the decoding result with source data + int decode_total_data_len = kDefaultBufferSize - decode_avail_size; + EXPECT_TRUE(decode_total_data_len == source_len); + EXPECT_EQ(memcmp(source, decode_buffer, source_len), 0); + } + + // Unsafe function to use filter to decode compressed data. + // Parameters: Source and source_len are compressed data and its size. + // Dest is the buffer for decoding results. Upon entry, *dest_len is the size + // of the dest buffer. Upon exit, *dest_len is the number of chars written + // into the buffer. 
+ int DecodeAllWithFilter(Filter* filter, const char* source, int source_len, + char* dest, int* dest_len) { + memcpy(filter->stream_buffer(), source, source_len); + filter->FlushStreamBuffer(source_len); + return filter->ReadFilteredData(dest, dest_len); + } + + const char* source_buffer() const { return source_buffer_.data(); } + int source_len() const { return static_cast<int>(source_buffer_.size()); } + + std::string source_buffer_; + + char* deflate_encode_buffer_; + int deflate_encode_len_; + + char* gzip_encode_buffer_; + int gzip_encode_len_; +}; + +}; // namespace + +// Basic scenario: decoding deflate data with big enough buffer. +TEST_F(GZipUnitTest, DecodeDeflate) { + // Decode the compressed data with filter + scoped_ptr<Filter> filter( + Filter::Factory("deflate", kApplicationOctetStream, kDefaultBufferSize)); + ASSERT_TRUE(filter.get()); + memcpy(filter->stream_buffer(), deflate_encode_buffer_, deflate_encode_len_); + filter->FlushStreamBuffer(deflate_encode_len_); + + char deflate_decode_buffer[kDefaultBufferSize]; + int deflate_decode_size = kDefaultBufferSize; + filter->ReadFilteredData(deflate_decode_buffer, &deflate_decode_size); + + // Compare the decoding result with source data + EXPECT_TRUE(deflate_decode_size == source_len()); + EXPECT_EQ(memcmp(source_buffer(), deflate_decode_buffer, source_len()), 0); +} + +// Basic scenario: decoding gzip data with big enough buffer. 
+TEST_F(GZipUnitTest, DecodeGZip) { + // Decode the compressed data with filter + scoped_ptr<Filter> filter( + Filter::Factory("gzip", kApplicationOctetStream, kDefaultBufferSize)); + ASSERT_TRUE(filter.get()); + memcpy(filter->stream_buffer(), gzip_encode_buffer_, gzip_encode_len_); + filter->FlushStreamBuffer(gzip_encode_len_); + + char gzip_decode_buffer[kDefaultBufferSize]; + int gzip_decode_size = kDefaultBufferSize; + filter->ReadFilteredData(gzip_decode_buffer, &gzip_decode_size); + + // Compare the decoding result with source data + EXPECT_TRUE(gzip_decode_size == source_len()); + EXPECT_EQ(memcmp(source_buffer(), gzip_decode_buffer, source_len()), 0); +} + +// Tests we can call filter repeatedly to get all the data decoded. +// To do that, we create a filter with a small buffer that can not hold all +// the input data. +TEST_F(GZipUnitTest, DecodeWithSmallBuffer) { + scoped_ptr<Filter> filter( + Filter::Factory("deflate", kApplicationOctetStream, kSmallBufferSize)); + ASSERT_TRUE(filter.get()); + DecodeAndCompareWithFilter(filter.get(), source_buffer(), source_len(), + deflate_encode_buffer_, deflate_encode_len_, + kDefaultBufferSize); +} + +// Tests we can still decode with just 1 byte buffer in the filter. +// The purpose of this tests are two: (1) Verify filter can parse partial GZip +// header correctly. (2) Sometimes the filter will consume input without +// generating output. Verify filter can handle it correctly. +TEST_F(GZipUnitTest, DecodeWithOneByteBuffer) { + scoped_ptr<Filter> filter( + Filter::Factory("gzip", kApplicationOctetStream, 1)); + ASSERT_TRUE(filter.get()); + DecodeAndCompareWithFilter(filter.get(), source_buffer(), source_len(), + gzip_encode_buffer_, gzip_encode_len_, + kDefaultBufferSize); +} + +// Tests we can decode when caller has small buffer to read out from filter. 
+TEST_F(GZipUnitTest, DecodeWithSmallOutputBuffer) { + scoped_ptr<Filter> filter( + Filter::Factory("deflate", kApplicationOctetStream, kDefaultBufferSize)); + ASSERT_TRUE(filter.get()); + DecodeAndCompareWithFilter(filter.get(), source_buffer(), source_len(), + deflate_encode_buffer_, deflate_encode_len_, + kSmallBufferSize); +} + +// Tests we can still decode with just 1 byte buffer in the filter and just 1 +// byte buffer in the caller. +TEST_F(GZipUnitTest, DecodeWithOneByteInputAndOutputBuffer) { + scoped_ptr<Filter> filter( + Filter::Factory("gzip", kApplicationOctetStream, 1)); + ASSERT_TRUE(filter.get()); + DecodeAndCompareWithFilter(filter.get(), source_buffer(), source_len(), + gzip_encode_buffer_, gzip_encode_len_, 1); +} + +// Decoding deflate stream with corrupted data. +TEST_F(GZipUnitTest, DecodeCorruptedData) { + char corrupt_data[kDefaultBufferSize]; + int corrupt_data_len = deflate_encode_len_; + memcpy(corrupt_data, deflate_encode_buffer_, deflate_encode_len_); + + int pos = corrupt_data_len / 2; + corrupt_data[pos] = !corrupt_data[pos]; + + // Decode the corrupted data with filter + scoped_ptr<Filter> filter( + Filter::Factory("deflate", kApplicationOctetStream, kDefaultBufferSize)); + ASSERT_TRUE(filter.get()); + char corrupt_decode_buffer[kDefaultBufferSize]; + int corrupt_decode_size = kDefaultBufferSize; + + int code = DecodeAllWithFilter(filter.get(), corrupt_data, corrupt_data_len, + corrupt_decode_buffer, &corrupt_decode_size); + + // Expect failures + EXPECT_TRUE(code == Filter::FILTER_ERROR); +} + +// Decoding deflate stream with missing data. 
+TEST_F(GZipUnitTest, DecodeMissingData) { + char corrupt_data[kDefaultBufferSize]; + int corrupt_data_len = deflate_encode_len_; + memcpy(corrupt_data, deflate_encode_buffer_, deflate_encode_len_); + + int pos = corrupt_data_len / 2; + int len = corrupt_data_len - pos - 1; + memcpy(&corrupt_data[pos], &corrupt_data[pos+1], len); + --corrupt_data_len; + + // Decode the corrupted data with filter + scoped_ptr<Filter> filter( + Filter::Factory("deflate", kApplicationOctetStream, kDefaultBufferSize)); + ASSERT_TRUE(filter.get()); + char corrupt_decode_buffer[kDefaultBufferSize]; + int corrupt_decode_size = kDefaultBufferSize; + + int code = DecodeAllWithFilter(filter.get(), corrupt_data, corrupt_data_len, + corrupt_decode_buffer, &corrupt_decode_size); + + // Expect failures + EXPECT_TRUE(code == Filter::FILTER_ERROR); +} + +// Decoding gzip stream with corrupted header. +TEST_F(GZipUnitTest, DecodeCorruptedHeader) { + char corrupt_data[kDefaultBufferSize]; + int corrupt_data_len = gzip_encode_len_; + memcpy(corrupt_data, gzip_encode_buffer_, gzip_encode_len_); + + corrupt_data[2] = !corrupt_data[2]; + + // Decode the corrupted data with filter + scoped_ptr<Filter> filter( + Filter::Factory("gzip", kApplicationOctetStream, kDefaultBufferSize)); + ASSERT_TRUE(filter.get()); + char corrupt_decode_buffer[kDefaultBufferSize]; + int corrupt_decode_size = kDefaultBufferSize; + + int code = DecodeAllWithFilter(filter.get(), corrupt_data, corrupt_data_len, + corrupt_decode_buffer, &corrupt_decode_size); + + // Expect failures + EXPECT_TRUE(code == Filter::FILTER_ERROR); +} + +TEST_F(GZipUnitTest, ApacheWorkaround) { + const int kBufferSize = kDefaultBufferSize; // To fit in 80 cols. 
+ scoped_ptr<Filter> filter; + + filter.reset(Filter::Factory("gzip", kApplicationXGzip, kBufferSize)); + EXPECT_FALSE(filter.get()); + filter.reset(Filter::Factory("gzip", kApplicationGzip, kBufferSize)); + EXPECT_FALSE(filter.get()); + filter.reset(Filter::Factory("gzip", kApplicationXGunzip, kBufferSize)); + EXPECT_FALSE(filter.get()); + + filter.reset(Filter::Factory("x-gzip", kApplicationXGzip, kBufferSize)); + EXPECT_FALSE(filter.get()); + filter.reset(Filter::Factory("x-gzip", kApplicationGzip, kBufferSize)); + EXPECT_FALSE(filter.get()); + filter.reset(Filter::Factory("x-gzip", kApplicationXGunzip, kBufferSize)); + EXPECT_FALSE(filter.get()); +} diff --git a/net/base/gzip_header.cc b/net/base/gzip_header.cc new file mode 100644 index 0000000..db810dc --- /dev/null +++ b/net/base/gzip_header.cc @@ -0,0 +1,199 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <minmax.h> + +#include "net/base/gzip_header.h" + +#include "base/logging.h" +#include "third_party/zlib/zlib.h" // for Z_DEFAULT_COMPRESSION + +const uint8 GZipHeader::magic[] = { 0x1f, 0x8b }; + +// ---------------------------------------------------------------------- +// GZipHeader::ReadMore() +// Attempt to parse the beginning of the given buffer as a gzip +// header. If these bytes do not constitute a complete gzip header, +// return INCOMPLETE_HEADER. If these bytes do not constitute a +// *valid* gzip header, return INVALID_HEADER. If we find a +// complete header, return COMPLETE_HEADER and set the pointer +// pointed to by header_end to the first byte beyond the gzip header. 
+// ---------------------------------------------------------------------- + +GZipHeader::Status GZipHeader::ReadMore(const char* inbuf, int inbuf_len, + const char** header_end) { + DCHECK_GE(inbuf_len, 0); + const uint8* pos = reinterpret_cast<const uint8*>(inbuf); + const uint8* const end = pos + inbuf_len; + + while ( pos < end ) { + switch ( state_ ) { + case IN_HEADER_ID1: + if ( *pos != magic[0] ) return INVALID_HEADER; + pos++; + state_++; + break; + case IN_HEADER_ID2: + if ( *pos != magic[1] ) return INVALID_HEADER; + pos++; + state_++; + break; + case IN_HEADER_CM: + if ( *pos != Z_DEFLATED ) return INVALID_HEADER; + pos++; + state_++; + break; + case IN_HEADER_FLG: + flags_ = (*pos) & (FLAG_FHCRC | FLAG_FEXTRA | + FLAG_FNAME | FLAG_FCOMMENT); + pos++; + state_++; + break; + + case IN_HEADER_MTIME_BYTE_0: + pos++; + state_++; + break; + case IN_HEADER_MTIME_BYTE_1: + pos++; + state_++; + break; + case IN_HEADER_MTIME_BYTE_2: + pos++; + state_++; + break; + case IN_HEADER_MTIME_BYTE_3: + pos++; + state_++; + break; + + case IN_HEADER_XFL: + pos++; + state_++; + break; + + case IN_HEADER_OS: + pos++; + state_++; + break; + + case IN_XLEN_BYTE_0: + if ( !(flags_ & FLAG_FEXTRA) ) { + state_ = IN_FNAME; + break; + } + // We have a two-byte little-endian length, followed by a + // field of that length. + extra_length_ = *pos; + pos++; + state_++; + break; + case IN_XLEN_BYTE_1: + extra_length_ += *pos << 8; + pos++; + state_++; + // We intentionally fall through, because if we have a + // zero-length FEXTRA, we want to check to notice that we're + // done reading the FEXTRA before we exit this loop... + + case IN_FEXTRA: { + // Grab the rest of the bytes in the extra field, or as many + // of them as are actually present so far. 
+ const int num_extra_bytes = static_cast<const int>(min( + extra_length_, + (end - pos))); + pos += num_extra_bytes; + extra_length_ -= num_extra_bytes; + if ( extra_length_ == 0 ) { + state_ = IN_FNAME; // advance when we've seen extra_length_ bytes + flags_ &= ~FLAG_FEXTRA; // we're done with the FEXTRA stuff + } + break; + } + + case IN_FNAME: + if ( !(flags_ & FLAG_FNAME) ) { + state_ = IN_FCOMMENT; + break; + } + // See if we can find the end of the \0-terminated FNAME field. + pos = reinterpret_cast<const uint8*>(memchr(pos, '\0', (end - pos))); + if ( pos != NULL ) { + pos++; // advance past the '\0' + flags_ &= ~FLAG_FNAME; // we're done with the FNAME stuff + state_ = IN_FCOMMENT; + } else { + pos = end; // everything we have so far is part of the FNAME + } + break; + + case IN_FCOMMENT: + if ( !(flags_ & FLAG_FCOMMENT) ) { + state_ = IN_FHCRC_BYTE_0; + break; + } + // See if we can find the end of the \0-terminated FCOMMENT field. + pos = reinterpret_cast<const uint8*>(memchr(pos, '\0', (end - pos))); + if ( pos != NULL ) { + pos++; // advance past the '\0' + flags_ &= ~FLAG_FCOMMENT; // we're done with the FCOMMENT stuff + state_ = IN_FHCRC_BYTE_0; + } else { + pos = end; // everything we have so far is part of the FNAME + } + break; + + case IN_FHCRC_BYTE_0: + if ( !(flags_ & FLAG_FHCRC) ) { + state_ = IN_DONE; + break; + } + pos++; + state_++; + break; + + case IN_FHCRC_BYTE_1: + pos++; + flags_ &= ~FLAG_FHCRC; // we're done with the FHCRC stuff + state_++; + break; + + case IN_DONE: + *header_end = reinterpret_cast<const char*>(pos); + return COMPLETE_HEADER; + } + } + + if ( (state_ > IN_HEADER_OS) && (flags_ == 0) ) { + *header_end = reinterpret_cast<const char*>(pos); + return COMPLETE_HEADER; + } else { + return INCOMPLETE_HEADER; + } +} diff --git a/net/base/gzip_header.h b/net/base/gzip_header.h new file mode 100644 index 0000000..63ea5a9 --- /dev/null +++ b/net/base/gzip_header.h @@ -0,0 +1,120 @@ +// Copyright 2008, Google Inc. 
+// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// The GZipHeader class allows you to parse a gzip header, such as you +// might find at the beginning of a file compressed by gzip (ie, a .gz +// file), or at the beginning of an HTTP response that uses a gzip +// Content-Encoding. See RFC 1952 for the specification for the gzip +// header. +// +// The model is that you call ReadMore() for each chunk of bytes +// you've read from a file or socket. 
+// + +#ifndef NET_BASE_GZIPHEADER_H__ +#define NET_BASE_GZIPHEADER_H__ + +#include "base/basictypes.h" + +class GZipHeader { + public: + GZipHeader() { + Reset(); + } + ~GZipHeader() { + } + + // Wipe the slate clean and start from scratch. + void Reset() { + state_ = IN_HEADER_ID1; + flags_ = 0; + extra_length_ = 0; + } + + enum Status { + INCOMPLETE_HEADER, // don't have all the bits yet... + COMPLETE_HEADER, // complete, valid header + INVALID_HEADER, // found something invalid in the header + }; + + // Attempt to parse the given buffer as the next installment of + // bytes from a gzip header. If the bytes we've seen so far do not + // yet constitute a complete gzip header, return + // INCOMPLETE_HEADER. If these bytes do not constitute a *valid* + // gzip header, return INVALID_HEADER. When we've seen a complete + // gzip header, return COMPLETE_HEADER and set the pointer pointed + // to by header_end to the first byte beyond the gzip header. + Status ReadMore(const char* inbuf, int inbuf_len, + const char** header_end); + private: + + static const uint8 magic[]; // gzip magic header + + enum { // flags (see RFC) + FLAG_FTEXT = 0x01, // bit 0 set: file probably ascii text + FLAG_FHCRC = 0x02, // bit 1 set: header CRC present + FLAG_FEXTRA = 0x04, // bit 2 set: extra field present + FLAG_FNAME = 0x08, // bit 3 set: original file name present + FLAG_FCOMMENT = 0x10, // bit 4 set: file comment present + FLAG_RESERVED = 0xE0, // bits 5..7: reserved + }; + + enum State { + // The first 10 bytes are the fixed-size header: + IN_HEADER_ID1, + IN_HEADER_ID2, + IN_HEADER_CM, + IN_HEADER_FLG, + IN_HEADER_MTIME_BYTE_0, + IN_HEADER_MTIME_BYTE_1, + IN_HEADER_MTIME_BYTE_2, + IN_HEADER_MTIME_BYTE_3, + IN_HEADER_XFL, + IN_HEADER_OS, + + IN_XLEN_BYTE_0, + IN_XLEN_BYTE_1, + IN_FEXTRA, + + IN_FNAME, + + IN_FCOMMENT, + + IN_FHCRC_BYTE_0, + IN_FHCRC_BYTE_1, + + IN_DONE, + }; + + int state_; // our current State in the parsing FSM: an int so we can ++ + uint8 flags_; // the flags 
byte of the header ("FLG" in the RFC) + uint16 extra_length_; // how much of the "extra field" we have yet to read +}; + +#endif // NET_BASE_GZIPHEADER_H__ diff --git a/net/base/host_resolver.cc b/net/base/host_resolver.cc new file mode 100644 index 0000000..f3091a6 --- /dev/null +++ b/net/base/host_resolver.cc @@ -0,0 +1,161 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include "net/base/host_resolver.h" + +#include <ws2tcpip.h> +#include <wspiapi.h> // Needed for Win2k compat. + +#include "base/string_util.h" +#include "net/base/address_list.h" +#include "net/base/net_errors.h" +#include "net/base/winsock_init.h" + +namespace net { + +//----------------------------------------------------------------------------- + +static int ResolveAddrInfo(const std::string& host, const std::string& port, + struct addrinfo** results) { + struct addrinfo hints = {0}; + hints.ai_family = PF_UNSPEC; + hints.ai_flags = AI_ADDRCONFIG; + + // Restrict result set to only this socket type to avoid duplicates. + hints.ai_socktype = SOCK_STREAM; + + int err = getaddrinfo(host.c_str(), port.c_str(), &hints, results); + return err ? ERR_NAME_NOT_RESOLVED : OK; +} + +//----------------------------------------------------------------------------- + +struct HostResolver::Request : + public base::RefCountedThreadSafe<HostResolver::Request> { + Request() : error(OK), results(NULL) { + DuplicateHandle(GetCurrentProcess(), GetCurrentThread(), + GetCurrentProcess(), &origin_thread, + 0, FALSE, DUPLICATE_SAME_ACCESS); + } + ~Request() { + CloseHandle(origin_thread); + } + + // Only used on the origin thread (where Resolve was called). + AddressList* addresses; + CompletionCallback* callback; + + // Set on the origin thread, read on the worker thread. + std::string host; + std::string port; + HANDLE origin_thread; + + // Assigned on the worker thread. + int error; + struct addrinfo* results; + + static void CALLBACK ReturnResults(ULONG_PTR param) { + Request* r = reinterpret_cast<Request*>(param); + // The HostResolver may have gone away. 
+ if (r->addresses) { + DCHECK(r->addresses); + r->addresses->Adopt(r->results); + if (r->callback) + r->callback->Run(r->error); + } else if (r->results) { + freeaddrinfo(r->results); + } + r->Release(); + } + + static DWORD CALLBACK DoLookup(void* param) { + Request* r = static_cast<Request*>(param); + + r->error = ResolveAddrInfo(r->host, r->port, &r->results); + + if (!QueueUserAPC(ReturnResults, r->origin_thread, + reinterpret_cast<ULONG_PTR>(param))) { + // The origin thread must have gone away. + if (r->results) + freeaddrinfo(r->results); + r->Release(); + } + return 0; + } +}; + +//----------------------------------------------------------------------------- + +HostResolver::HostResolver() { + EnsureWinsockInit(); +} + +HostResolver::~HostResolver() { + if (request_) { + request_->addresses = NULL; + request_->callback = NULL; + } +} + +int HostResolver::Resolve(const std::string& hostname, int port, + AddressList* addresses, + CompletionCallback* callback) { + DCHECK(!request_); + + const std::string& port_str = IntToString(port); + + // Do a synchronous resolution? + if (!callback) { + struct addrinfo* results; + int rv = ResolveAddrInfo(hostname, port_str, &results); + if (rv == OK) + addresses->Adopt(results); + return rv; + } + + // Dispatch to worker thread... + request_ = new Request(); + request_->host = hostname; + request_->port = port_str; + request_->addresses = addresses; + request_->callback = callback; + + // Balanced in Request::ReturnResults (or DoLookup if there is an error). 
+ request_->AddRef(); + if (!QueueUserWorkItem(Request::DoLookup, request_, WT_EXECUTELONGFUNCTION)) { + DLOG(ERROR) << "QueueUserWorkItem failed: " << GetLastError(); + request_->Release(); + request_ = NULL; + return ERR_FAILED; + } + + return ERR_IO_PENDING; +} + +} // namespace net diff --git a/net/base/host_resolver.h b/net/base/host_resolver.h new file mode 100644 index 0000000..327e019 --- /dev/null +++ b/net/base/host_resolver.h @@ -0,0 +1,72 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef NET_BASE_HOST_RESOLVER_H__ +#define NET_BASE_HOST_RESOLVER_H__ + +#include "base/basictypes.h" +#include "base/ref_counted.h" +#include "net/base/completion_callback.h" + +namespace net { + +class AddressList; + +// This class represents the task of resolving a single hostname. To resolve +// multiple hostnames, a new resolver will need to be created for each. +class HostResolver { + public: + HostResolver(); + + // If a completion callback is pending when the resolver is destroyed, the + // host resolution is cancelled, and the completion callback will not be + // called. + ~HostResolver(); + + // Resolves the given hostname, filling out the |addresses| object upon + // success. The |port| parameter is optional (will be set as the sin_port + // field of the sockaddr_in{6} struct). Returns OK if successful or an error + // code upon failure. + // + // When callback is non-null, ERR_IO_PENDING is returned if the operation + // could not be completed synchronously, in which case the result code will + // be passed to the callback when available. 
+ // + int Resolve(const std::string& hostname, int port, + AddressList* addresses, CompletionCallback* callback); + + private: + DISALLOW_EVIL_CONSTRUCTORS(HostResolver); + struct Request; + scoped_refptr<Request> request_; +}; + +} // namespace net + +#endif // NET_BASE_HOST_RESOLVER_H__ diff --git a/net/base/listen_socket.cc b/net/base/listen_socket.cc new file mode 100644 index 0000000..4893dcc --- /dev/null +++ b/net/base/listen_socket.cc @@ -0,0 +1,188 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// winsock2.h must be included first in order to ensure it is included before +// windows.h. +#include <winsock2.h> + +#include "net/base/listen_socket.h" + +#include "base/thread.h" + +#define READ_BUF_SIZE 200 + +ListenSocket::ListenSocket(SOCKET s, ListenSocketDelegate *del, + MessageLoop *loop) + : socket_(s), socket_delegate_(del), loop_(loop) { + socket_event_ = WSACreateEvent(); + WSAEventSelect(socket_, socket_event_, FD_ACCEPT | FD_CLOSE | FD_READ); + loop_->WatchObject(socket_event_, this); +} + +ListenSocket::~ListenSocket() { + DCHECK(MessageLoop::current() == loop_); + if (socket_event_) { + loop_->WatchObject(socket_event_, NULL); + WSACloseEvent(socket_event_); + } + if (socket_) { + closesocket(socket_); + } +} + +SOCKET ListenSocket::Listen(std::string ip, int port) { + SOCKET s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + if (s != INVALID_SOCKET) { + sockaddr_in addr; + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = inet_addr(ip.c_str()); + addr.sin_port = htons(port); + if (bind(s, reinterpret_cast<SOCKADDR*>(&addr), sizeof(addr))) { + closesocket(s); + s = INVALID_SOCKET; + } + } + return s; +} + +ListenSocket* ListenSocket::Listen(std::string ip, int port, + ListenSocketDelegate* del, MessageLoop* l) { + SOCKET s = Listen(ip, port); + if (s == INVALID_SOCKET) { + // TODO(erikkay): error handling + } else { + ListenSocket* sock = new ListenSocket(s, del, l); + sock->Listen(); + return sock; + } + return 
NULL; +} + +void ListenSocket::Listen() { + DCHECK(MessageLoop::current() == loop_); + int backlog = 10; // TODO(erikkay): maybe don't allow any backlog? + listen(socket_, backlog); + // TODO(erikkay): handle error +} + +SOCKET ListenSocket::Accept(SOCKET s) { + sockaddr_in from; + int from_len = sizeof(from); + SOCKET conn = accept(s, reinterpret_cast<SOCKADDR*>(&from), &from_len); + if (conn != INVALID_SOCKET) { + // a non-blocking socket + unsigned long no_block = 1; + ioctlsocket(conn, FIONBIO, &no_block); + } + return conn; +} + +void ListenSocket::Accept() { + SOCKET conn = Accept(socket_); + if (conn == INVALID_SOCKET) { + // TODO + } else { + scoped_refptr<ListenSocket> sock = + new ListenSocket(conn, socket_delegate_, loop_); + // it's up to the delegate to AddRef if it wants to keep it around + socket_delegate_->DidAccept(this, sock); + } +} + +void ListenSocket::Read() { + char buf[READ_BUF_SIZE+1]; + int len; + do { + len = recv(socket_, buf, READ_BUF_SIZE, 0); + if (len == SOCKET_ERROR) { + int err = WSAGetLastError(); + if (err == WSAEWOULDBLOCK) { + break; + } else { + // TODO - error + break; + } + } else if (len == 0) { + // socket closed, ignore + } else { + // TODO(erikkay): maybe change DidRead to take a length instead + DCHECK(len > 0 && len <= READ_BUF_SIZE); + buf[len] = 0; + socket_delegate_->DidRead(this, buf); + } + } while (len == READ_BUF_SIZE); +} + +void ListenSocket::Close() { + socket_delegate_->DidClose(this); +} + +// MessageLoop watcher callback +void ListenSocket::OnObjectSignaled(HANDLE object) { + WSANETWORKEVENTS ev; + if (SOCKET_ERROR == WSAEnumNetworkEvents(socket_, socket_event_, &ev)) { + // TODO + return; + } + if (ev.lNetworkEvents == 0) { + // Occasionally the event is set even though there is no new data. + // The net seems to think that this is ignorable. 
+ return; + } + if (ev.lNetworkEvents & FD_ACCEPT) { + Accept(); + } + if (ev.lNetworkEvents & FD_READ) { + Read(); + } + if (ev.lNetworkEvents & FD_CLOSE) { + Close(); + } +} + +void ListenSocket::SendInternal(const char* bytes, int len) { + DCHECK(MessageLoop::current() == loop_); + int sent = send(socket_, bytes, len, 0); + if (sent == SOCKET_ERROR) { + // TODO + } else if (sent != len) { + // TODO + } +} + +void ListenSocket::Send(const char* bytes, int len, bool append_linefeed) { + SendInternal(bytes, len); + if (append_linefeed) { + SendInternal("\r\n", 2); + } +} + +void ListenSocket::Send(const std::string& str, bool append_linefeed) { + Send(str.data(), static_cast<int>(str.length()), append_linefeed); +} diff --git a/net/base/listen_socket.h b/net/base/listen_socket.h new file mode 100644 index 0000000..92df390 --- /dev/null +++ b/net/base/listen_socket.h @@ -0,0 +1,99 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// TCP/IP server that handles IO asynchronously in the specified MessageLoop. +// These objects are NOT thread safe. They use WSAEVENT handles to monitor +// activity in a given MessageLoop. This means that callbacks will +// happen in that loop's thread always and that all other methods (including +// constructors and destructors) should also be called from the same thread. + +#ifndef NET_BASE_SOCKET_H__ +#define NET_BASE_SOCKET_H__ + +#include "base/basictypes.h" +#include "base/message_loop.h" +#include "base/ref_counted.h" + +#include <winsock2.h> + +// Implements a raw socket interface +class ListenSocket : public base::RefCountedThreadSafe<ListenSocket>, + public MessageLoop::Watcher { + public: + // TODO(erikkay): this delegate should really be split into two parts + // to split up the listener from the connected socket. Perhaps this class + // should be split up similarly. + class ListenSocketDelegate { + public: + // server is the original listening Socket, connection is the new + // Socket that was created. Ownership of connection is transferred + // to the delegate with this call. + virtual void DidAccept(ListenSocket *server, ListenSocket *connection) = 0; + virtual void DidRead(ListenSocket *connection, const std::string& data) = 0; + virtual void DidClose(ListenSocket *sock) = 0; + }; + + // Listen on port for the specified IP address. Use 127.0.0.1 to only + // accept local connections. 
+ static ListenSocket* Listen(std::string ip, int port, + ListenSocketDelegate* del, + MessageLoop* loop); + virtual ~ListenSocket(); + + // send data to the socket + void Send(const char* bytes, int len, bool append_linefeed = false); + void Send(const std::string& str, bool append_linefeed = false); + + protected: + ListenSocket(SOCKET s, ListenSocketDelegate* del, MessageLoop* loop); + static SOCKET Listen(std::string ip, int port); + // if valid, returned SOCKET is non-blocking + static SOCKET Accept(SOCKET s); + + virtual void SendInternal(const char* bytes, int len); + + // MessageLoop watcher callback + virtual void OnObjectSignaled(HANDLE object); + + virtual void Listen(); + virtual void Accept(); + virtual void Read(); + virtual void Close(); + + SOCKET socket_; + HANDLE socket_event_; + ListenSocketDelegate *socket_delegate_; + MessageLoop* loop_; + + private: + + DISALLOW_EVIL_CONSTRUCTORS(ListenSocket); +}; + +#endif // BASE_SOCKET_H__ diff --git a/net/base/listen_socket_unittest.cc b/net/base/listen_socket_unittest.cc new file mode 100644 index 0000000..f8b2609 --- /dev/null +++ b/net/base/listen_socket_unittest.cc @@ -0,0 +1,67 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Tests ListenSocket. + +#include "net/base/listen_socket_unittest.h" + +namespace { + +class ListenSocketTest: public testing::Test { +public: + ListenSocketTest() { + tester_ = NULL; + } + + virtual void SetUp() { + tester_ = new ListenSocketTester(); + tester_->SetUp(); + } + + virtual void TearDown() { + tester_->TearDown(); + tester_ = NULL; + } + + scoped_refptr<ListenSocketTester> tester_; +}; + +} // namespace + +TEST_F(ListenSocketTest, ClientSend) { + tester_->TestClientSend(); +} + +TEST_F(ListenSocketTest, ClientSendLong) { + tester_->TestClientSendLong(); +} + +TEST_F(ListenSocketTest, ServerSend) { + tester_->TestServerSend(); +} diff --git a/net/base/listen_socket_unittest.h b/net/base/listen_socket_unittest.h new file mode 100644 index 0000000..8160ade --- /dev/null +++ b/net/base/listen_socket_unittest.h @@ -0,0 +1,291 @@ +// Copyright 2008, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#ifndef LISTEN_SOCKET_UNITTEST_H__ +#define LISTEN_SOCKET_UNITTEST_H__ + +#include <winsock2.h> + +#include <deque> +#include <string> + +#include "base/basictypes.h" +#include "base/logging.h" +#include "base/string_util.h" +#include "base/thread.h" +#include "net/base/listen_socket.h" +#include "net/base/winsock_init.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace { + +const int TEST_PORT = 9999; +const std::string HELLO_WORLD("HELLO, WORLD"); +const int MAX_QUEUE_SIZE = 20; + +enum ActionType { + ACTION_NONE = 0, + ACTION_LISTEN = 1, + ACTION_ACCEPT = 2, + ACTION_READ = 3, + ACTION_SEND = 4, + ACTION_CLOSE = 5, +}; + +class ListenSocketTestAction { + public: + ListenSocketTestAction() : action_(ACTION_NONE) {} + explicit ListenSocketTestAction(ActionType action) : action_(action) {} + ListenSocketTestAction(ActionType action, std::string data) + : action_(action), + data_(data) {} + + const std::string data() const { return data_; } + const ActionType type() const { return action_; } + + private: + std::string data_; + ActionType action_; +}; + +// This had to be split out into a separate class because I couldn't +// make a the testing::Test class refcounted. 
+class ListenSocketTester : + public ListenSocket::ListenSocketDelegate, + public base::RefCountedThreadSafe<ListenSocketTester> { + protected: + virtual ListenSocket* DoListen() { + return ListenSocket::Listen("127.0.0.1", TEST_PORT, this, loop_); + } + + public: + ListenSocketTester() + : server_(NULL), + connection_(NULL), + thread_(NULL), + loop_(NULL) { + } + + virtual ~ListenSocketTester() { + } + + virtual void SetUp() { + InitializeCriticalSection(&lock_); + semaphore_ = CreateSemaphore(NULL, 0, MAX_QUEUE_SIZE, NULL); + server_ = NULL; + WinsockInit::Init(); + + thread_.reset(new Thread("socketio_test")); + thread_->Start(); + loop_ = thread_->message_loop(); + + loop_->PostTask(FROM_HERE, NewRunnableMethod( + this, &ListenSocketTester::Listen)); + + // verify Listen succeeded + ASSERT_TRUE(NextAction()); + ASSERT_FALSE(server_ == NULL); + ASSERT_EQ(ACTION_LISTEN, last_action_.type()); + + // verify the connect/accept and setup test_socket_ + test_socket_ = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + struct sockaddr_in client; + client.sin_family = AF_INET; + client.sin_addr.s_addr = inet_addr("127.0.0.1"); + client.sin_port = htons(TEST_PORT); + int ret = connect(test_socket_, + reinterpret_cast<SOCKADDR*>(&client), sizeof(client)); + ASSERT_NE(ret, SOCKET_ERROR); + // non-blocking socket + unsigned long no_block = 1; + ioctlsocket(test_socket_, FIONBIO, &no_block); + ASSERT_TRUE(NextAction()); + ASSERT_EQ(ACTION_ACCEPT, last_action_.type()); + } + + virtual void TearDown() { + // verify close + closesocket(test_socket_); + ASSERT_TRUE(NextAction(5000)); + ASSERT_EQ(ACTION_CLOSE, last_action_.type()); + + CloseHandle(semaphore_); + semaphore_ = 0; + DeleteCriticalSection(&lock_); + if (connection_) { + loop_->ReleaseSoon(FROM_HERE, connection_); + connection_ = NULL; + } + if (server_) { + loop_->ReleaseSoon(FROM_HERE, server_); + server_ = NULL; + } + thread_.reset(); + loop_ = NULL; + WinsockInit::Cleanup(); + } + + void ReportAction(const 
ListenSocketTestAction& action) { + EnterCriticalSection(&lock_); + queue_.push_back(action); + LeaveCriticalSection(&lock_); + ReleaseSemaphore(semaphore_, 1, NULL); + } + + bool NextAction(int timeout = 5000) { + DWORD ret = ::WaitForSingleObject(semaphore_, timeout); + if (ret != WAIT_OBJECT_0) + return false; + EnterCriticalSection(&lock_); + if (queue_.size() == 0) + return false; + last_action_ = queue_.front(); + queue_.pop_front(); + LeaveCriticalSection(&lock_); + return true; + } + + // read all pending data from the test socket + int ClearTestSocket() { + char buf[1024]; + int len = 0; + do { + int ret = recv(test_socket_, buf, 1024, 0); + if (ret < 0) { + int err = WSAGetLastError(); + if (err == WSAEWOULDBLOCK) { + break; + } + } else { + len += ret; + } + } while (true); + return len; + } + + void Listen() { + server_ = DoListen(); + if (server_) { + server_->AddRef(); + ReportAction(ListenSocketTestAction(ACTION_LISTEN)); + } + } + + void SendFromTester() { + connection_->Send(HELLO_WORLD); + ReportAction(ListenSocketTestAction(ACTION_SEND)); + } + + virtual void DidAccept(ListenSocket *server, ListenSocket *connection) { + connection_ = connection; + connection_->AddRef(); + ReportAction(ListenSocketTestAction(ACTION_ACCEPT)); + } + + virtual void DidRead(ListenSocket *connection, const std::string& data) { + ReportAction(ListenSocketTestAction(ACTION_READ, data)); + } + + virtual void DidClose(ListenSocket *sock) { + ReportAction(ListenSocketTestAction(ACTION_CLOSE)); + } + + virtual bool Send(SOCKET sock, const std::string& str) { + int len = static_cast<int>(str.length()); + int send_len = send(sock, str.data(), len, 0); + if (send_len != len) { + return false; + } + return true; + } + + // verify the send/read from client to server + void TestClientSend() { + ASSERT_TRUE(Send(test_socket_, HELLO_WORLD)); + ASSERT_TRUE(NextAction()); + ASSERT_EQ(ACTION_READ, last_action_.type()); + ASSERT_EQ(last_action_.data(), HELLO_WORLD); + } + + // verify 
send/read of a longer string + void TestClientSendLong() { + int hello_len = static_cast<int>(HELLO_WORLD.length()); + std::string long_string; + int long_len = 0; + for (int i = 0; i < 200; i++) { + long_string += HELLO_WORLD; + long_len += hello_len; + } + ASSERT_TRUE(Send(test_socket_, long_string)); + int read_len = 0; + while (read_len < long_len) { + ASSERT_TRUE(NextAction()); + ASSERT_EQ(ACTION_READ, last_action_.type()); + std::string last_data = last_action_.data(); + size_t len = last_data.length(); + if (long_string.compare(read_len, len, last_data)) { + ASSERT_EQ(long_string.compare(read_len, len, last_data), 0); + } + read_len += static_cast<int>(last_data.length()); + } + ASSERT_EQ(read_len, long_len); + } + + // verify a send/read from server to client + void TestServerSend() { + loop_->PostTask(FROM_HERE, NewRunnableMethod( + this, &ListenSocketTester::SendFromTester)); + ASSERT_TRUE(NextAction()); + ASSERT_EQ(ACTION_SEND, last_action_.type()); + // TODO(erikkay): Without this sleep, the recv seems to fail a small amount + // of the time. I could fix this by making the socket blocking, but then + // this test might hang in the case of errors. It would be nice to do + // something that felt more reliable here. + Sleep(10); + const int buf_len = 200; + char buf[buf_len+1]; + int recv_len = recv(test_socket_, buf, buf_len, 0); + buf[recv_len] = 0; + ASSERT_EQ(buf, HELLO_WORLD); + } + + scoped_ptr<Thread> thread_; + MessageLoop* loop_; + ListenSocket* server_; + ListenSocket* connection_; + CRITICAL_SECTION lock_; + HANDLE semaphore_; + ListenSocketTestAction last_action_; + std::deque<ListenSocketTestAction> queue_; + SOCKET test_socket_; +}; + +} // namespace + +#endif // LISTEN_SOCKET_UNITTEST_H__ diff --git a/net/base/load_flags.h b/net/base/load_flags.h new file mode 100644 index 0000000..9a33b37 --- /dev/null +++ b/net/base/load_flags.h @@ -0,0 +1,88 @@ +// Copyright 2008, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef NET_BASE_LOAD_FLAGS_H__ +#define NET_BASE_LOAD_FLAGS_H__ + +namespace net { + +// These flags provide metadata about the type of the load request. They are +// intended to be OR'd together. 
+enum {
+  // No flags set.
+  LOAD_NORMAL = 0,
+
+  // This is "normal reload", meaning an if-none-match/if-modified-since query
+  LOAD_VALIDATE_CACHE = 1 << 0,
+
+  // This is "shift-reload", meaning a "pragma: no-cache" end-to-end fetch
+  LOAD_BYPASS_CACHE = 1 << 1,
+
+  // This is a back/forward style navigation where the cached content should
+  // be preferred over any protocol specific cache validation.
+  LOAD_PREFERRING_CACHE = 1 << 2,
+
+  // This is a navigation that will fail if it cannot serve the requested
+  // resource from the cache (or some equivalent local store).
+  LOAD_ONLY_FROM_CACHE = 1 << 3,
+
+  // This is a navigation that will not use the cache at all. It does not
+  // impact the HTTP request headers.
+  LOAD_DISABLE_CACHE = 1 << 4,
+
+  // This is a navigation that will not be intercepted by any registered
+  // URLRequest::Interceptors.
+  LOAD_DISABLE_INTERCEPT = 1 << 5,
+
+  // If present, upload progress messages should be provided to initiator.
+  LOAD_ENABLE_UPLOAD_PROGRESS = 1 << 6,
+
+  // NOTE(review): bit 7 (1 << 7) is not assigned to any flag in this enum.
+
+  // If present, ignores certificate mismatches with the domain name.
+  // (The default behavior is to trigger an OnSSLCertificateError callback.)
+  LOAD_IGNORE_CERT_COMMON_NAME_INVALID = 1 << 8,
+
+  // If present, ignores certificate expiration dates
+  // (The default behavior is to trigger an OnSSLCertificateError callback).
+  LOAD_IGNORE_CERT_DATE_INVALID = 1 << 9,
+
+  // If present, trusts all certificate authorities
+  // (The default behavior is to trigger an OnSSLCertificateError callback).
+  LOAD_IGNORE_CERT_AUTHORITY_INVALID = 1 << 10,
+
+  // If present, ignores certificate revocation
+  // (The default behavior is to trigger an OnSSLCertificateError callback).
+  LOAD_IGNORE_CERT_REVOCATION = 1 << 11,
+
+  // If present, ignores wrong key usage of the certificate
+  // (The default behavior is to trigger an OnSSLCertificateError callback).
+  LOAD_IGNORE_CERT_WRONG_USAGE = 1 << 12,
+};
+
+}  // namespace net
+
+#endif  // NET_BASE_LOAD_FLAGS_H__
diff --git a/net/base/load_states.h b/net/base/load_states.h
new file mode 100644
index 0000000..719026e
--- /dev/null
+++ b/net/base/load_states.h
@@ -0,0 +1,90 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//    * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//    * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef NET_BASE_LOAD_STATES_H__
+#define NET_BASE_LOAD_STATES_H__
+
+namespace net {
+
+// These states correspond to the lengthy periods of time that a resource load
+// may be blocked and unable to make progress.
+//
+// No enumerator has an explicit value, so they are numbered sequentially
+// starting at 0 in the order listed below.
+enum LoadState {
+  // This is the default state. It corresponds to a resource load that has
+  // either not yet begun or is idle waiting for the consumer to do something
+  // to move things along (e.g., the consumer of a URLRequest may not have
+  // called Read yet).
+  LOAD_STATE_IDLE,
+
+  // This state corresponds to a resource load that is blocked waiting for
+  // access to a resource in the cache. If multiple requests are made for the
+  // same resource, the first request will be responsible for writing (or
+  // updating) the cache entry and the second request will be deferred until
+  // the first completes. This may be done to optimize for cache reuse.
+  LOAD_STATE_WAITING_FOR_CACHE,
+
+  // This state corresponds to a resource load that is blocked waiting for a
+  // proxy autoconfig script to return a proxy server to use. This state may
+  // take a while if the proxy script needs to resolve the IP address of the
+  // host before deciding what proxy to use.
+  LOAD_STATE_RESOLVING_PROXY_FOR_URL,
+
+  // This state corresponds to a resource load that is blocked waiting for a
+  // host name to be resolved. This could either indicate resolution of the
+  // origin server corresponding to the resource or to the host name of a proxy
+  // server used to fetch the resource.
+  LOAD_STATE_RESOLVING_HOST,
+
+  // This state corresponds to a resource load that is blocked waiting for a
+  // TCP connection (or other network connection) to be established. HTTP
+  // requests that reuse a keep-alive connection skip this state.
+  LOAD_STATE_CONNECTING,
+
+  // This state corresponds to a resource load that is blocked waiting to
+  // completely upload a request to a server. In the case of an HTTP POST
+  // request, this state includes the period of time during which the message
+  // body is being uploaded.
+  LOAD_STATE_SENDING_REQUEST,
+
+  // This state corresponds to a resource load that is blocked waiting for the
+  // response to a network request. In the case of an HTTP transaction, this
+  // corresponds to the period after the request is sent and before all of the
+  // response headers have been received.
+  LOAD_STATE_WAITING_FOR_RESPONSE,
+
+  // This state corresponds to a resource load that is blocked waiting for a
+  // read to complete. In the case of an HTTP transaction, this corresponds to
+  // the period after the response headers have been received and before all of
+  // the response body has been downloaded. (NOTE: This state only applies for
+  // a URLRequest while there is an outstanding Read operation.)
+  LOAD_STATE_READING_RESPONSE,
+};
+
+}  // namespace net
+
+#endif  // NET_BASE_LOAD_STATES_H__
diff --git a/net/base/mime_sniffer.cc b/net/base/mime_sniffer.cc
new file mode 100644
index 0000000..12aec1d
--- /dev/null
+++ b/net/base/mime_sniffer.cc
@@ -0,0 +1,623 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//    * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//    * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Detecting mime types is a tricky business because we need to balance
// compatibility concerns with security issues.  Here is a survey of how other
// browsers behave and then a description of how we intend to behave.
//
// HTML payload, no Content-Type header:
// * IE 7: Render as HTML
// * Firefox 2: Render as HTML
// * Safari 3: Render as HTML
// * Opera 9: Render as HTML
//
// Here the choice seems clear:
// => Chrome: Render as HTML
//
// HTML payload, Content-Type: "text/plain":
// * IE 7: Render as HTML
// * Firefox 2: Render as text
// * Safari 3: Render as text (Note: Safari will Render as HTML if the URL
//   has an HTML extension)
// * Opera 9: Render as text
//
// Here we choose to follow the majority (and break some compatibility with IE).
// Many folks dislike IE's behavior here.
// => Chrome: Render as text
// We generalize this as follows.  If the Content-Type header is text/plain
// we won't detect dangerous mime types (those that can execute script).
//
// HTML payload, Content-Type: "application/octet-stream":
// * IE 7: Render as HTML
// * Firefox 2: Download as application/octet-stream
// * Safari 3: Render as HTML
// * Opera 9: Render as HTML
//
// We follow Firefox.
// => Chrome: Download as application/octet-stream
// One factor in this decision is that IIS 4 and 5 will send
// application/octet-stream for .xhtml files (because they don't recognize
// the extension).  We did some experiments and it looks like this doesn't occur
// very often on the web.  We choose the more secure option.
//
// GIF payload, no Content-Type header:
// * IE 7: Render as GIF
// * Firefox 2: Render as GIF
// * Safari 3: Download as Unknown (Note: Safari will Render as GIF if the
//   URL has a GIF extension)
// * Opera 9: Render as GIF
//
// The choice is clear.
// => Chrome: Render as GIF
// Once we decide to render HTML without a Content-Type header, there isn't much
// reason not to render GIFs.
//
// GIF payload, Content-Type: "text/plain":
// * IE 7: Render as GIF
// * Firefox 2: Download as application/octet-stream (Note: Firefox will
//   Download as GIF if the URL has a GIF extension)
// * Safari 3: Download as Unknown (Note: Safari will Render as GIF if the
//   URL has a GIF extension)
// * Opera 9: Render as GIF
//
// Displaying as text/plain makes little sense as the content will look like
// gibberish.  Here, we could change our minds and download.
// => Chrome: Render as GIF
//
// GIF payload, Content-Type: "application/octet-stream":
// * IE 7: Render as GIF
// * Firefox 2: Download as application/octet-stream (Note: Firefox will
//   Download as GIF if the URL has a GIF extension)
// * Safari 3: Download as Unknown (Note: Safari will Render as GIF if the
//   URL has a GIF extension)
// * Opera 9: Render as GIF
//
// Given our previous decisions, this decision is more or less clear.
// => Chrome: Render as GIF
//
// XHTML payload, Content-Type: "text/xml":
// * IE 7: Render as XML
// * Firefox 2: Render as HTML
// * Safari 3: Render as HTML
// * Opera 9: Render as HTML
// The layout tests rely on us rendering this as HTML.
// But we're conservative in XHTML detection, as this runs afoul of the
// "don't detect dangerous mime types" rule.
//
// Note that our definition of HTML payload is much stricter than IE's
// definition and roughly the same as Firefox's definition.

#include <string>

#include "net/base/mime_sniffer.h"

#include "base/basictypes.h"
#include "base/histogram.h"
#include "base/logging.h"
#include "base/registry.h"
#include "base/string_util.h"
#include "googleurl/src/gurl.h"
#include "net/base/mime_util.h"

namespace {

// Histogram used by the sniffers in this file to record which entry of a
// lookup table produced a match.  It forwards (name, 0, array_size - 1,
// array_size) to LinearHistogram -- presumably the minimum, maximum and bucket
// count; confirm against base/histogram.h -- and then sets
// kUmaTargetedHistogramFlag on itself.
class SnifferHistogram : public LinearHistogram {
 public:
  // |name| is the histogram name; |array_size| is the number of entries in the
  // table being instrumented.
  SnifferHistogram(const wchar_t* name, int array_size)
      : LinearHistogram(name, 0, array_size - 1, array_size) {
    SetFlags(kUmaTargetedHistogramFlag);
  }
};

}  // namespace

namespace mime_util {

// We aren't interested in looking at more than 512 bytes of content
static const size_t kMaxBytesToSniff = 512;

// The number of content bytes we need to use all our magic numbers.  Feel free
// to increase this number if you add a longer magic number.
+static const size_t kBytesRequiredForMagic = 42; + +struct MagicNumber { + const char* mime_type; + const char* magic; + size_t magic_len; + bool is_string; +}; + +#define MAGIC_NUMBER(mime_type, magic) \ + { (mime_type), (magic), sizeof(magic)-1, false }, + +// Magic strings are case insensitive and must not include '\0' characters +#define MAGIC_STRING(mime_type, magic) \ + { (mime_type), (magic), sizeof(magic)-1, true }, + +static const MagicNumber kMagicNumbers[] = { + // Source: HTML 5 specification + MAGIC_NUMBER("application/pdf", "%PDF-") + MAGIC_NUMBER("application/postscript", "%!PS-Adobe-") + MAGIC_NUMBER("image/gif", "GIF87a") + MAGIC_NUMBER("image/gif", "GIF89a") + MAGIC_NUMBER("image/png", "\x89" "PNG\x0D\x0A\x1A\x0A") + MAGIC_NUMBER("image/jpeg", "\xFF\xD8\xFF") + MAGIC_NUMBER("image/bmp", "BM") + // Source: Mozilla + MAGIC_NUMBER("application/postscript", "%! PS-Adobe-") + // Mozilla uses "\x4a47????00" for image/x-jg, but we use stronger pattern + MAGIC_NUMBER("image/x-icon", "\x00\x00\x10\x00") + MAGIC_NUMBER("image/x-icon", "\x00\x00\x20\x00") + MAGIC_NUMBER("image/x-xbitmap", "#define ") + MAGIC_NUMBER("text/plain", "#!") // Script + MAGIC_NUMBER("text/plain", "%!") // Script, similar to PS + MAGIC_NUMBER("text/plain", "From") + MAGIC_NUMBER("text/plain", ">From") + // Chrome specific + MAGIC_NUMBER("image/x-rgb", "\x01\xDA\x01\x01\x00\x03") + MAGIC_NUMBER("application/x-gzip", "\x1F\x8B\x08") + MAGIC_NUMBER("application/x-compress", "\x1F\x9D\x90") // tar.Z + MAGIC_NUMBER("audio/x-pn-realaudio", "\x2E\x52\x4D\x46") + MAGIC_NUMBER("video/x-ms-asf", + "\x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C") + MAGIC_NUMBER("application/winhlp", "?_\x03") + MAGIC_NUMBER("application/winhlp", "LN\x02\x00") + MAGIC_NUMBER("application/x-bzip2", "BZ") + MAGIC_NUMBER("image/tiff", "I I") + MAGIC_NUMBER("image/tiff", "II*") + MAGIC_NUMBER("image/tiff", "MM\x00*") + MAGIC_NUMBER("audio/mpeg", "ID3") + // TODO(abarth): we don't handle partial 
byte matches yet + // MAGIC_NUMBER("video/mpeg", "\x00\x00\x01\xB") + // MAGIC_NUMBER("audio/mpeg", "\xFF\xE") + // MAGIC_NUMBER("audio/mpeg", "\xFF\xF") + MAGIC_NUMBER("image/x-jg", "\x4A\x47\x03\x0E\x00\x00\x00") + MAGIC_NUMBER("image/x-jg", "\x4A\x47\x04\x0E\x00\x00\x00") + MAGIC_NUMBER("image/x-portable-graymap", "P4\x0A") + MAGIC_NUMBER("application/zip", "PK\x03\x04") + MAGIC_NUMBER("application/x-rar-compressed", "Rar!\x1A\x07\x00") + MAGIC_NUMBER("application/rtf", "{\\rtf1") + MAGIC_NUMBER("application/postscript", "\xC5\xD0\xD3\xC6") + MAGIC_NUMBER("application/x-msmetafile", "\xD7\xCD\xC6\x9A") + MAGIC_NUMBER("application/octet-stream", "\x7F" "ELF") // ELF + MAGIC_NUMBER("application/octet-stream", "\xE8") // COM, SYS + MAGIC_NUMBER("application/octet-stream", "\xE9") // COM, SYS + MAGIC_NUMBER("application/octet-stream", "\xEB") // COM, SYS + MAGIC_NUMBER("application/octet-stream", "MZ") // EXE + // Sniffing for Flash: + // + // MAGIC_NUMBER("application/x-shockwave-flash", "CWS") + // MAGIC_NUMBER("application/x-shockwave-flash", "FLV") + // MAGIC_NUMBER("application/x-shockwave-flash", "FWS") + // + // Including these magic number for Flash is a trade off. + // + // Pros: + // * Flash is an important and popular file format + // + // Cons: + // * These patterns are fairly weak + // * If we mistakenly decide something is Flash, we will execute it + // in the origin of an unsuspecting site. This could be a security + // vulnerability if the site allows users to upload content. + // + // On balance, we do not include these patterns. +}; + +// Our HTML sniffer differs slightly from Mozilla. For example, Mozilla will +// decide that a document that begins "<!DOCTYPE SOAP-ENV:Envelope PUBLIC " is +// HTML, but we will not. + +#define MAGIC_HTML_TAG(tag) \ + MAGIC_STRING("text/html", "<" tag) + +static const MagicNumber kSniffableTags[] = { + // XML processing directive. 
Although this is not an HTML mime type, we sniff + // for this in the HTML phase because text/xml is just as powerful as HTML and + // we want to leverage our white space skipping technology. + MAGIC_NUMBER("text/xml", "<?xml") // Mozilla + // DOCTYPEs + MAGIC_HTML_TAG("!DOCTYPE html") // HTML5 spec + // Sniffable tags, ordered by how often they occur in web documents with a + // sniffable mime type (as measured in 2007). + MAGIC_HTML_TAG("html") // HTML5 spec, Mozilla + MAGIC_HTML_TAG("head") // HTML5 spec, Mozilla + MAGIC_HTML_TAG("script") // HTML5 spec, Mozilla + MAGIC_HTML_TAG("tr") + MAGIC_HTML_TAG("link") // Mozilla + MAGIC_HTML_TAG("meta") // Mozilla + MAGIC_HTML_TAG("title") // Mozilla + MAGIC_HTML_TAG("pre") // Mozilla + MAGIC_HTML_TAG("table") // Mozilla + MAGIC_HTML_TAG("basefont") + // Not HTML: "xml" + MAGIC_HTML_TAG("p") // Mozilla + MAGIC_HTML_TAG("div") // Mozilla + MAGIC_HTML_TAG("base") // Mozilla + // Not HTML: "metadata" + MAGIC_HTML_TAG("body") // Mozilla + // Not HTML: "asx" + MAGIC_HTML_TAG("frameset") // Mozilla + // Not HTML: "sami" + MAGIC_HTML_TAG("a") // Mozilla + MAGIC_HTML_TAG("style") // Mozilla + // Not HTML: "rss" + MAGIC_HTML_TAG("br") + MAGIC_HTML_TAG("center") // Mozilla + MAGIC_HTML_TAG("b") // Mozilla + MAGIC_HTML_TAG("iframe") // Mozilla + MAGIC_HTML_TAG("img") // Mozilla + MAGIC_HTML_TAG("h1") // Mozilla + MAGIC_HTML_TAG("td") + // Not HTML: "printer" + MAGIC_HTML_TAG("font") // Mozilla + // Not HTML: "htlm" + MAGIC_HTML_TAG("form") // Mozilla + // Not HTML: "master" + MAGIC_HTML_TAG("h3") // Mozilla + MAGIC_HTML_TAG("h2") // Mozilla + // Plus a long tail, but we need to stop somewhere. 
+ // + // We also include all the other tags that Mozilla sniffs: + MAGIC_HTML_TAG("!--") + MAGIC_HTML_TAG("applet") + MAGIC_HTML_TAG("isindex") + MAGIC_HTML_TAG("h4") + MAGIC_HTML_TAG("h5") + MAGIC_HTML_TAG("h6") +}; + +static bool MatchMagicNumber(const char* content, size_t size, + const MagicNumber* magic_entry, + std::string* result) { + const size_t len = magic_entry->magic_len; + + // Keep kBytesRequiredForMagic honest. + DCHECK(len <= kBytesRequiredForMagic); + + // To compare with magic strings, we need to compute strlen(content), but + // content might not actually have a null terminator. In that case, we + // pretend the length is content_size. + const char* end = + static_cast<const char*>(memchr(content, '\0', size)); + const size_t content_strlen = (end != NULL) ? (end - content) : size; + + bool match = false; + if (magic_entry->is_string) { + if (content_strlen >= len) { + // String comparisons are case-insensitive + match = (_strnicmp(magic_entry->magic, content, len) == 0); + } + } else { + if (size >= len) + match = (memcmp(magic_entry->magic, content, len) == 0); + } + + if (match) { + result->assign(magic_entry->mime_type); + return true; + } + return false; +} + +static bool CheckForMagicNumbers(const char* content, size_t size, + const MagicNumber* magic, size_t magic_len, + Histogram* counter, std::string* result) { + for (size_t i = 0; i < magic_len; ++i) { + if (MatchMagicNumber(content, size, &(magic[i]), result)) { + counter->Add(static_cast<int>(i)); + return true; + } + } + return false; +} + +static bool SniffForHTML(const char* content, size_t size, + std::string* result) { + // We adopt a strategy similar to that used by Mozilla to sniff HTML tags, + // but with some modifications to better match the HTML5 spec. 
+ const char* const end = content + size; + const char* pos; + for (pos = content; pos < end; ++pos) { + if (!IsAsciiWhitespace(*pos)) + break; + } + static SnifferHistogram counter(L"mime_sniffer.kSniffableTags", + arraysize(kSniffableTags)); + // |pos| now points to first non-whitespace character (or at end). + return CheckForMagicNumbers(pos, end - pos, + kSniffableTags, arraysize(kSniffableTags), + &counter, result); +} + +static bool SniffForMagicNumbers(const char* content, size_t size, + std::string* result) { + // Check our big table of Magic Numbers + static SnifferHistogram counter(L"mime_sniffer.kMagicNumbers", + arraysize(kMagicNumbers)); + return CheckForMagicNumbers(content, size, + kMagicNumbers, arraysize(kMagicNumbers), + &counter, result); +} + +// Byte order marks +static const MagicNumber kMagicXML[] = { + // We want to be very conservative in interpreting text/xml content as + // XHTML -- we just want to sniff enough to make unit tests pass. + // So we match explicitly on this, and don't match other ways of writing + // it in semantically-equivalent ways. + MAGIC_STRING("application/xhtml+xml", + "<html xmlns=\"http://www.w3.org/1999/xhtml\"") + MAGIC_STRING("application/atom+xml", "<feed") + MAGIC_STRING("application/rss+xml", "<rss") // UTF-8 +}; + +// Sniff an XML document to judge whether it contains XHTML or a feed. +// Returns true if it has seen enough content to make a definitive decision. +// TODO(evanm): this is similar but more conservative than what Safari does, +// while HTML5 has a different recommendation -- what should we do? +// TODO(evanm): this is incorrect for documents whose encoding isn't a superset +// of ASCII -- do we care? +static bool SniffXML(const char* content, size_t size, std::string* result) { + // We allow at most kFirstTagBytes bytes of content before we expect the + // opening tag. 
+ const size_t kFeedAllowedHeaderBytes = 300; + const char* const end = content + std::min(size, kFeedAllowedHeaderBytes); + const char* pos = content; + + // This loop iterates through tag-looking offsets in the file. + // We want to skip XML processing instructions (of the form "<?xml ...") + // and stop at the first "plain" tag, then make a decision on the mime-type + // based on the name (or possibly attributes) of that tag. + static SnifferHistogram counter(L"mime_sniffer.kMagicXML", + arraysize(kMagicXML)); + const int kMaxTagIterations = 5; + for (int i = 0; i < kMaxTagIterations && pos < end; ++i) { + pos = reinterpret_cast<const char*>(memchr(pos, '<', end - pos)); + if (!pos) + return false; + + if (_strnicmp(pos, "<?xml", sizeof("<?xml")-1) == 0) { + // Skip XML declarations. + ++pos; + continue; + } else if (_strnicmp(pos, "<!DOCTYPE", sizeof("<!DOCTYPE")-1) == 0) { + // Skip DOCTYPE declarations. + ++pos; + continue; + } + + if (CheckForMagicNumbers(pos, end - pos, + kMagicXML, arraysize(kMagicXML), + &counter, result)) + return true; + + // TODO(evanm): handle RSS 1.0, which is an RDF format and more difficult + // to identify. + + // If we get here, we've hit an initial tag that hasn't matched one of the + // above tests. Abort. + return true; + } + + // We iterated too far without finding a start tag. + // If we have more content to look at, we aren't going to change our mind by + // seeing more bytes from the network. + return pos < end; +} + +// Byte order marks +static const MagicNumber kByteOrderMark[] = { + MAGIC_NUMBER("text/plain", "\xFE\xFF") // UTF-16BE + MAGIC_NUMBER("text/plain", "\xFF\xFE") // UTF-16LE + MAGIC_NUMBER("text/plain", "\xEF\xBB\xBF") // UTF-8 + MAGIC_NUMBER("text/plain", "\x00\x00\xFE\xFF") // UCS-4BE +}; + +// Whether a given byte looks like it might be part of binary content. 
+// Source: HTML5 spec +static char kByteLooksBinary[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, // 0x00 - 0x0F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, // 0x10 - 0x1F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20 - 0x2F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x30 - 0x3F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40 - 0x4F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x50 - 0x5F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60 - 0x6F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x70 - 0x7F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80 - 0x8F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90 - 0x9F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xA0 - 0xAF + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xB0 - 0xBF + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xC0 - 0xCF + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xD0 - 0xDF + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xE0 - 0xEF + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xF0 - 0xFF +}; + +static bool LooksBinary(const char* content, size_t size) { + // First, we look for a BOM. + static SnifferHistogram counter(L"mime_sniffer.kByteOrderMark", + arraysize(kByteOrderMark)); + std::string unused; + if (CheckForMagicNumbers(content, size, + kByteOrderMark, arraysize(kByteOrderMark), + &counter, &unused)) { + // If there is BOM, we think the buffer is not binary. + return false; + } + + // Next we look to see if any of the bytes "look binary." + for (size_t i = 0; i < size; ++i) { + // If we a see a binary-looking byte, we think the content is binary. + if (kByteLooksBinary[static_cast<unsigned char>(content[i])]) + return true; + } + + // No evidence either way, default to non-binary. + return false; +} + +static bool IsUnknownMimeType(const std::string& mime_type) { + // TODO(tc): Maybe reuse some code in net/http/http_response_headers.* here. 
+ static const char* kUnknownMimeTypes[] = { + // Empty mime types are as unknown as they get. + "", + // The unknown/unknown type is popular and uninformative + "unknown/unknown", + // The second most popular unknown mime type is application/unknown + "application/unknown", + // Firefox rejects a mime type if it is exactly */* + "*/*", + }; + static SnifferHistogram counter(L"mime_sniffer.kUnknownMimeTypes", + arraysize(kUnknownMimeTypes) + 1); + for (int i = 0; i < arraysize(kUnknownMimeTypes); ++i) { + if (mime_type == kUnknownMimeTypes[i]) { + counter.Add(i); + return true; + } + } + if (mime_type.find('/') == std::string::npos) { + // Firefox rejects a mime type if it does not contain a slash + counter.Add(arraysize(kUnknownMimeTypes)); + return true; + } + return false; +} + +bool ShouldSniffMimeType(const GURL& url, const std::string& mime_type) { + // We are willing to sniff the mime type for HTTP, HTTPS, and FTP + bool sniffable_scheme = url.is_empty() || + url.SchemeIs("http") || + url.SchemeIs("https") || + url.SchemeIs("ftp"); + if (!sniffable_scheme) + return false; + + static const char* kSniffableTypes[] = { + // Many web servers are misconfigured to send text/plain for many + // different types of content. + "text/plain", + // IIS 4.0 and 5.0 send application/octet-stream when serving .xhtml + // files. Firefox 2.0 does not sniff xhtml here, but Safari 3, + // Opera 9, and IE do. + "application/octet-stream", + // XHTML and Atom/RSS feeds are often served as plain xml instead of + // their more specific mime types. + "text/xml", + "application/xml", + }; + static SnifferHistogram counter(L"mime_sniffer.kSniffableTypes", + arraysize(kSniffableTypes) + 1); + for (int i = 0; i < arraysize(kSniffableTypes); ++i) { + if (mime_type == kSniffableTypes[i]) { + counter.Add(i); + return true; + } + } + if (IsUnknownMimeType(mime_type)) { + // The web server didn't specify a content type or specified a mime + // type that we ignore. 
+ counter.Add(arraysize(kSniffableTypes)); + return true; + } + return false; +} + +bool SniffMimeType(const char* content, size_t content_size, + const GURL& url, const std::string& type_hint, + std::string* result) { + DCHECK_LT(content_size, 1000000U); // sanity check + DCHECK(content); + DCHECK(result); + + // By default, we'll return the type hint. + result->assign(type_hint); + + // Flag for tracking whether our decision was limited by content_size. We + // probably have enough content if we can use all our magic numbers. + const bool have_enough_content = content_size >= kBytesRequiredForMagic; + + // We have an upper limit on the number of bytes we will consider. + if (content_size > kMaxBytesToSniff) + content_size = kMaxBytesToSniff; + + // Cache information about the type_hint + const bool hint_is_unknown_mime_type = IsUnknownMimeType(type_hint); + + // First check for HTML + if (hint_is_unknown_mime_type) { + // We're only willing to sniff HTML if the server has not supplied a mime + // type, or if the type it did supply indicates that it doesn't know what + // the type should be. + if (SniffForHTML(content, content_size, result)) + return true; // We succeeded in sniffing HTML. No more content needed. + } + + // We'll reuse this information later + const bool hint_is_text_plain = (type_hint == "text/plain"); + const bool looks_binary = LooksBinary(content, content_size); + + if (hint_is_text_plain && !looks_binary) { + // The server said the content was text/plain and we don't really have any + // evidence otherwise. + result->assign("text/plain"); + return have_enough_content; + } + + // If we have plain XML, sniff XML subtypes. + if (type_hint == "text/xml" || type_hint == "application/xml") { + // We're not interested in sniffing these types for images and the like. + // Instead, we're looking explicitly for a feed. If we don't find one we're + // done and return early. 
+ return SniffXML(content, content_size, result); + } + + // Now we look in our large table of magic numbers to see if we can find + // anything that matches the content. + if (SniffForMagicNumbers(content, content_size, result)) + return true; // We've matched a magic number. No more content needed. + + // Having failed thus far, we're willing to override unknown mime types and + // text/plain. + if (hint_is_unknown_mime_type || hint_is_text_plain) { + if (looks_binary) + result->assign("application/octet-stream"); + else + result->assign("text/plain"); + // We could change our mind if a binary-looking byte appears later in + // the content, so we only have enough content if we have the max. + return content_size >= kMaxBytesToSniff; + } + + return have_enough_content; +} + +} // namespace mime_util diff --git a/net/base/mime_sniffer.h b/net/base/mime_sniffer.h new file mode 100644 index 0000000..352343c --- /dev/null +++ b/net/base/mime_sniffer.h @@ -0,0 +1,62 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef NET_BASE_MIME_SNIFFER_H__
#define NET_BASE_MIME_SNIFFER_H__

#include <string>

class GURL;

namespace mime_util {

// Examine the URL and the mime_type and decide whether we should sniff a
// replacement mime type from the content.
//
// @param url The URL from which we obtained the content.
// @param mime_type The current mime type, e.g. from the Content-Type header.
// @return Returns true if we should sniff the mime type.
bool ShouldSniffMimeType(const GURL& url, const std::string& mime_type);

// Guess a mime type from the first few bytes of content and its URL.  Always
// assigns |result| with its best guess of a mime type.
//
// @param content A buffer containing the bytes to sniff.
// @param content_size The number of bytes in the |content| buffer.
// @param url The URL from which we obtained this content.
// @param type_hint The current mime type, e.g. from the Content-Type header.
// @param result Address at which to place the sniffed mime type.
// @return Returns true if we have enough content to guess the mime type.
bool SniffMimeType(const char* content, size_t content_size,
                   const GURL& url, const std::string& type_hint,
                   std::string* result);

}  // namespace mime_util

#endif  // NET_BASE_MIME_SNIFFER_H__
diff --git a/net/base/mime_sniffer_unittest.cc b/net/base/mime_sniffer_unittest.cc new file mode 100644 index 0000000..0d1011f --- /dev/null +++ b/net/base/mime_sniffer_unittest.cc @@ -0,0 +1,324 @@
// Copyright 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//    * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//    * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//    * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ +#include "base/basictypes.h" +#include "googleurl/src/gurl.h" +#include "net/base/mime_sniffer.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace { + class MimeSnifferTest : public testing::Test { + }; +} + +struct SnifferTest { + const char* content; + size_t content_len; + std::string url; + std::string type_hint; + const char* mime_type; +}; + +static void TestArray(SnifferTest* tests, size_t count) { + std::string mime_type; + + for (size_t i = 0; i < count; ++i) { + mime_util::SniffMimeType(tests[i].content, + tests[i].content_len, + GURL(tests[i].url), + tests[i].type_hint, + &mime_type); + EXPECT_EQ(tests[i].mime_type, mime_type); + } +} + +// TODO(evanm): convert other tests to use SniffMimeType instead of TestArray, +// so the error messages produced by test failures are more useful. +static std::string SniffMimeType(const std::string& content, + const std::string& url, + const std::string& mime_type_hint) { + std::string mime_type; + mime_util::SniffMimeType(content.data(), content.size(), GURL(url), + mime_type_hint, &mime_type); + return mime_type; +} + +TEST(MimeSnifferTest, BoundaryConditionsTest) { + std::string mime_type; + std::string type_hint; + + char buf[] = { + 'd', '\x1f', '\xFF' + }; + + GURL url; + + mime_util::SniffMimeType(buf, 0, url, type_hint, &mime_type); + EXPECT_EQ("text/plain", mime_type); + mime_util::SniffMimeType(buf, 1, url, type_hint, &mime_type); + EXPECT_EQ("text/plain", mime_type); + mime_util::SniffMimeType(buf, 2, url, type_hint, &mime_type); + EXPECT_EQ("application/octet-stream", mime_type); +} + +TEST(MimeSnifferTest, BasicSniffingTest) { + SnifferTest tests[] = { + { "<!DOCTYPE html PUBLIC", sizeof("<!DOCTYPE html PUBLIC")-1, + "http://www.example.com/", + "", "text/html" }, + { "<HtMl><Body></body></htMl>", sizeof("<HtMl><Body></body></htMl>")-1, + "http://www.example.com/foo.gif", + "application/octet-stream", "application/octet-stream" }, + { "GIF89a\x1F\x83\x94", sizeof("GIF89a\xAF\x83\x94")-1, + 
"http://www.example.com/foo", + "text/plain", "image/gif" }, + { "Gif87a\x1F\x83\x94", sizeof("Gif87a\xAF\x83\x94")-1, + "http://www.example.com/foo?param=tt.gif", + "", "application/octet-stream" }, + { "%!PS-Adobe-3.0", sizeof("%!PS-Adobe-3.0")-1, + "http://www.example.com/foo", + "text/plain", "text/plain" }, + { "\x89" "PNG\x0D\x0A\x1A\x0A", sizeof("\x89" "PNG\x0D\x0A\x1A\x0A")-1, + "http://www.example.com/foo", + "application/octet-stream", "image/png" }, + { "\xFF\xD8\xFF\x23\x49\xAF", sizeof("\xFF\xD8\xFF\x23\x49\xAF")-1, + "http://www.example.com/foo", + "", "image/jpeg" }, + }; + + TestArray(tests, arraysize(tests)); +} + +TEST(MimeSnifferTest, MozillaCompatibleTest) { + SnifferTest tests[] = { + { " \n <hTmL>\n <hea", sizeof(" \n <hTmL>\n <hea")-1, + "http://www.example.com/", + "", "text/html" }, + { " \n <hTmL>\n <hea", sizeof(" \n <hTmL>\n <hea")-1, + "http://www.example.com/", + "text/plain", "text/plain" }, + { "BMjlakdsfk", sizeof("BMjlakdsfk")-1, + "http://www.example.com/foo", + "", "image/bmp" }, + { "\x00\x00\x20\x00", sizeof("\x00\x00\x30\x00")-1, + "http://www.example.com/favicon", + "", "image/x-icon" }, + { "\x00\x00\x30\x00", sizeof("\x00\x00\x30\x00")-1, + "http://www.example.com/favicon.ico", + "", "application/octet-stream" }, + { "#!/bin/sh\nls /\n", sizeof("#!/bin/sh\nls /\n")-1, + "http://www.example.com/foo", + "", "text/plain" }, + { "From: Fred\nTo: Bob\n\nHi\n.\n", + sizeof("From: Fred\nTo: Bob\n\nHi\n.\n")-1, + "http://www.example.com/foo", + "", "text/plain" }, + { "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", + sizeof("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")-1, + "http://www.example.com/foo", + "", "text/xml" }, + { "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", + sizeof("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")-1, + "http://www.example.com/foo", + "application/octet-stream", "application/octet-stream" }, + }; + + TestArray(tests, arraysize(tests)); +} + +TEST(MimeSnifferTest, DontAllowPrivilegeEscalationTest) { 
+ SnifferTest tests[] = { + { "GIF87a\n<html>\n<body>" + "<script>alert('haxorzed');\n</script>" + "</body></html>\n", + sizeof("GIF87a\n<html>\n<body>" + "<script>alert('haxorzed');\n</script>" + "</body></html>\n")-1, + "http://www.example.com/foo", + "", "image/gif" }, + { "GIF87a\n<html>\n<body>" + "<script>alert('haxorzed');\n</script>" + "</body></html>\n", + sizeof("GIF87a\n<html>\n<body>" + "<script>alert('haxorzed');\n</script>" + "</body></html>\n")-1, + "http://www.example.com/foo?q=ttt.html", + "", "image/gif" }, + { "GIF87a\n<html>\n<body>" + "<script>alert('haxorzed');\n</script>" + "</body></html>\n", + sizeof("GIF87a\n<html>\n<body>" + "<script>alert('haxorzed');\n</script>" + "</body></html>\n")-1, + "http://www.example.com/foo#ttt.html", + "", "image/gif" }, + { "a\n<html>\n<body>" + "<script>alert('haxorzed');\n</script>" + "</body></html>\n", + sizeof("a\n<html>\n<body>" + "<script>alert('haxorzed');\n</script>" + "</body></html>\n")-1, + "http://www.example.com/foo", + "", "text/plain" }, + { "a\n<html>\n<body>" + "<script>alert('haxorzed');\n</script>" + "</body></html>\n", + sizeof("a\n<html>\n<body>" + "<script>alert('haxorzed');\n</script>" + "</body></html>\n")-1, + "http://www.example.com/foo?q=ttt.html", + "", "text/plain" }, + { "a\n<html>\n<body>" + "<script>alert('haxorzed');\n</script>" + "</body></html>\n", + sizeof("a\n<html>\n<body>" + "<script>alert('haxorzed');\n</script>" + "</body></html>\n")-1, + "http://www.example.com/foo#ttt.html", + "", "text/plain" }, + { "a\n<html>\n<body>" + "<script>alert('haxorzed');\n</script>" + "</body></html>\n", + sizeof("a\n<html>\n<body>" + "<script>alert('haxorzed');\n</script>" + "</body></html>\n")-1, + "http://www.example.com/foo.html", + "", "text/plain" }, + }; + + TestArray(tests, arraysize(tests)); +} + +TEST(MimeSnifferTest, UnicodeTest) { + SnifferTest tests[] = { + { "\xEF\xBB\xBF" "Hi there", sizeof("\xEF\xBB\xBF" "Hi there")-1, + "http://www.example.com/foo", + "", "text/plain" }, 
+ { "\xEF\xBB\xBF\xED\x7A\xAD\x7A\x0D\x79", + sizeof("\xEF\xBB\xBF\xED\x7A\xAD\x7A\x0D\x79")-1, + "http://www.example.com/foo", + "", "text/plain" }, + { "\xFE\xFF\xD0\xA5\xD0\xBE\xD0\xBB\xD1\x83\xD0\xB9", + sizeof("\xFE\xFF\xD0\xA5\xD0\xBE\xD0\xBB\xD1\x83\xD0\xB9")-1, + "http://www.example.com/foo", + "", "text/plain" }, + { "\xFE\xFF\x00\x41\x00\x20\xD8\x00\xDC\x00\xD8\x00\xDC\x01", + sizeof("\xFE\xFF\x00\x41\x00\x20\xD8\x00\xDC\x00\xD8\x00\xDC\x01")-1, + "http://www.example.com/foo", + "", "text/plain" }, + }; + + TestArray(tests, arraysize(tests)); +} + +TEST(MimeSnifferTest, FlashTest) { + SnifferTest tests[] = { + { "CWSdd\x00\xB3", sizeof("CWSdd\x00\xB3")-1, + "http://www.example.com/foo", + "", "application/octet-stream" }, + { "FLVjdkl*(#)0sdj\x00", sizeof("FLVjdkl*(#)0sdj\x00")-1, + "http://www.example.com/foo?q=ttt.swf", + "", "application/octet-stream" }, + { "FWS3$9\r\b\x00", sizeof("FWS3$9\r\b\x00")-1, + "http://www.example.com/foo#ttt.swf", + "", "application/octet-stream" }, + { "FLVjdkl*(#)0sdj", sizeof("FLVjdkl*(#)0sdj")-1, + "http://www.example.com/foo.swf", + "", "text/plain" }, + { "FLVjdkl*(#)0s\x01dj", sizeof("FLVjdkl*(#)0s\x01dj")-1, + "http://www.example.com/foo/bar.swf", + "", "application/octet-stream" }, + { "FWS3$9\r\b\x1A", sizeof("FWS3$9\r\b\x1A")-1, + "http://www.example.com/foo.swf?clickTAG=http://www.adnetwork.com/bar", + "", "application/octet-stream" }, + { "FWS3$9\r\x1C\b", sizeof("FWS3$9\r\x1C\b")-1, + "http://www.example.com/foo.swf?clickTAG=http://www.adnetwork.com/bar", + "text/plain", "application/octet-stream" }, + }; + + TestArray(tests, arraysize(tests)); +} + +TEST(MimeSnifferTest, XMLTest) { + // An easy feed to identify. + EXPECT_EQ("application/atom+xml", + SniffMimeType("<?xml?><feed", "", "text/xml")); + // Don't sniff out of plain text. + EXPECT_EQ("text/plain", + SniffMimeType("<?xml?><feed", "", "text/plain")); + // Simple RSS. 
+ EXPECT_EQ("application/rss+xml", + SniffMimeType("<?xml version='1.0'?>\r\n<rss", "", "text/xml")); + + // The top of CNN's RSS feed, which we'd like to recognize as RSS. + static const char kCNNRSS[] = + "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + "<?xml-stylesheet href=\"http://rss.cnn.com/~d/styles/rss2full.xsl\" " + "type=\"text/xsl\" media=\"screen\"?>" + "<?xml-stylesheet href=\"http://rss.cnn.com/~d/styles/itemcontent.css\" " + "type=\"text/css\" media=\"screen\"?>" + "<rss xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\" " + "version=\"2.0\">"; + // CNN's RSS + EXPECT_EQ("application/rss+xml", + SniffMimeType(kCNNRSS, "", "text/xml")); + EXPECT_EQ("text/plain", + SniffMimeType(kCNNRSS, "", "text/plain")); + + // Don't sniff random XML as something different. + EXPECT_EQ("text/xml", + SniffMimeType("<?xml?><notafeed", "", "text/xml")); + // Don't sniff random plain-text as something different. + EXPECT_EQ("text/plain", + SniffMimeType("<?xml?><notafeed", "", "text/plain")); + + // Positive test for the two instances we upgrade to XHTML. + EXPECT_EQ("application/xhtml+xml", + SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">", + "", "text/xml")); + EXPECT_EQ("application/xhtml+xml", + SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">", + "", "application/xml")); + + // Following our behavior with HTML, don't call other mime types XHTML. + EXPECT_EQ("text/plain", + SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">", + "", "text/plain")); + EXPECT_EQ("application/rss+xml", + SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">", + "", "application/rss+xml")); + + // Don't sniff other HTML-looking bits as HTML. 
+ EXPECT_EQ("text/xml", + SniffMimeType("<html><head>", "", "text/xml")); + EXPECT_EQ("text/xml", + SniffMimeType("<foo><html xmlns=\"http://www.w3.org/1999/xhtml\">", + "", "text/xml")); + +} diff --git a/net/base/mime_util.cc b/net/base/mime_util.cc new file mode 100644 index 0000000..71421f9 --- /dev/null +++ b/net/base/mime_util.cc @@ -0,0 +1,305 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include <hash_set> +#include <string.h> + +#include "net/base/mime_util.h" + +#include "base/basictypes.h" +#include "base/logging.h" +#include "base/registry.h" +#include "base/string_util.h" + +using std::string; +using std::wstring; + +namespace mime_util { + +struct MimeInfo { + const char* mime_type; + const char* extensions; // comma separated list +}; + +static const MimeInfo primary_mappings[] = { + { "text/html", "html,htm" }, + { "text/css", "css" }, + { "text/xml", "xml" }, + { "image/gif", "gif" }, + { "image/jpeg", "jpeg,jpg" }, + { "image/png", "png" }, + { "application/xhtml+xml", "xhtml,xht" } +}; + +static const MimeInfo secondary_mappings[] = { + { "application/octet-stream", "exe,com,bin" }, + { "application/gzip", "gz" }, + { "application/pdf", "pdf" }, + { "application/postscript", "ps,eps,ai" }, + { "application/x-javascript", "js" }, + { "image/bmp", "bmp" }, + { "image/x-icon", "ico" }, + { "image/jpeg", "jfif,pjpeg,pjp" }, + { "image/tiff", "tiff,tif" }, + { "image/x-xbitmap", "xbm" }, + { "image/svg+xml", "svg,svgz" }, + { "message/rfc822", "eml" }, + { "text/plain", "txt,text" }, + { "text/html", "shtml,ehtml" }, + { "application/rss+xml", "rss" }, + { "application/rdf+xml", "rdf" }, + { "text/xml", "xsl,xbl" }, + { "application/vnd.mozilla.xul+xml", "xul" }, + { "application/x-shockwave-flash", "swf,swl" } +}; + +static const char* FindMimeType(const MimeInfo* mappings, size_t mappings_len, + const char* ext) { + size_t ext_len = strlen(ext); + + for (size_t i = 0; i < mappings_len; ++i) { + const char* extensions = mappings[i].extensions; + for (;;) { + size_t end_pos = strcspn(extensions, ","); + if (end_pos == ext_len && _strnicmp(extensions, ext, ext_len) == 0) + return mappings[i].mime_type; + extensions += end_pos; + if (!*extensions) + break; + extensions += 1; // skip over comma + } + } + return NULL; +} + +bool GetMimeTypeFromExtension(const wstring& ext, string* result) { + // We implement the same algorithm as Mozilla for 
mapping a file extension to + // a mime type. That is, we first check a hard-coded list (that cannot be + // overridden), and then if not found there, we defer to the system registry. + // Finally, we scan a secondary hard-coded list to catch types that we can + // deduce but that we also want to allow the OS to override. + + string ext_utf8 = WideToUTF8(ext); + const char* mime_type; + + mime_type = FindMimeType(primary_mappings, arraysize(primary_mappings), + ext_utf8.c_str()); + if (mime_type) { + *result = mime_type; + return true; + } + + // check windows registry for file extension's mime type (registry key + // names are not case-sensitive). + wstring value, key = L"." + ext; + RegKey(HKEY_CLASSES_ROOT, key.c_str()).ReadValue(L"Content Type", &value); + if (!value.empty()) { + *result = WideToUTF8(value); + return true; + } + + mime_type = FindMimeType(secondary_mappings, arraysize(secondary_mappings), + ext_utf8.c_str()); + if (mime_type) { + *result = mime_type; + return true; + } + + return false; +} + +bool GetMimeTypeFromFile(const wstring& file_path, string* result) { + wstring::size_type dot = file_path.find_last_of('.'); + if (dot == wstring::npos) + return false; + return GetMimeTypeFromExtension(file_path.substr(dot + 1), result); +} + +bool GetPreferredExtensionForMimeType(const std::string& mime_type, + std::wstring* ext) { + wstring key(L"MIME\\Database\\Content Type\\" + UTF8ToWide(mime_type)); + return RegKey(HKEY_CLASSES_ROOT, key.c_str()).ReadValue(L"Extension", ext); +} + + +// From WebKit's WebCore/platform/MIMETypeRegistry.cpp: + +static const char* supported_image_types[] = { + "image/jpeg", + "image/jpg", + "image/png", + "image/gif", + "image/bmp", + "image/x-icon", // ico + "image/x-xbitmap" // xbm +}; + +// Note: does not include javascript types list (see supported_javascript_types) +static const char* supported_non_image_types[] = { + "text/html", + "text/xml", + "text/xsl", + "text/plain", + "text/", + "image/svg+xml", // SVG is 
text-based XML, even though it has an image/ type + "application/xml", + "application/xhtml+xml", + "application/rss+xml", + "application/atom+xml", + "multipart/x-mixed-replace" +}; + +// Mozilla 1.8 and WinIE 7 both accept text/javascript and text/ecmascript. +// Mozilla 1.8 accepts application/javascript, application/ecmascript, and application/x-javascript, but WinIE 7 doesn't. +// WinIE 7 accepts text/javascript1.1 - text/javascript1.3, text/jscript, and text/livescript, but Mozilla 1.8 doesn't. +// Mozilla 1.8 allows leading and trailing whitespace, but WinIE 7 doesn't. +// Mozilla 1.8 and WinIE 7 both accept the empty string, but neither accept a whitespace-only string. +// We want to accept all the values that either of these browsers accept, but not other values. +static const char* supported_javascript_types[] = { + "text/javascript", + "text/ecmascript", + "application/javascript", + "application/ecmascript", + "application/x-javascript", + "text/javascript1.1", + "text/javascript1.2", + "text/javascript1.3", + "text/jscript", + "text/livescript" +}; + +static const char* view_source_types[] = { + "text/xml", + "text/xsl", + "application/xml", + "application/rss+xml", + "application/atom+xml", + "image/svg+xml" +}; + +// For faster lookup +static stdext::hash_set<string>* image_map = NULL; +static stdext::hash_set<string>* non_image_map = NULL; +static stdext::hash_set<string>* javascript_map = NULL; +static stdext::hash_set<string>* view_source_map = NULL; + +static void InitializeMimeTypeMaps() { + image_map = new stdext::hash_set<string>; + non_image_map = new stdext::hash_set<string>; + javascript_map = new stdext::hash_set<string>; + view_source_map = new stdext::hash_set<string>; + + for (int i = 0; i < arraysize(supported_image_types); ++i) + image_map->insert(supported_image_types[i]); + + // Initialize the supported non-image types + for (int i = 0; i < arraysize(supported_non_image_types); ++i) + 
non_image_map->insert(supported_non_image_types[i]); + for (int i = 0; i < arraysize(supported_javascript_types); ++i) + non_image_map->insert(supported_javascript_types[i]); + + for (int i = 0; i < arraysize(supported_javascript_types); ++i) + javascript_map->insert(supported_javascript_types[i]); + + for (int i = 0; i < arraysize(view_source_types); ++i) + view_source_map->insert(view_source_types[i]); +} + +bool IsSupportedImageMimeType(const char* mime_type) { + if (!image_map) + InitializeMimeTypeMaps(); + return image_map->find(mime_type) != image_map->end(); +} + +bool IsSupportedNonImageMimeType(const char* mime_type) { + if (!non_image_map) + InitializeMimeTypeMaps(); + return non_image_map->find(mime_type) != non_image_map->end(); +} + +bool IsSupportedJavascriptMimeType(const char* mime_type) { + if (!javascript_map) + InitializeMimeTypeMaps(); + return javascript_map->find(mime_type) != javascript_map->end(); +} + +bool IsViewSourceMimeType(const char* mime_type) { + if (!view_source_map) + InitializeMimeTypeMaps(); + return view_source_map->find(mime_type) != view_source_map->end(); +} + +// Mirrors WebViewImpl::CanShowMIMEType() +bool IsSupportedMimeType(const std::string& mime_type) { + if (mime_type.compare(0, 5, "text/") == 0 || + (mime_type.compare(0, 6, "image/") == 0 && + mime_util::IsSupportedImageMimeType(mime_type.c_str())) || + mime_util::IsSupportedNonImageMimeType(mime_type.c_str())) + return true; + return false; +} + +bool MatchesMimeType(const std::string &mime_type_pattern, + const std::string &mime_type) { + // verify caller is passing lowercase + DCHECK(mime_type_pattern == StringToLowerASCII(mime_type_pattern)); + DCHECK(mime_type == StringToLowerASCII(mime_type)); + + // This comparison handles absolute maching and also basic + // wildcards. 
The plugin mime types could be: + // application/x-foo + // application/* + // application/*+xml + // * + if (mime_type_pattern.empty()) + return false; + + const std::string::size_type star = mime_type_pattern.find('*'); + + if (star == std::string::npos) + return mime_type_pattern == mime_type; + + // Test length to prevent overlap between |left| and |right|. + if (mime_type.length() < mime_type_pattern.length() - 1) + return false; + + const std::string left(mime_type_pattern.substr(0, star)); + const std::string right(mime_type_pattern.substr(star + 1)); + + if (mime_type.find(left) != 0) + return false; + + if (!right.empty() && + mime_type.rfind(right) != mime_type.length() - right.length()) + return false; + + return true; +} + +} // namespace mime_util diff --git a/net/base/mime_util.h b/net/base/mime_util.h new file mode 100644 index 0000000..047e79c --- /dev/null +++ b/net/base/mime_util.h @@ -0,0 +1,71 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef NET_BASE_MIME_UTIL_H__ +#define NET_BASE_MIME_UTIL_H__ + +#include <string> + +namespace mime_util { + +// Get the mime type (if any) that is associated with the given file extension. +// Returns true if a corresponding mime type exists. +bool GetMimeTypeFromExtension(const std::wstring& ext, std::string* mime_type); + +// Get the mime type (if any) that is associated with the given file. Returns +// true if a corresponding mime type exists. +bool GetMimeTypeFromFile(const std::wstring& file_path, std::string* mime_type); + +// Get the preferred extension (if any) associated with the given mime type. +// Returns true if a corresponding file extension exists. The extension is +// returned with a prefixed dot (as stored in the registry), ex ".avi". +bool GetPreferredExtensionForMimeType(const std::string& mime_type, + std::wstring* extension); + +// Check to see if a particular MIME type is in our list. +bool IsSupportedImageMimeType(const char* mime_type); +bool IsSupportedNonImageMimeType(const char* mime_type); +bool IsSupportedJavascriptMimeType(const char* mime_type); + +// Get whether this mime type should be displayed in view-source mode. +// (For example, XML.) +bool IsViewSourceMimeType(const char* mime_type); + +// Convenience function. +bool IsSupportedMimeType(const std::string& mime_type); + +// Returns true if the mime_type_pattern matches a given mime-type. +// Checks for absolute matching and wildcards. 
mime-types should be in +// lower case. +bool MatchesMimeType(const std::string &mime_type_pattern, + const std::string &mime_type); + +} // namespace mime_util + +#endif // NET_BASE_MIME_UTIL_H__ diff --git a/net/base/mime_util_unittest.cc b/net/base/mime_util_unittest.cc new file mode 100644 index 0000000..3031d1b --- /dev/null +++ b/net/base/mime_util_unittest.cc @@ -0,0 +1,114 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include "base/basictypes.h" +#include "net/base/mime_util.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace { + class MimeUtilTest : public testing::Test { + }; +} + +TEST(MimeUtilTest, ExtensionTest) { + const struct { + const wchar_t* extension; + const char* mime_type; + bool valid; + } tests[] = { + { L"png", "image/png", true }, + { L"css", "text/css", true }, + { L"pjp", "image/jpeg", true }, + { L"pjpeg", "image/jpeg", true }, + { L"not an extension / for sure", "", false }, + }; + + std::string mime_type; + bool rv; + + for (size_t i = 0; i < arraysize(tests); ++i) { + rv = mime_util::GetMimeTypeFromExtension(tests[i].extension, &mime_type); + EXPECT_EQ(rv, tests[i].valid); + if (rv) + EXPECT_EQ(mime_type, tests[i].mime_type); + } +} + +TEST(MimeUtilTest, FileTest) { + const struct { + const wchar_t* file_path; + const char* mime_type; + bool valid; + } tests[] = { + { L"c:\\foo\\bar.css", "text/css", true }, + { L"c:\\blah", "", false }, + { L"c:\\blah.", "", false }, + }; + + std::string mime_type; + bool rv; + + for (size_t i = 0; i < arraysize(tests); ++i) { + rv = mime_util::GetMimeTypeFromFile(tests[i].file_path, &mime_type); + EXPECT_EQ(rv, tests[i].valid); + if (rv) + EXPECT_EQ(mime_type, tests[i].mime_type); + } +} + +TEST(MimeUtilTest, LookupTypes) { + EXPECT_EQ(true, mime_util::IsSupportedImageMimeType("image/jpeg")); + EXPECT_EQ(false, mime_util::IsSupportedImageMimeType("image/lolcat")); + EXPECT_EQ(true, mime_util::IsSupportedNonImageMimeType("text/html")); + EXPECT_EQ(false, mime_util::IsSupportedNonImageMimeType("text/virus")); +} + +TEST(MimeUtilTest, MatchesMimeType) { + EXPECT_EQ(true, mime_util::MatchesMimeType("*", "video/x-mpeg")); + EXPECT_EQ(true, mime_util::MatchesMimeType("video/*", "video/x-mpeg")); + EXPECT_EQ(true, mime_util::MatchesMimeType("video/x-mpeg", "video/x-mpeg")); + EXPECT_EQ(true, mime_util::MatchesMimeType("application/*+xml", + "application/html+xml")); + EXPECT_EQ(true, 
mime_util::MatchesMimeType("application/*+xml", + "application/+xml")); + EXPECT_EQ(true, mime_util::MatchesMimeType("aaa*aaa", + "aaaaaa")); + EXPECT_EQ(false, mime_util::MatchesMimeType("video/", "video/x-mpeg")); + EXPECT_EQ(false, mime_util::MatchesMimeType("", "video/x-mpeg")); + EXPECT_EQ(false, mime_util::MatchesMimeType("", "")); + EXPECT_EQ(false, mime_util::MatchesMimeType("video/x-mpeg", "")); + EXPECT_EQ(false, mime_util::MatchesMimeType("application/*+xml", + "application/xml")); + EXPECT_EQ(false, mime_util::MatchesMimeType("application/*+xml", + "application/html+xmlz")); + EXPECT_EQ(false, mime_util::MatchesMimeType("application/*+xml", + "applcation/html+xml")); + EXPECT_EQ(false, mime_util::MatchesMimeType("aaa*aaa", + "aaaaa")); +} diff --git a/net/base/net_error_list.h b/net/base/net_error_list.h new file mode 100644 index 0000000..1b12af4 --- /dev/null +++ b/net/base/net_error_list.h @@ -0,0 +1,214 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// This file contains the list of network errors. + +// An asynchronous IO operation is not yet complete. This usually does not +// indicate a fatal error. Typically this error will be generated as a +// notification to wait for some external notification that the IO operation +// finally completed. +NET_ERROR(IO_PENDING, -1) + +// A generic failure occurred. +NET_ERROR(FAILED, -2) + +// An operation was aborted (due to user action). +NET_ERROR(ABORTED, -3) + +// An argument to the function is incorrect. +NET_ERROR(INVALID_ARGUMENT, -4) + +// The handle or file descriptor is invalid. +NET_ERROR(INVALID_HANDLE, -5) + +// The file or directory cannot be found. +NET_ERROR(FILE_NOT_FOUND, -6) + +// An operation timed out. +NET_ERROR(TIMED_OUT, -7) + +// The file is too large. +NET_ERROR(FILE_TOO_BIG, -8) + +// A connection was closed (corresponding to a TCP FIN). +NET_ERROR(CONNECTION_CLOSED, -100) + +// A connection was reset (corresponding to a TCP RST). +NET_ERROR(CONNECTION_RESET, -101) + +// A connection attempt was refused. +NET_ERROR(CONNECTION_REFUSED, -102) + +// A connection timed out as a result of not receiving an ACK for data sent. +// This can include a FIN packet that did not get ACK'd. +NET_ERROR(CONNECTION_ABORTED, -103) + +// A connection attempt failed. +NET_ERROR(CONNECTION_FAILED, -104) + +// The host name could not be resolved. +NET_ERROR(NAME_NOT_RESOLVED, -105) + +// The Internet connection has been lost. 
+NET_ERROR(INTERNET_DISCONNECTED, -106) + +// An SSL protocol error occurred. +NET_ERROR(SSL_PROTOCOL_ERROR, -107) + +// The IP address or port number is invalid (e.g., cannot connect to the IP +// address 0 or the port 0). +NET_ERROR(ADDRESS_INVALID, -108) + +// The IP address is unreachable. This usually means that there is no route to +// the specified host or network. +NET_ERROR(ADDRESS_UNREACHABLE, -109) + +// The server requested a client certificate for SSL client authentication. +NET_ERROR(SSL_CLIENT_AUTH_CERT_NEEDED, -110) + +// Certificate error codes +// +// The values of certificate error codes must be consecutive. + +// The server responded with a certificate whose common name did not match +// the host name. This could mean: +// +// 1. An attacker has redirected our traffic to his server and is +// presenting a certificate for which he knows the private key. +// +// 2. The server is misconfigured and responding with the wrong cert. +// +// 3. The user is on a wireless network and is being redirected to the +// network's login page. +// +// 4. The OS has used a DNS search suffix and the server doesn't have +// a certificate for the abbreviated name in the address bar. +// +NET_ERROR(CERT_COMMON_NAME_INVALID, -200) + +// The server responded with a certificate that, by our clock, appears to +// either not yet be valid or to have expired. This could mean: +// +// 1. An attacker is presenting an old certificate for which he has +// managed to obtain the private key. +// +// 2. The server is misconfigured and is not presenting a valid cert. +// +// 3. Our clock is wrong. +// +NET_ERROR(CERT_DATE_INVALID, -201) + +// The server responded with a certificate that is signed by an authority +// we don't trust. This could mean: +// +// 1. An attacker has substituted the real certificate for a cert that +// contains his public key and is signed by his cousin. +// +// 2. 
The server operator has a legitimate certificate from a CA we don't +// know about, but should trust. +// +// 3. The server is presenting a self-signed certificate, providing no +// defense against active attackers (but foiling passive attackers). +// +NET_ERROR(CERT_AUTHORITY_INVALID, -202) + +// The server responded with a certificate that contains errors. +// This error is not recoverable. +// +// MSDN describes this error as follows: +// "The SSL certificate contains errors." +// +NET_ERROR(CERT_CONTAINS_ERRORS, -203) + +// The certificate has no mechanism for determining if it is revoked. In +// effect, this certificate cannot be revoked. +NET_ERROR(CERT_NO_REVOCATION_MECHANISM, -204) + +// Revocation information for the security certificate for this site is not +// available. This could mean: +// +// 1. An attacker has compromised the private key in the certificate and is +// blocking our attempt to find out that the cert was revoked. +// +// 2. The certificate is unrevoked, but the revocation server is busy or +// unavailable. +// +NET_ERROR(CERT_UNABLE_TO_CHECK_REVOCATION, -205) + +// The server responded with a certificate that has been revoked. +// We have the capability to ignore this error, but it is probably not the +// thing to do. +NET_ERROR(CERT_REVOKED, -206) + +// The server responded with a certificate that is invalid. +// This error is not recoverable. +// +// MSDN describes this error as follows: +// "The SSL certificate is invalid." +// +NET_ERROR(CERT_INVALID, -207) + +// Add new certificate error codes here. +// +// Update the value of CERT_END whenever you add a new certificate error +// code. + +// The value immediately past the last certificate error code. +NET_ERROR(CERT_END, -208) + +// The URL is invalid. +NET_ERROR(INVALID_URL, -300) + +// The scheme of the URL is disallowed. +NET_ERROR(DISALLOWED_URL_SCHEME, -301) + +// The scheme of the URL is unknown. 
+NET_ERROR(UNKNOWN_URL_SCHEME, -302) + +// Attempting to load an URL resulted in too many redirects. +NET_ERROR(TOO_MANY_REDIRECTS, -310) + +// Attempting to load an URL resulted in an unsafe redirect (e.g., a redirect +// to file:// is considered unsafe). +NET_ERROR(UNSAFE_REDIRECT, -311) + +// Attempting to load an URL with an unsafe port number. These are port +// numbers that correspond to services, which are not robust to spurious input +// that may be constructed as a result of an allowed web construct (e.g., HTTP +// looks a lot like SMTP, so form submission to port 25 is denied). +NET_ERROR(UNSAFE_PORT, -312) + +// The server's response was invalid. +NET_ERROR(INVALID_RESPONSE, -320) + +// The cache does not have the requested entry. +NET_ERROR(CACHE_MISS, -400) + +// The server's response was insecure (e.g. there was a cert error). +NET_ERROR(INSECURE_RESPONSE, -501) diff --git a/net/base/net_errors.cc b/net/base/net_errors.cc new file mode 100644 index 0000000..02a2797 --- /dev/null +++ b/net/base/net_errors.cc @@ -0,0 +1,55 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "net/base/net_errors.h" + +#include "base/basictypes.h" + +#define STRINGIZE(x) #x + +namespace net { + +const char kErrorDomain[] = "net"; + +const char* ErrorToString(int error) { + if (error == 0) + return "net::OK"; + + switch (error) { +#define NET_ERROR(label, value) \ + case ERR_ ## label: \ + return "net::" STRINGIZE(ERR_ ## label); +#include "net/base/net_error_list.h" +#undef NET_ERROR + default: + return "net::<unknown>"; + } +} + +} // namespace net diff --git a/net/base/net_errors.h b/net/base/net_errors.h new file mode 100644 index 0000000..71c19614 --- /dev/null +++ b/net/base/net_errors.h @@ -0,0 +1,65 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef NET_BASE_NET_ERRORS_H__
#define NET_BASE_NET_ERRORS_H__

#include "base/basictypes.h"

namespace net {

// Error domain of the net module's error codes.
extern const char kErrorDomain[];

// Error values are negative.  The enumerators are generated from
// net_error_list.h: every NET_ERROR(label, value) entry in that file becomes
// an enumerator named ERR_<label> with the given value.
enum {
  // No error.
  OK = 0,

// Expand each list entry into "ERR_<label> = <value>,".
#define NET_ERROR(label, value) ERR_ ## label = value,
#include "net/base/net_error_list.h"
#undef NET_ERROR

  // The value of the first certificate error code.
  ERR_CERT_BEGIN = ERR_CERT_COMMON_NAME_INVALID,
};

// Returns a textual representation of the error code for logging purposes.
const char* ErrorToString(int error);

// Returns true if |error| is a certificate error code.
+inline bool IsCertificateError(int error) { + // Certificate errors are negative integers from net::ERR_CERT_BEGIN + // (inclusive) to net::ERR_CERT_END (exclusive) in *decreasing* order. + return error <= ERR_CERT_BEGIN && error > ERR_CERT_END; +} + +} // namespace net + +#endif // NET_BASE_NET_ERRORS_H__ diff --git a/net/base/net_module.cc b/net/base/net_module.cc new file mode 100644 index 0000000..7fa21f2 --- /dev/null +++ b/net/base/net_module.cc @@ -0,0 +1,44 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "net/base/net_module.h" + +static NetModule::ResourceProvider resource_provider; + +// static +void NetModule::SetResourceProvider(ResourceProvider func) { + resource_provider = func; +} + +// static +std::string NetModule::GetResource(int key) { + // avoid thread safety issues by copying provider address to a local var + ResourceProvider func = resource_provider; + return func ? func(key) : std::string(); +} diff --git a/net/base/net_module.h b/net/base/net_module.h new file mode 100644 index 0000000..37698cc --- /dev/null +++ b/net/base/net_module.h @@ -0,0 +1,60 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef NET_BASE_NET_MODULE_H__
#define NET_BASE_NET_MODULE_H__

#include <string>

#include "base/basictypes.h"

// Defines global initializers and associated methods for the net module.
//
// The network module does not have direct access to the way application
// resources are stored and fetched by the embedding application (e.g., it
// cannot see the ResourceBundle class used by Chrome), so it uses this API to
// get access to such resources.
//
// The class only exposes static methods.
class NetModule {
 public:
  // Signature of the embedder-supplied callback: takes a resource key and
  // returns the resource contents as a string.
  typedef std::string (*ResourceProvider)(int key);

  // Sets the function to call when the net module needs resources.
  static void SetResourceProvider(ResourceProvider func);

  // Call the resource provider (if one exists) to get the specified resource.
  // Returns an empty string if the resource does not exist or if there is no
  // resource provider.
  static std::string GetResource(int key);

 private:
  DISALLOW_IMPLICIT_CONSTRUCTORS(NetModule);
};

#endif  // NET_BASE_NET_MODULE_H__
diff --git a/net/base/net_resources.h b/net/base/net_resources.h new file mode 100644 index 0000000..d0b9f8b --- /dev/null +++ b/net/base/net_resources.h @@ -0,0 +1,4 @@
// TODO(tc): Come up with a way to automate the generation of these
// IDs so they don't collide with other rc files.
#define IDR_DIR_HEADER_HTML 400
#define IDR_EFFECTIVE_TLD_NAMES 401
diff --git a/net/base/net_resources.rc b/net/base/net_resources.rc new file mode 100644 index 0000000..a85f4d3 --- /dev/null +++ b/net/base/net_resources.rc @@ -0,0 +1,20 @@
// Resources used by the net module. This rc file is meant to be included by
// the application rc file (e.g., app/chrome_dll.rc).
//
// Paths in this file are relative to SolutionDir (//trunk/chrome/).

#ifdef APSTUDIO_INVOKED
  #error // Don't open in the Visual Studio resource editor!
#endif //APSTUDIO_INVOKED

#include "net\\base\\net_resources.h"

/////////////////////////////////////////////////////////////////////////////
//
// data resources
//

IDR_DIR_HEADER_HTML BINDATA "net\\base\\dir_header.html"

// The converted file is generated, so we need to use a path relative to "obj".
IDR_EFFECTIVE_TLD_NAMES BINDATA "net\\effective_tld_names_clean.dat"
diff --git a/net/base/net_util.cc b/net/base/net_util.cc new file mode 100644 index 0000000..416252c --- /dev/null +++ b/net/base/net_util.cc @@ -0,0 +1,993 @@
// Copyright 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include <algorithm> +#include <unicode/ucnv.h> +#include <unicode/uidna.h> +#include <unicode/ulocdata.h> +#include <unicode/uniset.h> +#include <unicode/uscript.h> +#include <unicode/uset.h> +#include <windows.h> +#include <wininet.h> + +#include "net/base/net_util.h" + +#include "base/basictypes.h" +#include "base/file_util.h" +#include "base/logging.h" +#include "base/path_service.h" +#include "base/scoped_ptr.h" +#include "base/string_tokenizer.h" +#include "base/string_util.h" +#include "base/time.h" +#include "base/string_escape.h" +#include "googleurl/src/gurl.h" +#include "googleurl/src/url_canon.h" +#include "googleurl/src/url_parse.h" +#include "net/base/escape.h" +#include "net/base/net_module.h" +#include "net/base/net_resources.h" +#include "net/base/base64.h" +#include "unicode/datefmt.h" + +namespace { + +// what we prepend to get a file URL +static const wchar_t kFileURLPrefix[] = L"file:///"; + +// The general list of blocked ports. Will be blocked unless a specific +// protocol overrides it. (Ex: ftp can use ports 20 and 21) +static const int kRestrictedPorts[] = { + 1, // tcpmux + 7, // echo + 9, // discard + 11, // systat + 13, // daytime + 15, // netstat + 17, // qotd + 19, // chargen + 20, // ftp data + 21, // ftp access + 22, // ssh + 23, // telnet + 25, // smtp + 37, // time + 42, // name + 43, // nicname + 53, // domain + 77, // priv-rjs + 79, // finger + 87, // ttylink + 95, // supdup + 101, // hostriame + 102, // iso-tsap + 103, // gppitnp + 104, // acr-nema + 109, // pop2 + 110, // pop3 + 111, // sunrpc + 113, // auth + 115, // sftp + 117, // uucp-path + 119, // nntp + 123, // NTP + 135, // loc-srv /epmap + 139, // netbios + 143, // imap2 + 179, // BGP + 389, // ldap + 465, // smtp+ssl + 512, // print / exec + 513, // login + 514, // shell + 515, // printer + 526, // tempo + 530, // courier + 531, // chat + 532, // netnews + 540, // uucp + 556, // remotefs + 563, // nntp+ssl + 587, // stmp? + 601, // ?? 
+ 636, // ldap+ssl + 993, // ldap+ssl + 995, // pop3+ssl + 2049, // nfs + 4045, // lockd + 6000, // X11 +}; + +// FTP overrides the following restricted ports. +static const int kAllowedFtpPorts[] = { + 21, // ftp data + 22, // ssh +}; + +template<typename STR> +STR GetSpecificHeaderT(const STR& headers, const STR& name) { + // We want to grab the Value from the "Key: Value" pairs in the headers, + // which should look like this (no leading spaces, \n-separated) (we format + // them this way in url_request_inet.cc): + // HTTP/1.1 200 OK\n + // ETag: "6d0b8-947-24f35ec0"\n + // Content-Length: 2375\n + // Content-Type: text/html; charset=UTF-8\n + // Last-Modified: Sun, 03 Sep 2006 04:34:43 GMT\n + if (headers.empty()) + return STR(); + + STR match; + match.push_back('\n'); + match.append(name); + match.push_back(':'); + + STR::const_iterator begin = + search(headers.begin(), headers.end(), match.begin(), match.end(), + CaseInsensitiveCompareASCII<STR::value_type>()); + + if (begin == headers.end()) + return STR(); + + begin += match.length(); + + STR::const_iterator end = find(begin, headers.end(), '\n'); + + STR ret; + TrimWhitespace(STR(begin, end), TRIM_ALL, &ret); + return ret; +} + +// TODO(jungshik): We have almost identical hex-decoding code else where. +// Consider refactoring and moving it somewhere(base?). Bug 1224311 +inline bool IsHexDigit(unsigned char c) { + return ('0' <= c && c <= '9' || 'A' <= c && c <= 'F' || 'a' <= c && c <= 'f'); +} + +inline unsigned char HexToInt(unsigned char c) { + DCHECK(IsHexDigit(c)); + static unsigned char kOffset[4] = {0, 0x30u, 0x37u, 0x57u}; + return c - kOffset[c / 0x20]; +} + +// Similar to Base64Decode. Decodes a Q-encoded string to a sequence +// of bytes. If input is invalid, return false. 
+bool QPDecode(const std::string& input, std::string* output) { + std::string temp; + temp.reserve(input.size()); + std::string::const_iterator it = input.begin(); + while (it != input.end()) { + if (*it == '_') { + temp.push_back(' '); + } else if (*it == '=') { + if (input.end() - it < 3) { + return false; + } + if (IsHexDigit(static_cast<unsigned char>(*(it + 1))) && + IsHexDigit(static_cast<unsigned char>(*(it + 2)))) { + unsigned char ch = HexToInt(*(it + 1)) * 16 + HexToInt(*(it + 2)); + temp.push_back(static_cast<char>(ch)); + ++it; + ++it; + } else { + return false; + } + } else if (0x20 < *it && *it < 0x7F) { + // In a Q-encoded word, only printable ASCII characters + // represent themselves. Besides, space, '=', '_' and '?' are + // not allowed, but they're already filtered out. + DCHECK(*it != 0x3D && *it != 0x5F && *it != 0x3F); + temp.push_back(*it); + } else { + return false; + } + ++it; + } + output->swap(temp); + return true; +} + +enum RFC2047EncodingType {Q_ENCODING, B_ENCODING}; +bool DecodeBQEncoding(const std::string& part, RFC2047EncodingType enc_type, + const std::string& charset, std::string* output) { + std::string decoded; + if (enc_type == B_ENCODING) { + if (!Base64Decode(part, &decoded)) { + return false; + } + } else { + if (!QPDecode(part, &decoded)) { + return false; + } + } + + UErrorCode err = U_ZERO_ERROR; + UConverter* converter(ucnv_open(charset.c_str(), &err)); + if (U_FAILURE(err)) { + return false; + } + + // A single byte in a legacy encoding can be expanded to 3 bytes in UTF-8. + // A 'two-byte character' in a legacy encoding can be expanded to 4 bytes + // in UTF-8. Therefore, the expansion ratio is 3 at most. 
+ int length = static_cast<int>(decoded.length()); + char* buf = WriteInto(output, length * 3); + length = ucnv_toAlgorithmic(UCNV_UTF8, converter, buf, length * 3, + decoded.data(), length, &err); + ucnv_close(converter); + if (U_FAILURE(err)) { + return false; + } + output->resize(length); + return true; +} + +bool DecodeWord(const std::string& encoded_word, + bool *is_rfc2047, + std::string* output) { + // TODO(jungshik) : Revisit this later. Do we want to pass through non-ASCII + // strings which can be mozibake? WinHTTP converts a raw 8bit string + // UTF-16 assuming it's in the OS default encoding. + if (!IsStringASCII(encoded_word)) { + // Try falling back to the NativeMB encoding if the raw input is not UTF-8. + if (IsStringUTF8(encoded_word.c_str())) { + *output = encoded_word; + } else { + *output = WideToUTF8(NativeMBToWide(encoded_word)); + } + *is_rfc2047 = false; + return true; + } + + // RFC 2047 : one of encoding methods supported by Firefox and relatively + // widely used by web servers. + // =?charset?<E>?<encoded string>?= where '<E>' is either 'B' or 'Q'. + // We don't care about the length restriction (72 bytes) because + // many web servers generate encoded words longer than the limit. + std::string tmp; + *is_rfc2047 = true; + int part_index = 0; + std::string charset; + StringTokenizer t(encoded_word, "?"); + RFC2047EncodingType enc_type = Q_ENCODING; + while (*is_rfc2047 && t.GetNext()) { + std::string part = t.token(); + switch (part_index) { + case 0: + if (part != "=") { + *is_rfc2047 = false; + break; + } + ++part_index; + break; + case 1: + // Do we need charset validity check here? 
+ charset = part; + ++part_index; + break; + case 2: + if (part.size() > 1 || + part.find_first_of("bBqQ") == std::string::npos) { + *is_rfc2047 = false; + break; + } + if (part[0] == 'b' || part[0] == 'B') { + enc_type = B_ENCODING; + } + ++part_index; + break; + case 3: + *is_rfc2047 = DecodeBQEncoding(part, enc_type, charset, &tmp); + if (!*is_rfc2047) { + // Last minute failure. Invalid B/Q encoding. Rather than + // passing it through, return now. + return false; + } + ++part_index; + break; + case 4: + if (part != "=") { + // Another last minute failure ! + // Likely to be a case of two encoded-words in a row or + // an encoded word followed by a non-encoded word. We can be + // generous, but it does not help much in terms of compatibility, + // I believe. Return immediately. + *is_rfc2047 = false; + return false; + } + ++part_index; + break; + default: + *is_rfc2047 = false; + return false; + } + } + + if (*is_rfc2047) { + if (*(encoded_word.end() - 1) == '=') { + output->swap(tmp); + return true; + } + // encoded_word ending prematurelly with '?' or extra '?' + *is_rfc2047 = false; + return false; + } + + // We're not handling 'especial' characters quoted with '\', but + // it should be Ok because we're not an email client but a + // web browser. + + // What IE6/7 does: %-escaped UTF-8. We could extend this to + // support a rudimentary form of RFC 2231 with charset label, but + // it'd gain us little in terms of compatibility. + tmp = UnescapeURLComponent(encoded_word, UnescapeRule::SPACES); + if (IsStringUTF8(tmp.c_str())) { + output->swap(tmp); + return true; + // We can try either the OS default charset or 'origin charset' here, + // As far as I can tell, IE does not support it. However, I've seen + // web servers emit %-escaped string in a legacy encoding (usually + // origin charset). + // TODO(jungshik) : Test IE further and consider adding a fallback here. 
+ } + return false; +} + +bool DecodeParamValue(const std::string& input, std::string* output) { + std::string tmp; + // Tokenize with whitespace characters. + StringTokenizer t(input, " \t\n\r"); + t.set_options(StringTokenizer::RETURN_DELIMS); + bool is_previous_token_rfc2047 = true; + while (t.GetNext()) { + if (t.token_is_delim()) { + // If the previous non-delimeter token is not RFC2047-encoded, + // put in a space in its place. Otheriwse, skip over it. + if (!is_previous_token_rfc2047) { + tmp.push_back(' '); + } + continue; + } + // We don't support a single multibyte character split into + // adjacent encoded words. Some broken mail clients emit headers + // with that problem, but most web servers usually encode a filename + // in a single encoded-word. Firefox/Thunderbird do not support + // it, either. + std::string decoded; + if (!DecodeWord(t.token(), &is_previous_token_rfc2047, &decoded)) + return false; + tmp.append(decoded); + } + output->swap(tmp); + return true; +} + +// TODO(mpcomplete): This is a quick and dirty implementation for now. I'm +// sure this doesn't properly handle all (most?) cases. +template<typename STR> +STR GetHeaderParamValueT(const STR& header, const STR& param_name) { + // This assumes args are formatted exactly like "bla; arg1=value; arg2=value". 
+ STR::const_iterator param_begin = + search(header.begin(), header.end(), param_name.begin(), param_name.end(), + CaseInsensitiveCompareASCII<STR::value_type>()); + + if (param_begin == header.end()) + return STR(); + param_begin += param_name.length(); + + STR whitespace; + whitespace.push_back(' '); + whitespace.push_back('\t'); + const STR::size_type equals_offset = + header.find_first_not_of(whitespace, param_begin - header.begin()); + if (equals_offset == STR::npos || header.at(equals_offset) != '=') + return STR(); + + param_begin = header.begin() + equals_offset + 1; + if (param_begin == header.end()) + return STR(); + + STR::const_iterator param_end; + if (*param_begin == '"') { + param_end = find(param_begin+1, header.end(), '"'); + if (param_end == header.end()) + return STR(); // poorly formatted param? + + ++param_begin; // skip past the quote. + } else { + param_end = find(param_begin+1, header.end(), ';'); + } + + return STR(param_begin, param_end); +} + +// Does some simple normalization of scripts so we can allow certain scripts +// to exist together. +// TODO(brettw) bug 880223: we should allow some other languages to be +// oombined such as Chinese and Latin. We will probably need a more +// complicated system of language pairs to have more fine-grained control. +UScriptCode NormalizeScript(UScriptCode code) { + switch (code) { + case USCRIPT_KATAKANA: + case USCRIPT_HIRAGANA: + case USCRIPT_KATAKANA_OR_HIRAGANA: + case USCRIPT_HANGUL: // This one is arguable. + return USCRIPT_HAN; + default: + return code; + } +} + +bool IsIDNComponentInSingleScript(const wchar_t* str, int str_len) { + UScriptCode first_script; + bool is_first = true; + + int i = 0; + while (i < str_len) { + unsigned code_point; + U16_NEXT(str, i, str_len, code_point); + + UErrorCode err = U_ZERO_ERROR; + UScriptCode cur_script = uscript_getScript(code_point, &err); + if (err != U_ZERO_ERROR) + return false; // Report mixed on error. 
+ cur_script = NormalizeScript(cur_script); + + // TODO(brettw) We may have to check for USCRIPT_INHERENT as well. + if (is_first && cur_script != USCRIPT_COMMON) { + first_script = cur_script; + is_first = false; + } else { + if (cur_script != USCRIPT_COMMON && cur_script != first_script) + return false; + } + } + return true; +} + +// Check if the script of a language can be 'safely' mixed with +// Latin letters in the ASCII range. +bool IsCompatibleWithASCIILetters(const std::string& lang) { + // For now, just list Chinese, Japanese and Korean (positive list). + // An alternative is negative-listing (languages using Greek and + // Cyrillic letters), but it can be more dangerous. + return !lang.substr(0,2).compare("zh") || + !lang.substr(0,2).compare("ja") || + !lang.substr(0,2).compare("ko"); +} + +// Returns true if the given Unicode host component is safe to display to the +// user. +bool IsIDNComponentSafe(const wchar_t* str, + int str_len, + const std::wstring& languages) { + // Most common cases (non-IDN) do not reach here so that we don't + // need a fast return path. + // TODO(jungshik) : Check if there's any character inappropriate + // (although allowed) for domain names. + // See http://www.unicode.org/reports/tr39/#IDN_Security_Profiles and + // http://www.unicode.org/reports/tr39/data/xidmodifications.txt + // For now, we borrow the list from Mozilla and tweaked it slightly. + // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because + // they're gonna be canonicalized to U+0020 and full stop before + // reaching here.) 
+ // The original list is available at + // http://kb.mozillazine.org/Network.IDN.blacklist_chars and + // at http://mxr.mozilla.org/seamonkey/source/modules/libpref/src/init/all.js#703 + + UErrorCode status = U_ZERO_ERROR; +#ifdef U_WCHAR_IS_UTF16 + UnicodeSet dangerous_characters(UnicodeString( + L"[[\\ \u00bc\u00bd\u01c3\u0337\u0338" + L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]" + L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]" + L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae" + L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014" + L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14" + L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]" + L"[\ufffa-\ufffd]]"), status); +#else + UnicodeSet dangerous_characters(UnicodeString( + "[[\\ \\u0020\\u00bc\\u00bd\\u01c3\\u0337\\u0338" + "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]" + "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]" + "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae" + "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014" + "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14" + "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]" + "[\\ufffa-\\ufffd]]", -1, US_INV), status); +#endif + DCHECK(U_SUCCESS(status)); + UnicodeSet component_characters; + component_characters.addAll(UnicodeString(str, str_len)); + if (dangerous_characters.containsSome(component_characters)) + return false; + + // If the language list is empty, the result is completely determined + // by whether a component is a single script or not. This will block + // even "safe" script mixing cases like <Chinese, Latin-ASCII> that are + // allowed with |languages| (while it blocks Chinese + Latin letters with + // an accent as should be the case), but we want to err on the safe side + // when |languages| is empty. 
+ if (languages.empty()) + return IsIDNComponentInSingleScript(str, str_len); + + // |common_characters| is made up of ASCII numbers, hyphen, plus and + // underscore that are used across scripts and allowed in domain names. + // (sync'd with characters allowed in url_canon_host with square + // brackets excluded.) See kHostCharLookup[] array in url_canon_host.cc. + UnicodeSet common_characters(UNICODE_STRING_SIMPLE("[[0-9]\\-_+\\ ]"), + status); + DCHECK(U_SUCCESS(status)); + // Subtract common characters because they're always allowed so that + // we just have to check if a language-specific set contains + // the remainder. + component_characters.removeAll(common_characters); + + USet *lang_set = uset_open(1, 0); // create an empty set + UnicodeSet ascii_letters(0x61, 0x7a); // [a-z] + bool safe = false; + std::string languages_list(WideToASCII(languages)); + StringTokenizer t(languages_list, ","); + while (t.GetNext()) { + std::string lang = t.token(); + status = U_ZERO_ERROR; + // TODO(jungshik) Cache exemplar sets for locales. + ULocaleData* uld = ulocdata_open(lang.c_str(), &status); + if (U_SUCCESS(status)) { + // Should we use auxiliary set, instead? + ulocdata_getExemplarSet(uld, lang_set, 0, ULOCDATA_ES_STANDARD, &status); + ulocdata_close(uld); + if (U_SUCCESS(status)) { + UnicodeSet* allowed_characters = + reinterpret_cast<UnicodeSet*>(lang_set); + // If |lang| is compatible with ASCII Latin letters, add them. + if (IsCompatibleWithASCIILetters(lang)) + allowed_characters->addAll(ascii_letters); + if (allowed_characters->containsAll(component_characters)) { + safe = true; + break; + } + } + } + } + uset_close(lang_set); + return safe; +} + +// Converts one component of a host (between dots) to IDN if safe. The result +// will be APPENDED to the given output string and will be the same as the +// input if it is not IDN or the IDN is unsafe to display. 
+void IDNToUnicodeOneComponent(const wchar_t* comp, + int comp_len, + const std::wstring& languages, + std::wstring* out) { + DCHECK(comp_len >= 0); + if (comp_len == 0) + return; + + // Expand the output string to make room for a possibly longer string + // (we'll expand if it's still not big enough below). + int extra_space = 64; + size_t host_begin_in_output = out->size(); + + // Just copy the input if it can't be an IDN component. + if (comp_len < 4 || wcsncmp(comp, L"xn--", 4)) { + out->resize(host_begin_in_output + comp_len); + for (int i = 0; i < comp_len; i++) + (*out)[host_begin_in_output + i] = comp[i]; + return; + } + + while (true) { + out->resize(out->size() + extra_space); + UErrorCode status = U_ZERO_ERROR; + int output_chars = uidna_IDNToUnicode( + comp, comp_len, &(*out)[host_begin_in_output], extra_space, + UIDNA_DEFAULT, NULL, &status); + if (status == U_ZERO_ERROR) { + // Converted successfully. + out->resize(host_begin_in_output + output_chars); + if (!IsIDNComponentSafe(&out->data()[host_begin_in_output], + output_chars, + languages)) + break; // The error handling below will undo the IDN. + return; + } + if (status != U_BUFFER_OVERFLOW_ERROR) + break; + + // Need to loop again with a bigger buffer. It looks like ICU will + // return the required size of the buffer, but that's not documented, + // so we'll just grow by 2x. This should be rare and is not on a + // critical path. + extra_space *= 2; + } + + // We get here on error, in which case we replace anything that was added + // with the literal input. + out->resize(host_begin_in_output + comp_len); + for (int i = 0; i < comp_len; i++) + (*out)[host_begin_in_output + i] = comp[i]; +} + +// Convert a FILETIME to a localized string. |filetime| may be NULL. +// TODO(tc): Remove this once bug 1164516 is fixed. 
+std::wstring LocalizedDateTime(const FILETIME* filetime) { + if (!filetime) + return std::wstring(); + + Time time = Time::FromFileTime(*filetime); + scoped_ptr<DateFormat> formatter(DateFormat::createDateTimeInstance( + DateFormat::kShort)); + UnicodeString date_string; + formatter->format(static_cast<UDate>(time.ToDoubleT() * 1000), date_string); + + std::wstring formatted; + int capacity = date_string.length() + 1; + UErrorCode error = U_ZERO_ERROR; + date_string.extract(static_cast<UChar*>(WriteInto(&formatted, capacity)), + capacity, error); + return formatted; +} + +} // namespace + +namespace net_util { + +GURL FilePathToFileURL(const std::wstring& file_path) { + // Produce a URL like "file:///C:/foo" for a regular file, or + // "file://///server/path" for UNC. The URL canonicalizer will fix up the + // latter case to be the canonical UNC form: "file://server/path" + std::wstring url_str(kFileURLPrefix); + url_str.append(file_path); + + // Now do replacement of some characters. Since we assume the input is a + // literal filename, anything the URL parser might consider special should + // be escaped here. + + // must be the first substitution since others will introduce percents as the + // escape character + ReplaceSubstringsAfterOffset(&url_str, 0, L"%", L"%25"); + + // semicolon is supposed to be some kind of separator according to RFC 2396 + ReplaceSubstringsAfterOffset(&url_str, 0, L";", L"%3B"); + + ReplaceSubstringsAfterOffset(&url_str, 0, L"#", L"%23"); + + return GURL(url_str); +} + +bool FileURLToFilePath(const GURL& url, std::wstring* file_path) { + file_path->clear(); + + if (!url.is_valid()) + return false; + + std::string path; + std::string host = url.host(); + if (host.empty()) { + // URL contains no host, the path is the filename. In this case, the path + // will probably be preceeded with a slash, as in "/C:/foo.txt", so we + // trim out that here. 
+ path = url.path(); + size_t first_non_slash = path.find_first_not_of("/\\"); + if (first_non_slash != std::string::npos && first_non_slash > 0) + path.erase(0, first_non_slash); + } else { + // URL contains a host: this means it's UNC. We keep the preceeding slash + // on the path. + path = "\\\\"; + path.append(host); + path.append(url.path()); + } + + if (path.empty()) + return false; + std::replace(path.begin(), path.end(), '/', '\\'); + + // GURL stores strings as percent-encoded UTF-8, this will undo if possible. + path = UnescapeURLComponent(path, + UnescapeRule::SPACES | UnescapeRule::PERCENTS); + + if (!IsStringUTF8(path.c_str())) { + // Not UTF-8, assume encoding is native codepage and we're done. We know we + // are giving the conversion function a nonempty string, and it may fail if + // the given string is not in the current encoding and give us an empty + // string back. We detect this and report failure. + *file_path = NativeMBToWide(path); + return !file_path->empty(); + } + file_path->assign(UTF8ToWide(path)); + + // Now we have an unescaped filename, but are still not sure about its + // encoding. For example, each character could be part of a UTF-8 string. + if (file_path->empty() || !IsString8Bit(*file_path)) { + // assume our 16-bit encoding is correct if it won't fit into an 8-bit + // string + return true; + } + + // Convert our narrow string into the native wide path. + std::string narrow; + if (!WideToLatin1(*file_path, &narrow)) { + NOTREACHED() << "Should have filtered out non-8-bit strings above."; + return false; + } + if (IsStringUTF8(narrow.c_str())) { + // Our string actually looks like it could be UTF-8, convert to 8-bit + // UTF-8 and then to the corresponding wide string. + *file_path = UTF8ToWide(narrow); + } else { + // Our wide string contains only 8-bit characters and it's not UTF-8, so + // we assume it's in the native codepage. 
+ *file_path = NativeMBToWide(narrow); + } + + // Fail if 8-bit -> wide conversion failed and gave us an empty string back + // (we already filtered out empty strings above). + return !file_path->empty(); +} + +std::wstring GetSpecificHeader(const std::wstring& headers, + const std::wstring& name) { + return GetSpecificHeaderT(headers, name); +} + +std::string GetSpecificHeader(const std::string& headers, + const std::string& name) { + return GetSpecificHeaderT(headers, name); +} + +std::wstring GetFileNameFromCD(const std::string& header) { + std::string param_value = GetHeaderParamValue(header, "filename"); + if (param_value.empty()) { + // Some servers use 'name' parameter. + param_value = GetHeaderParamValue(header, "name"); + } + if (param_value.empty()) + return std::wstring(); + std::string decoded; + if (DecodeParamValue(param_value, &decoded)) + return UTF8ToWide(decoded); + return std::wstring(); +} + +std::wstring GetHeaderParamValue(const std::wstring& field, + const std::wstring& param_name) { + return GetHeaderParamValueT(field, param_name); +} + +std::string GetHeaderParamValue(const std::string& field, + const std::string& param_name) { + return GetHeaderParamValueT(field, param_name); +} + +// TODO(brettw) bug 734373: check the scripts for each host component and +// don't un-IDN-ize if there is more than one. Alternatively, only IDN for +// scripts that the user has installed. For now, just put the entire +// path through IDN. Maybe this feature can be implemented in ICU itself? +// +// We may want to skip this step in the case of file URLs to allow unicode +// UNC hostnames regardless of encodings. +void IDNToUnicode(const char* host, + int host_len, + const std::wstring& languages, + std::wstring* out) { + // Convert the ASCII input to a wide string for ICU. 
+ std::wstring wide_input; + wide_input.reserve(host_len); + for (int i = 0; i < host_len; i++) + wide_input.push_back(host[i]); + + // Do each component of the host separately, since we enforce script matching + // on a per-component basis. + size_t cur_begin = 0; // Beginning of the current component (inclusive). + while (cur_begin < wide_input.size()) { + // Find the next dot or the end of the string. + size_t next_dot = wide_input.find_first_of('.', cur_begin); + if (next_dot == std::wstring::npos) + next_dot = wide_input.size(); // For getting the last component. + + if (next_dot > cur_begin) { + // Add the substring that we just found. + IDNToUnicodeOneComponent(&wide_input[cur_begin], + static_cast<int>(next_dot - cur_begin), + languages, + out); + } + + // Need to add the dot we just found (if we found one). This needs to be + // done before we break out below in case the URL ends in a dot. + if (next_dot < wide_input.size()) + out->push_back('.'); + else + break; // No more components left. + + cur_begin = next_dot + 1; + } +} + +template <typename str> +std::string CanonicalizeHost(const str& host, bool* is_ip_address) { + // Try to canonicalize the host. + const url_parse::Component raw_host_component(0, + static_cast<int>(host.length())); + std::string canon_host; + url_canon::StdStringCanonOutput canon_host_output(&canon_host); + url_parse::Component canon_host_component; + if (!url_canon::CanonicalizeHost(host.c_str(), raw_host_component, + &canon_host_output, &canon_host_component)) { + if (is_ip_address) + *is_ip_address = false; + return std::string(); + } + canon_host_output.Complete(); + + if (is_ip_address) { + // See if the host is an IP address. 
+ url_canon::RawCanonOutputT<char, 128> ignored_output; + url_parse::Component ignored_component; + *is_ip_address = url_canon::CanonicalizeIPAddress(canon_host.c_str(), + canon_host_component, + &ignored_output, + &ignored_component); + } + + // Return the host as a string, stripping any unnecessary bits off the ends. + if ((canon_host_component.begin == 0) && + (canon_host_component.len == canon_host.length())) + return canon_host; + return canon_host.substr(canon_host_component.begin, + canon_host_component.len); +} + +// Forcibly instantiate narrow and wide versions of this function so we don't +// need to put the function definition in the header. +template std::string CanonicalizeHost<std::string>(const std::string& host, + bool* is_ip_address); +template std::string CanonicalizeHost<std::wstring>(const std::wstring& host, + bool* is_ip_address); + +std::string GetDirectoryListingHeader(const std::string& title) { + std::string result = NetModule::GetResource(IDR_DIR_HEADER_HTML); + if (result.empty()) { + NOTREACHED() << "expected resource not found"; + } + + result.append("<script>start("); + string_escape::JavascriptDoubleQuote(title, true, &result); + result.append(");</script>\n"); + + return result; +} + +std::string GetDirectoryListingEntry(const std::string& name, + DWORD attrib, + int64 size, + const FILETIME* modified) { + std::string result; + result.append("<script>addRow("); + string_escape::JavascriptDoubleQuote(name, true, &result); + result.append(","); + string_escape::JavascriptDoubleQuote( + EscapePath(name), true, &result); + if (attrib & FILE_ATTRIBUTE_DIRECTORY) { + result.append(",1,"); + } else { + result.append(",0,"); + } + + string_escape::JavascriptDoubleQuote( + FormatBytes(size, GetByteDisplayUnits(size), true), true, &result); + + result.append(","); + + string_escape::JavascriptDoubleQuote( + LocalizedDateTime(modified), true, &result); + + result.append(");</script>\n"); + + return result; +} + +std::wstring StripWWW(const 
std::wstring& text) { + const std::wstring www(L"www."); + return (text.compare(0, www.length(), www) == 0) ? + text.substr(www.length()) : text; +} + +std::wstring GetSuggestedFilename(const GURL& url, + const std::string& content_disposition, + const std::wstring& default_name) { + std::wstring filename = GetFileNameFromCD(content_disposition); + if (!filename.empty()) { + // Remove any path information the server may have sent, take the name + // only. + filename = file_util::GetFilenameFromPath(filename); + // Next, remove "." from the beginning and end of the file name to avoid + // tricks with hidden files, "..", and "." + TrimString(filename, L".", &filename); + } + if (filename.empty()) { + if (url.is_valid()) + filename = UnescapeAndDecodeUTF8URLComponent( + url.ExtractFileName(), UnescapeRule::SPACES | UnescapeRule::PERCENTS); + } + + // Trim '.' once more. + TrimString(filename, L".", &filename); + // If there's no filename or it gets trimmed to be empty, use + // the URL hostname or default_name + if (filename.empty()) { + if (!default_name.empty()) + filename = default_name; + else if (url.is_valid()) { + // Some schemes (e.g. file) do not have a hostname. Even though it's + // not likely to reach here, let's hardcode the last fallback name. + // TODO(jungshik) : Decode a 'punycoded' IDN hostname. (bug 1264451) + filename = url.host().empty() ? 
L"download" : UTF8ToWide(url.host()); + } else + NOTREACHED(); + } + + file_util::ReplaceIllegalCharacters(&filename, '-'); + return filename; +} + +std::wstring GetSuggestedFilename(const GURL& url, + const std::wstring& content_disposition, + const std::wstring& default_name) { + return GetSuggestedFilename( + url, WideToUTF8(content_disposition), default_name); +} + +bool IsPortAllowedByDefault(int port) { + int array_size = arraysize(kRestrictedPorts); + for (int i = 0; i < array_size; i++) { + if (kRestrictedPorts[i] == port) { + return false; + } + } + return true; +} + +bool IsPortAllowedByFtp(int port) { + int array_size = arraysize(kAllowedFtpPorts); + for (int i = 0; i < array_size; i++) { + if (kAllowedFtpPorts[i] == port) { + return true; + } + } + // Port not explicitly allowed by FTP, so return the default restrictions. + return IsPortAllowedByDefault(port); +} + +} // namespace net_util diff --git a/net/base/net_util.h b/net/base/net_util.h new file mode 100644 index 0000000..d0955a1 --- /dev/null +++ b/net/base/net_util.h @@ -0,0 +1,153 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef NET_BASE_NET_UTIL_H__ +#define NET_BASE_NET_UTIL_H__ + +#include <string> +#include <windows.h> + +#include "base/basictypes.h" +#include "googleurl/src/url_canon.h" +#include "googleurl/src/url_parse.h" + +class GURL; + +namespace net_util { + +// Given the full path to a file name, creates a file: URL. The returned URL +// may not be valid if the input is malformed. +GURL FilePathToFileURL(const std::wstring& file_path); + +// Converts a file: URL back to a filename that can be passed to the OS. The +// file URL must be well-formed (GURL::is_valid() must return true); we don't +// handle degenerate cases here. Returns true on success, false if it isn't a +// valid file URL. On failure, *file_path will be empty. +bool FileURLToFilePath(const GURL& url, std::wstring* file_path); + +// Return the value of the HTTP response header with name 'name'. 'headers' +// should be in the format that URLRequest::GetResponseHeaders() returns. +// Returns the empty string if the header is not found. 
+std::wstring GetSpecificHeader(const std::wstring& headers, + const std::wstring& name); +std::string GetSpecificHeader(const std::string& headers, + const std::string& name); + +// Return the value of the HTTP response header field's parameter named +// 'param_name'. Returns the empty string if the parameter is not found or is +// improperly formatted. +std::wstring GetHeaderParamValue(const std::wstring& field, + const std::wstring& param_name); +std::string GetHeaderParamValue(const std::string& field, + const std::string& param_name); + +// Return the filename extracted from Content-Disposition header. Only two +// formats are supported: a. %-escaped UTF-8 b. RFC 2047. +// +// A non-ASCII param value is just returned as it is (assuming a NativeMB +// encoding). When a param value is ASCII, but is not in one of two forms +// supported, it is returned as it is unless it's pretty close to two supported +// formats but not well-formed. In that case, an empty string is returned. +// +// In any case, a caller must check for the empty return value and resort to +// another means to get a filename (e.g. url). +// +// This function does not do any escaping and callers are responsible for +// escaping 'unsafe' characters (e.g. (back)slash, colon) as they see fit. +// +// TODO(jungshik): revisit this issue. At the moment, the only caller is +// net_util::GetSuggestedFilename and it calls ReplaceIllegalCharacters. The +// other caller is a unit test. Need to figure out how to expose this function only to +// net_util_unittest. +// +std::wstring GetFileNameFromCD(const std::string& header); + +// Converts the given host name to unicode characters, APPENDING them to the +// given output string. This can be called for any host name, if the +// input is not IDN or is invalid in some way, we'll just append the ASCII +// source to the output so it is still usable. +// +// The input should be the canonicalized ASCII host name from GURL. This +// function does NOT accept UTF-8! 
Its length must also be given (this is +// designed to work on the substring of the host out of a URL spec). +// +// |languages| is a comma separated list of ISO 639 language codes. It +// is used to determine whether a hostname is 'comprehensible' to a user +// who understands languages listed. |host| will be converted to a +// human-readable form (Unicode) ONLY when each component of |host| is +// regarded as 'comprehensible'. Script-mixing is not allowed except that +// Latin letters in the ASCII range can be mixed with a limited set of +// script-language pairs (currently Han, Kana and Hangul for zh,ja and ko). +// When |languages| is empty, even that mixing is not allowed. +void IDNToUnicode(const char* host, + int host_len, + const std::wstring& languages, + std::wstring* out); + +// Canonicalizes |host| and returns it. If |is_ip_address| is non-NULL, sets it +// to true if |host| is an IP address. +template <typename str> +std::string CanonicalizeHost(const str& host, bool* is_ip_address); + +// Call these functions to get the html for a directory listing. +// They will pass non-7bit-ascii characters unescaped, allowing +// the browser to interpret the encoding (utf8, etc). +std::string GetDirectoryListingHeader(const std::string& title); +std::string GetDirectoryListingEntry(const std::string& name, DWORD attrib, + int64 size, const FILETIME* modified); + +// If text starts with "www." it is removed, otherwise text is returned +// unmodified. +std::wstring StripWWW(const std::wstring& text); + +// Gets the filename from the raw Content-Disposition header (as read from the +// network). Otherwise uses the last path component name or hostname from +// |url|. Note: it's possible for the suggested filename to be empty (e.g., +// file:/// or view-cache:). +std::wstring GetSuggestedFilename(const GURL& url, + const std::string& content_disposition, + const std::wstring& default_name); + +// DEPRECATED: Please use the above version of this method. 
+std::wstring GetSuggestedFilename(const GURL& url, + const std::wstring& content_disposition, + const std::wstring& default_name); + +// Checks the given port against a list of ports which are restricted by +// default. Returns true if the port is allowed, false if it is restricted. +bool IsPortAllowedByDefault(int port); + +// Checks the given port against a list of ports which are restricted by the +// FTP protocol. Returns true if the port is allowed, false if it is +// restricted. +bool IsPortAllowedByFtp(int port); + +} // namespace net_util + +#endif // NET_BASE_NET_UTIL_H__ diff --git a/net/base/net_util_unittest.cc b/net/base/net_util_unittest.cc new file mode 100644 index 0000000..c0b1f7c --- /dev/null +++ b/net/base/net_util_unittest.cc @@ -0,0 +1,671 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "base/basictypes.h" +#include "base/logging.h" +#include "base/string_util.h" +#include "googleurl/src/gurl.h" +#include "net/base/net_util.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace { + class NetUtilTest : public testing::Test { + }; +} + +TEST(NetUtilTest, FileURLConversion) { + // a list of test file names and the corresponding URLs + const struct FileCase { + const wchar_t* file; + const wchar_t* url; + } round_trip_cases[] = { + {L"C:\\foo\\bar.txt", L"file:///C:/foo/bar.txt"}, + {L"\\\\some computer\\foo\\bar.txt", L"file://some%20computer/foo/bar.txt"}, // UNC + {L"D:\\Name;with%some symbols*#", L"file:///D:/Name%3Bwith%25some%20symbols*%23"}, + {L"D:\\Chinese\\\x6240\x6709\x4e2d\x6587\x7f51\x9875.doc", L"file:///D:/Chinese/%E6%89%80%E6%9C%89%E4%B8%AD%E6%96%87%E7%BD%91%E9%A1%B5.doc"}, + }; + + // First, we'll test that we can round-trip all of the above cases of URLs + std::wstring output; + for (int i = 0; i < arraysize(round_trip_cases); i++) { + // convert to the file URL + GURL file_url(net_util::FilePathToFileURL(round_trip_cases[i].file)); + EXPECT_EQ(std::wstring(round_trip_cases[i].url), + UTF8ToWide(file_url.spec())); + + // Back to the filename. 
+ EXPECT_TRUE(net_util::FileURLToFilePath(file_url, &output)); + EXPECT_EQ(std::wstring(round_trip_cases[i].file), output); + } + + // Test that various file: URLs get decoded into the correct file type + FileCase url_cases[] = { + {L"C:\\foo\\bar.txt", L"file:c|/foo\\bar.txt"}, + {L"C:\\foo\\bar.txt", L"file:/c:/foo/bar.txt"}, + {L"\\\\foo\\bar.txt", L"file://foo\\bar.txt"}, + {L"C:\\foo\\bar.txt", L"file:///c:/foo/bar.txt"}, + {L"\\\\foo\\bar.txt", L"file:////foo\\bar.txt"}, + {L"\\\\foo\\bar.txt", L"file:/foo/bar.txt"}, + {L"\\\\foo\\bar.txt", L"file://foo\\bar.txt"}, + {L"C:\\foo\\bar.txt", L"file:\\\\\\c:/foo/bar.txt"}, + }; + for (int i = 0; i < arraysize(url_cases); i++) { + net_util::FileURLToFilePath(GURL(url_cases[i].url), &output); + EXPECT_EQ(std::wstring(url_cases[i].file), output); + } + + // Here, we test that UTF-8 encoded strings get decoded properly, even when + // they might be stored with wide characters + const wchar_t utf8[] = L"file:///d:/Chinese/\xe6\x89\x80\xe6\x9c\x89\xe4\xb8\xad\xe6\x96\x87\xe7\xbd\x91\xe9\xa1\xb5.doc"; + const wchar_t wide[] = L"D:\\Chinese\\\x6240\x6709\x4e2d\x6587\x7f51\x9875.doc"; + EXPECT_TRUE(net_util::FileURLToFilePath(GURL(utf8), &output)); + EXPECT_EQ(std::wstring(wide), output); + + // Unfortunately, UTF8ToWide discards invalid UTF8 input. +#ifdef BUG_878908_IS_FIXED + // Test that no conversion happens if the UTF-8 input is invalid, and that + // the input is preserved in UTF-8 + const char invalid_utf8[] = "file:///d:/Blah/\xff.doc"; + const wchar_t invalid_wide[] = L"D:\\Blah\\\xff.doc"; + EXPECT_TRUE(net_util::FileURLToFilePath( + GURL(std::string(invalid_utf8)), &output)); + EXPECT_EQ(std::wstring(invalid_wide), output); +#endif + + // Test that if a file URL is malformed, we get a failure + EXPECT_FALSE(net_util::FileURLToFilePath(GURL("filefoobar"), &output)); +} + +// Just a bunch of fake headers. 
+const wchar_t* google_headers = + L"HTTP/1.1 200 OK\n" + L"Content-TYPE: text/html; charset=utf-8\n" + L"Content-disposition: attachment; filename=\"download.pdf\"\n" + L"Content-Length: 378557\n" + L"X-Google-Google1: 314159265\n" + L"X-Google-Google2: aaaa2:7783,bbb21:9441\n" + L"X-Google-Google4: home\n" + L"Transfer-Encoding: chunked\n" + L"Set-Cookie: HEHE_AT=6666x66beef666x6-66xx6666x66; Path=/mail\n" + L"Set-Cookie: HEHE_HELP=owned:0;Path=/\n" + L"Set-Cookie: S=gmail=Xxx-beefbeefbeef_beefb:gmail_yj=beefbeef000beefbeefbee:gmproxy=bee-fbeefbe; Domain=.google.com; Path=/\n" + L"X-Google-Google2: /one/two/three/four/five/six/seven-height/nine:9411\n" + L"Server: GFE/1.3\n" + L"Transfer-Encoding: chunked\n" + L"Date: Mon, 13 Nov 2006 21:38:09 GMT\n" + L"Expires: Tue, 14 Nov 2006 19:23:58 GMT\n" + L"X-Malformed: bla; arg=test\"\n" + L"X-Malformed2: bla; arg=\n" + L"X-Test: bla; arg1=val1; arg2=val2"; + +TEST(NetUtilTest, GetSpecificHeader) { + const struct { + const wchar_t* header_name; + const wchar_t* expected; + } tests[] = { + {L"content-type", L"text/html; charset=utf-8"}, + {L"CONTENT-LENGTH", L"378557"}, + {L"Date", L"Mon, 13 Nov 2006 21:38:09 GMT"}, + {L"Bad-Header", L""}, + {L"", L""}, + }; + + // Test first with google_headers. + for (size_t i = 0; i < arraysize(tests); ++i) { + std::wstring result = net_util::GetSpecificHeader(google_headers, + tests[i].header_name); + EXPECT_EQ(result, tests[i].expected); + } + + // Test again with empty headers. 
+ for (size_t i = 0; i < arraysize(tests); ++i) { + std::wstring result = net_util::GetSpecificHeader(L"", tests[i].header_name); + EXPECT_EQ(result, std::wstring()); + } +} + +TEST(NetUtilTest, GetHeaderParamValue) { + const struct { + const wchar_t* header_name; + const wchar_t* param_name; + const wchar_t* expected; + } tests[] = { + {L"Content-type", L"charset", L"utf-8"}, + {L"content-disposition", L"filename", L"download.pdf"}, + {L"Content-Type", L"badparam", L""}, + {L"X-Malformed", L"arg", L"test\""}, + {L"X-Malformed2", L"arg", L""}, + {L"X-Test", L"arg1", L"val1"}, + {L"X-Test", L"arg2", L"val2"}, + {L"Bad-Header", L"badparam", L""}, + {L"Bad-Header", L"", L""}, + {L"", L"badparam", L""}, + {L"", L"", L""}, + }; + // TODO(mpcomplete): add tests for other formats of headers. + + for (size_t i = 0; i < arraysize(tests); ++i) { + std::wstring header_value = net_util::GetSpecificHeader(google_headers, + tests[i].header_name); + std::wstring result = net_util::GetHeaderParamValue(header_value, + tests[i].param_name); + EXPECT_EQ(result, tests[i].expected); + } + + for (size_t i = 0; i < arraysize(tests); ++i) { + std::wstring header_value = net_util::GetSpecificHeader(L"", + tests[i].header_name); + std::wstring result = net_util::GetHeaderParamValue(header_value, + tests[i].param_name); + EXPECT_EQ(result, std::wstring()); + } +} + +TEST(NetUtilTest, GetFileNameFromCD) { + const struct { + const char* header_field; + const wchar_t* expected; + } tests[] = { + // Test various forms of C-D header fields emitted by web servers. 
+ {"content-disposition: inline; filename=\"abcde.pdf\"", L"abcde.pdf"}, + {"content-disposition: inline; name=\"abcde.pdf\"", L"abcde.pdf"}, + {"content-disposition: attachment; filename=abcde.pdf", L"abcde.pdf"}, + {"content-disposition: attachment; name=abcde.pdf", L"abcde.pdf"}, + {"content-disposition: attachment; filename=abc,de.pdf", L"abc,de.pdf"}, + {"content-disposition: filename=abcde.pdf", L"abcde.pdf"}, + {"content-disposition: filename= abcde.pdf", L"abcde.pdf"}, + {"content-disposition: filename =abcde.pdf", L"abcde.pdf"}, + {"content-disposition: filename = abcde.pdf", L"abcde.pdf"}, + {"content-disposition: filename\t=abcde.pdf", L"abcde.pdf"}, + {"content-disposition: filename \t\t =abcde.pdf", L"abcde.pdf"}, + {"content-disposition: name=abcde.pdf", L"abcde.pdf"}, + {"content-disposition: inline; filename=\"abc%20de.pdf\"", L"abc de.pdf"}, + // Whitespaces are converted to a space. + {"content-disposition: inline; filename=\"abc \t\nde.pdf\"", L"abc de.pdf"}, + // %-escaped UTF-8 + {"Content-Disposition: attachment; filename=\"%EC%98%88%EC%88%A0%20" + "%EC%98%88%EC%88%A0.jpg\"", L"\xc608\xc220 \xc608\xc220.jpg"}, + {"Content-Disposition: attachment; filename=\"%F0%90%8C%B0%F0%90%8C%B1" + "abc.jpg\"", L"\U00010330\U00010331abc.jpg"}, + {"Content-Disposition: attachment; filename=\"%EC%98%88%EC%88%A0 \n" + "%EC%98%88%EC%88%A0.jpg\"", L"\xc608\xc220 \xc608\xc220.jpg"}, + // RFC 2047 with various charsets and Q/B encodings + {"Content-Disposition: attachment; filename=\"=?EUC-JP?Q?=B7=DD=BD=" + "D13=2Epng?=\"", L"\x82b8\x8853" L"3.png"}, + {"Content-Disposition: attachment; filename==?eUc-Kr?b?v7m8+iAzLnBuZw==?=", + L"\xc608\xc220 3.png"}, + {"Content-Disposition: attachment; filename==?utf-8?Q?=E8=8A=B8=E8" + "=A1=93_3=2Epng?=", L"\x82b8\x8853 3.png"}, + {"Content-Disposition: attachment; filename==?utf-8?Q?=F0=90=8C=B0" + "_3=2Epng?=", L"\U00010330 3.png"}, + {"Content-Disposition: inline; filename=\"=?iso88591?Q?caf=e3_=2epng?=\"", + L"caf\x00e3 
.png"}, + // Space after an encode word should be removed. + {"Content-Disposition: inline; filename=\"=?iso88591?Q?caf=E3_?= .png\"", + L"caf\x00e3 .png"}, + // Two encoded words with different charsets (not very likely to be emitted + // by web servers in the wild). Spaces between them are removed. + {"Content-Disposition: inline; filename=\"=?euc-kr?b?v7m8+iAz?=" + " =?ksc5601?q?=BF=B9=BC=FA=2Epng?=\"", L"\xc608\xc220 3\xc608\xc220.png"}, + {"Content-Disposition: attachment; filename=\"=?windows-1252?Q?caf=E3?=" + " =?iso-8859-7?b?4eI=?= .png\"", L"caf\x00e3\x03b1\x03b2.png"}, + // Non-ASCII string is passed through (and treated as UTF-8). + {"Content-Disposition: attachment; filename=caf\xc3\xa3.png", + L"caf\x00e3.png"}, + // Failure cases + // Invalid hex-digit "G" + {"Content-Disposition: attachment; filename==?iiso88591?Q?caf=EG?=", L""}, + // Incomplete RFC 2047 encoded-word (missing '='' at the end) + {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?", L""}, + // Extra character at the end of an encoded word + {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?==", L""}, + // Extra token at the end of an encoded word + {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?=?", L""}, + {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?=?=", L""}, + // Incomplete hex-escaped chars + {"Content-Disposition: attachment; filename==?windows-1252?Q?=63=61=E?=", + L""}, + {"Content-Disposition: attachment; filename=%EC%98%88%EC%88%A", L""}, + // %-escaped non-UTF-8 encoding is an "error" + {"Content-Disposition: attachment; filename=%B7%DD%BD%D1.png", L""}, + // Two RFC 2047 encoded words in a row without a space is an error. 
+ {"Content-Disposition: attachment; filename==?windows-1252?Q?caf=E3?=" + "=?iso-8859-7?b?4eIucG5nCg==?=", L""}, + }; + for (size_t i = 0; i < arraysize(tests); ++i) { + EXPECT_EQ(tests[i].expected, + net_util::GetFileNameFromCD(tests[i].header_field)); + } +} + +TEST(NetUtilTest, IDNToUnicode) { + // TODO(jungshik) This is just a random sample of languages and is far + // from exhaustive. We may have to generate all the combinations + // of languages (powerset of a set of all the languages). + const wchar_t* languages[] = { + L"", L"en", L"zh-CN", L"ja", L"ko", + L"he", L"ar", L"ru", L"el", L"fr", + L"de", L"pt", L"se", L"th", L"hi", + L"de,en", L"el,en", L"zh,zh-TW,en", L"ko,ja", L"he,ru,en", + L"zh,ru,en"}; + struct IDNTest { + const char* input; + const wchar_t* unicode_output; + const bool unicode_allowed[arraysize(languages)]; + } idn_cases[] = { + // No IDN + {"www.google.com", L"www.google.com", + {true, true, true, true, true, + true, true, true, true, true, + true, true, true, true, true, + true, true, true, true, true, + true}}, + {"www.google.com.", L"www.google.com.", + {true, true, true, true, true, + true, true, true, true, true, + true, true, true, true, true, + true, true, true, true, true, + true}}, + {".", L".", + {true, true, true, true, true, + true, true, true, true, true, + true, true, true, true, true, + true, true, true, true, true, + true}}, + {"", L"", + {true, true, true, true, true, + true, true, true, true, true, + true, true, true, true, true, + true, true, true, true, true, + true}}, + // IDN + // Hanzi (Chinese) + {"xn--1lq90i.cn", L"\x5317\x4eac.cn", + {true, false, true, true, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, true, true, false, + true}}, + // Hanzi + '123' + {"www.xn--123-p18d.com", L"www.\x4e00" L"123.com", + {true, false, true, true, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, true, true, false, + true}}, + // 
Hanzi + Latin + {"www.xn--hello-9n1hm04c.com", L"www.hello\x4e2d\x56fd.com", + {false, false, true, true, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, true, true, false, + true}}, + // Kanji + Kana (Japanese) + {"xn--l8jvb1ey91xtjb.jp", L"\x671d\x65e5\x3042\x3055\x3072.jp", + {true, false, false, true, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, false, true, false, + false}}, + #if 0 + // U+30FC is not a part of the Japanese exemplar set. + // Enable this after 'fixing' ICU data or locally working around it. + // Katakana + Latin (Japanese) + {"xn--e-efusa1mzf.jp", L"e\x30b3\x30de\x30fc\x30b9.jp", + {true, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + }}, + #endif + // Hangul (Korean) + {"www.xn--or3b17p6jjc.kr", L"www.\xc804\xc790\xc815\xbd80.kr", + {true, false, false, false, true, + false, false, false, false, false, + false, false, false, false, false, + false, false, false, true, false, + false}}, + // b<u-umlaut>cher (German) + {"xn--bcher-kva.de", L"b\x00fc" L"cher.de", + {true, false, false, false, false, + false, false, false, false, true, + true, true, false, false, false, + true, false, false, false, false, + false}}, + // a with diaeresis + {"www.xn--frgbolaget-q5a.se", L"www.f\x00e4rgbolaget.se", + {true, false, false, false, false, + false, false, false, false, false, + true, false, false, false, false, + true, false, false, false, false, + false}}, + // c-cedilla (French) + {"www.xn--alliancefranaise-npb.fr", L"www.alliancefran\x00e7" L"aise.fr", + {true, false, false, false, false, + false, false, false, false, true, + false, true, false, false, false, + false, false, false, false, false, + false}}, + // caf'e with acute accent' (French) + {"xn--caf-dma.fr", L"caf\x00e9.fr", + {true, false, false, false, false, + false, false, false, false, true, + 
false, true, false, false, false, + false, false, false, false, false, + false}}, + // c-cedillla and a with tilde (Portuguese) + {"xn--poema-9qae5a.com.br", L"p\x00e3oema\x00e7\x00e3.com.br", + {true, false, false, false, false, + false, false, false, false, false, + false, true, false, false, false, + false, false, false, false, false, + false}}, + // s with caron + {"xn--achy-f6a.com", L"\x0161" L"achy.com", + {true, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false}}, + // TODO(jungshik) : Add examples with Cyrillic letters + // only used in some languages written in Cyrillic. + // Eutopia (Greek) + {"xn--kxae4bafwg.gr", L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1.gr", + {true, false, false, false, false, + false, false, false, true, false, + false, false, false, false, false, + false, true, false, false, false, + false}}, + // Eutopia + 123 (Greek) + {"xn---123-pldm0haj2bk.gr", + L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1-123.gr", + {true, false, false, false, false, + false, false, false, true, false, + false, false, false, false, false, + false, true, false, false, false, + false}}, + // Cyrillic (Russian) + {"xn--n1aeec9b.ru", L"\x0442\x043e\x0440\x0442\x044b.ru", + {true, false, false, false, false, + false, false, true, false, false, + false, false, false, false, false, + false, false, false, false, true, + true}}, + // Cyrillic + 123 (Russian) + {"xn---123-45dmmc5f.ru", L"\x0442\x043e\x0440\x0442\x044b-123.ru", + {true, false, false, false, false, + false, false, true, false, false, + false, false, false, false, false, + false, false, false, false, true, + true}}, + // Arabic + {"xn--mgba1fmg.ar", L"\x0627\x0641\x0644\x0627\x0645.ar", + {true, false, false, false, false, + false, true, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false}}, + // Hebrew + {"xn--4dbib.he", L"\x05d5\x05d0\x05d4.he", + {true, 
false, false, false, false, + true, false, false, false, false, + false, false, false, false, false, + false, false, false, false, true, + false}}, + // Thai + {"xn--12c2cc4ag3b4ccu.th", + L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th", + {true, false, false, false, false, + false, false, false, false, false, + false, false, false, true, false, + false, false, false, false, false, + false}}, + // Devangari (Hindi) + {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in", + {true, false, false, false, false, + false, false, false, false, false, + false, false, false, false, true, + false, false, false, false, false, + false}}, + // Invalid IDN + {"xn--hello?world.com", NULL, + {false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false}}, + // Unsafe IDNs + // "payp<alpha>l.com" + {"www.xn--paypl-g9d.com", L"payp\x03b1l.com", + {false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false}}, + // google.gr with Greek omicron and epsilon + {"xn--ggl-6xc1ca.gr", L"g\x03bf\x03bfgl\x03b5.gr", + {false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false}}, + // google.ru with Cyrillic o + {"xn--ggl-tdd6ba.ru", L"g\x043e\x043egl\x0435.ru", + {false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false}}, + // h<e with acute>llo<China in Han>.cn + {"xn--hllo-bpa7979ih5m.cn", L"h\x00e9llo\x4e2d\x56fd.cn", + {false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false}}, + // <Greek rho><Cyrillic a><Cyrillic u>.ru + {"xn--2xa6t2b.ru", L"\x03c1\x0430\x0443.ru", + {false, false, false, 
false, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false}}, + // One that's really long that will force a buffer realloc + {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + {true, true, true, true, true, + true, true, true, true, true, + true, true, true, true, true, + true, true, true, true, true, + true}}, + // Test cases for characters we blacklisted although allowed in IDN. + // Embedded spaces will be turned to %20 in the display. + // TODO(jungshik): We need to have more cases. This is a typical + // data-driven trap. The following test cases need to be separated + // and tested only for a couple of languages. + {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr", + {false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false}}, + {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com", + {false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + }}, + {"google.xn--comabc-k8d", L"google.com\x0338" L"abc", + {false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, false, false, false, + }}, +#if 0 + // These two cases are special. We need a separate test. + // U+3000 and U+3002 are normalized to ASCII space and dot. 
+ {"xn-- -kq6ay5z.cn", L"\x4e2d\x56fd\x3000.cn", + {false, false, true, false, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, true, false, false, + true}}, + {"xn--fiqs8s.cn", L"\x4e2d\x56fd\x3002" L"cn", + {false, false, true, false, false, + false, false, false, false, false, + false, false, false, false, false, + false, false, true, false, false, + true}}, +#endif + }; + + for (int i = 0; i < arraysize(idn_cases); i++) { + for (int j = 0; j < arraysize(languages); j++) { + std::wstring output; + net_util::IDNToUnicode(idn_cases[i].input, + static_cast<int>(strlen(idn_cases[i].input)), + languages[j], + &output); + std::wstring expected(idn_cases[i].unicode_allowed[j] ? + idn_cases[i].unicode_output : + ASCIIToWide(idn_cases[i].input)); + EXPECT_EQ(expected, output); + } + } +} + +TEST(NetUtilTest, StripWWW) { + EXPECT_EQ(L"", net_util::StripWWW(L"")); + EXPECT_EQ(L"", net_util::StripWWW(L"www.")); + EXPECT_EQ(L"blah", net_util::StripWWW(L"www.blah")); + EXPECT_EQ(L"blah", net_util::StripWWW(L"blah")); +} + +TEST(NetUtilTest, GetSuggestedFilename) { + struct FilenameTest { + const char* url; + const wchar_t* content_disp_header; + const wchar_t* default_filename; + const wchar_t* expected_filename; + } test_cases[] = { + {"http://www.google.com/", + L"Content-disposition: attachment; filename=test.html", + L"", + L"test.html"}, + {"http://www.google.com/", + L"Content-disposition: attachment; filename=\"test.html\"", + L"", + L"test.html"}, + {"http://www.google.com/path/test.html", + L"Content-disposition: attachment", + L"", + L"test.html"}, + {"http://www.google.com/path/test.html", + L"Content-disposition: attachment;", + L"", + L"test.html"}, + {"http://www.google.com/", + L"", + L"", + L"www.google.com"}, + {"http://www.google.com/test.html", + L"", + L"", + L"test.html"}, + // Now that we use googleurl's ExtractFileName, this case falls back + // to the hostname. 
If this behavior is not desirable, we'd better + // change ExtractFileName (in url_parse). + {"http://www.google.com/path/", + L"", + L"", + L"www.google.com"}, + {"http://www.google.com/path", + L"", + L"", + L"path"}, + {"file:///", + L"", + L"", + L"download"}, + {"view-cache:", + L"", + L"", + L"download"}, + {"http://www.google.com/", + L"Content-disposition: attachment; filename =\"test.html\"", + L"download", + L"test.html"}, + {"http://www.google.com/", + L"", + L"download", + L"download"}, + {"http://www.google.com/", + L"Content-disposition: attachment; filename=\"../test.html\"", + L"", + L"test.html"}, + {"http://www.google.com/", + L"Content-disposition: attachment; filename=\"..\"", + L"download", + L"download"}, + {"http://www.google.com/test.html", + L"Content-disposition: attachment; filename=\"..\"", + L"download", + L"test.html"}, + // Below is a small subset of cases taken from GetFileNameFromCD test above. + {"http://www.google.com/", + L"Content-Disposition: attachment; filename=\"%EC%98%88%EC%88%A0%20" + L"%EC%98%88%EC%88%A0.jpg\"", + L"", + L"\uc608\uc220 \uc608\uc220.jpg"}, + {"http://www.google.com/%EC%98%88%EC%88%A0%20%EC%98%88%EC%88%A0.jpg", + L"", + L"download", + L"\uc608\uc220 \uc608\uc220.jpg"}, + {"http://www.google.com/", + L"Content-disposition: attachment;", + L"\uB2E4\uC6B4\uB85C\uB4DC", + L"\uB2E4\uC6B4\uB85C\uB4DC"}, + {"http://www.google.com/", + L"Content-Disposition: attachment; filename=\"=?EUC-JP?Q?=B7=DD=BD=" + L"D13=2Epng?=\"", + L"download", + L"\u82b8\u88533.png"}, + // Invalid C-D header. Extracts filename from url. 
+ {"http://www.google.com/test.html", + L"Content-Disposition: attachment; filename==?iiso88591?Q?caf=EG?=", + L"", + L"test.html"}, + }; + for (int i = 0; i < arraysize(test_cases); ++i) { + std::wstring filename = net_util::GetSuggestedFilename( + GURL(test_cases[i].url), test_cases[i].content_disp_header, + test_cases[i].default_filename); + EXPECT_EQ(std::wstring(test_cases[i].expected_filename), filename); + } +} diff --git a/net/base/registry_controlled_domain.cc b/net/base/registry_controlled_domain.cc new file mode 100644 index 0000000..b573d17 --- /dev/null +++ b/net/base/registry_controlled_domain.cc @@ -0,0 +1,351 @@ +//* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Mozilla Effective-TLD Service + * + * The Initial Developer of the Original Code is + * Google Inc. + * Portions created by the Initial Developer are Copyright (C) 2006 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Pamela Greene <pamg.bugs@gmail.com> (original author) + * Daniel Witte <dwitte@stanford.edu> + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. 
If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +#include <windows.h> + +#include "base/logging.h" +#include "base/string_util.h" +#include "googleurl/src/gurl.h" +#include "googleurl/src/url_parse.h" +#include "net/base/net_module.h" +#include "net/base/net_resources.h" +#include "net/base/net_util.h" +#include "net/base/registry_controlled_domain.h" + +// This list of rules is used by unit tests and any other time that the main +// resource file is not available. It should be kept exceedingly short to +// avoid impacting unit test performance. 
+static const char kDefaultDomainData[] = "com\n" + "edu\n" + "gov\n" + "net\n" + "org\n" + "co.uk\n"; + +// static +std::string RegistryControlledDomainService::GetDomainAndRegistry( + const GURL& gurl) { + const url_parse::Component host = + gurl.parsed_for_possibly_invalid_spec().host; + if ((host.len <= 0) || gurl.HostIsIPAddress()) + return std::string(); + return GetDomainAndRegistryImpl(std::string( + gurl.possibly_invalid_spec().data() + host.begin, host.len)); +} + +// static +std::string RegistryControlledDomainService::GetDomainAndRegistry( + const std::string& host) { + bool is_ip_address; + const std::string canon_host(net_util::CanonicalizeHost(host, + &is_ip_address)); + if (canon_host.empty() || is_ip_address) + return std::string(); + return GetDomainAndRegistryImpl(canon_host); +} + +// static +std::string RegistryControlledDomainService::GetDomainAndRegistry( + const std::wstring& host) { + bool is_ip_address; + const std::string canon_host(net_util::CanonicalizeHost(host, + &is_ip_address)); + if (canon_host.empty() || is_ip_address) + return std::string(); + return GetDomainAndRegistryImpl(canon_host); +} + +// static +bool RegistryControlledDomainService::SameDomainOrHost(const GURL& gurl1, + const GURL& gurl2) { + // See if both URLs have a known domain + registry, and those values are the + // same. + const std::string domain1(GetDomainAndRegistry(gurl1)); + const std::string domain2(GetDomainAndRegistry(gurl2)); + if (!domain1.empty() || !domain2.empty()) + return domain1 == domain2; + + // No domains. See if the hosts are identical. 
+ const url_parse::Component host1 = + gurl1.parsed_for_possibly_invalid_spec().host; + const url_parse::Component host2 = + gurl2.parsed_for_possibly_invalid_spec().host; + if ((host1.len <= 0) || (host1.len != host2.len)) + return false; + return !strncmp(gurl1.possibly_invalid_spec().data() + host1.begin, + gurl2.possibly_invalid_spec().data() + host2.begin, + host1.len); +} + +// static +size_t RegistryControlledDomainService::GetRegistryLength( + const GURL& gurl, + bool allow_unknown_registries) { + const url_parse::Component host = + gurl.parsed_for_possibly_invalid_spec().host; + if (host.len <= 0) + return std::string::npos; + if (gurl.HostIsIPAddress()) + return 0; + return GetInstance()->GetRegistryLengthImpl( + std::string(gurl.possibly_invalid_spec().data() + host.begin, host.len), + allow_unknown_registries); +} + +// static +size_t RegistryControlledDomainService::GetRegistryLength( + const std::string& host, + bool allow_unknown_registries) { + bool is_ip_address; + const std::string canon_host(net_util::CanonicalizeHost(host, + &is_ip_address)); + if (canon_host.empty()) + return std::string::npos; + if (is_ip_address) + return 0; + return GetInstance()->GetRegistryLengthImpl(canon_host, + allow_unknown_registries); +} + +// static +size_t RegistryControlledDomainService::GetRegistryLength( + const std::wstring& host, + bool allow_unknown_registries) { + bool is_ip_address; + const std::string canon_host(net_util::CanonicalizeHost(host, + &is_ip_address)); + if (canon_host.empty()) + return std::string::npos; + if (is_ip_address) + return 0; + return GetInstance()->GetRegistryLengthImpl(canon_host, + allow_unknown_registries); +} + +// static +std::string RegistryControlledDomainService::GetDomainAndRegistryImpl( + const std::string& host) { + DCHECK(!host.empty()); + + // Find the length of the registry for this host. 
+ const size_t registry_length = + GetInstance()->GetRegistryLengthImpl(host, true); + if ((registry_length == std::string::npos) || (registry_length == 0)) + return std::string(); // No registry. + // The "2" in this next line is 1 for the dot, plus a 1-char minimum preceding + // subcomponent length. + if (registry_length > (host.length() - 2)) { + NOTREACHED() << + "Host does not have at least one subcomponent before registry!"; + return std::string(); + } + + // Move past the dot preceding the registry, and search for the next previous + // dot. Return the host from after that dot, or the whole host when there is + // no dot. + const size_t dot = host.rfind('.', host.length() - registry_length - 2); + if (dot == std::string::npos) + return host; + return host.substr(dot + 1); +} + +size_t RegistryControlledDomainService::GetRegistryLengthImpl( + const std::string& host, + bool allow_unknown_registries) { + DCHECK(!host.empty()); + + // Skip leading dots. + const size_t host_check_begin = host.find_first_not_of('.'); + if (host_check_begin == std::string::npos) + return 0; // Host is only dots. + + // A single trailing dot isn't relevant in this determination, but does need + // to be included in the final returned length. + size_t host_check_len = host.length(); + if (host[host_check_len - 1] == '.') { + --host_check_len; + DCHECK(host_check_len > 0); // If this weren't true, the host would be ".", + // and we'd have already returned above. + if (host[host_check_len - 1] == '.') + return 0; // Multiple trailing dots. + } + + // Walk up the domain tree, most specific to least specific, + // looking for matches at each level. + StringSegment match; + size_t prev_start = std::string::npos; + size_t curr_start = host_check_begin; + size_t next_dot = host.find('.', curr_start); + if (next_dot >= host_check_len) // Catches std::string::npos as well. + return 0; // This can't have a registry + domain. 
+ while (1) { + match.Set(host.data(), curr_start, host_check_len - curr_start); + DomainMap::iterator iter = domain_map_.find(match); + if (iter != domain_map_.end()) { + DomainEntry entry = iter->second; + // Exception rules override wildcard rules when the domain is an exact + // match, but wildcards take precedence when there's a subdomain. + if (entry.wildcard && (prev_start != std::string::npos)) { + // If prev_start == host_check_begin, then the host is the registry + // itself, so return 0. + return (prev_start == host_check_begin) ? + 0 : (host.length() - prev_start); + } + + if (entry.exception) { + if (next_dot == std::string::npos) { + // If we get here, we had an exception rule with no dots (e.g. + // "!foo"). This would only be valid if we had a corresponding + // wildcard rule, which would have to be "*". But we explicitly + // disallow that case, so this kind of rule is invalid. + NOTREACHED() << "Invalid exception rule"; + return 0; + } + return host.length() - next_dot - 1; + } + + // If curr_start == host_check_begin, then the host is the registry + // itself, so return 0. + return (curr_start == host_check_begin) ? + 0 : (host.length() - curr_start); + } + + if (next_dot >= host_check_len) // Catches std::string::npos as well. + break; + + prev_start = curr_start; + curr_start = next_dot + 1; + next_dot = host.find('.', curr_start); + } + + // No rule found in the registry. curr_start now points to the first + // character of the last subcomponent of the host, so if we allow unknown + // registries, return the length of this subcomponent. + return allow_unknown_registries ? 
(host.length() - curr_start) : 0; +} + +RegistryControlledDomainService* RegistryControlledDomainService::instance_ = + NULL; + +// static +RegistryControlledDomainService* RegistryControlledDomainService::GetInstance() +{ + if (!instance_) { + RegistryControlledDomainService* s = new RegistryControlledDomainService(); + s->Init(); + // TODO(darin): use fix_wp64.h once it lives in base/ + if (InterlockedCompareExchangePointer( + reinterpret_cast<PVOID*>(&instance_), s, NULL)) { + // Oops, another thread initialized instance_ out from under us. + delete s; + } + } + return instance_; +} + +// static +void RegistryControlledDomainService::UseDomainData(const std::string& data) { + RegistryControlledDomainService* instance = GetInstance(); + instance->domain_data_ = data; + instance->ParseDomainData(); +} + +void RegistryControlledDomainService::Init() { + domain_data_ = NetModule::GetResource(IDR_EFFECTIVE_TLD_NAMES); + if (domain_data_.empty()) { + // The resource file isn't present for some unit tests, for example. Fall + // back to a tiny, basic list of rules in that case. + domain_data_ = kDefaultDomainData; + } + ParseDomainData(); +} + +void RegistryControlledDomainService::ParseDomainData() { + domain_map_.clear(); + + StringSegment rule; + size_t line_end = 0; + size_t line_start = 0; + while (line_start < domain_data_.size()) { + line_end = domain_data_.find('\n', line_start); + if (line_end == std::string::npos) + line_end = domain_data_.size(); + rule.Set(domain_data_.data(), line_start, line_end - line_start); + AddRule(&rule); + line_start = line_end + 1; + } +} + +void RegistryControlledDomainService::AddRule(StringSegment* rule) { + // Determine rule properties. + size_t property_offset = 0; + bool exception = false; + bool wild = false; + + // Valid rules may be either wild or exceptions, but not both. 
+ if (rule->CharAt(0) == '!') { + exception = true; + property_offset = 1; + } else if (rule->CharAt(0) == '*' && rule->CharAt(1) == '.') { + wild = true; + property_offset = 2; + } + + // Find or create an entry for this host. + rule->TrimFromStart(property_offset); + DomainEntry entry; + DomainMap::iterator iter = domain_map_.find(*rule); + if (iter != domain_map_.end()) + entry = iter->second; + + entry.exception |= exception; + entry.wildcard |= wild; + domain_map_[*rule] = entry; +} + +bool RegistryControlledDomainService::StringSegment::operator<( + const StringSegment &other) const { + // If the segments are of equal length, compare their contents; otherwise, + // the shorter segment is "less than" the longer one. + if (len_ == other.len_) { + int comparison = strncmp(data_ + begin_, other.data_ + other.begin_, len_); + return (comparison < 0); + } + return (len_ < other.len_); +} diff --git a/net/base/registry_controlled_domain.h b/net/base/registry_controlled_domain.h new file mode 100644 index 0000000..6b5adbc --- /dev/null +++ b/net/base/registry_controlled_domain.h @@ -0,0 +1,298 @@ +//* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Mozilla TLD Service + * + * The Initial Developer of the Original Code is + * Google Inc. + * Portions created by the Initial Developer are Copyright (C) 2006 + * the Initial Developer. All Rights Reserved. 
+ * + * Contributor(s): + * Pamela Greene <pamg.bugs@gmail.com> (original author) + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +// NB: Modelled after Mozilla's code (originally written by Pamela Greene, +// later modified by others), but almost entirely rewritten for Chrome. + +/* + (Documentation based on the Mozilla documentation currently at + http://wiki.mozilla.org/Gecko:Effective_TLD_Service, written by the same + author.) + + The RegistryControlledDomainService examines the hostname of a GURL passed to + it and determines the longest portion that is controlled by a registrar. + Although technically the top-level domain (TLD) for a hostname is the last + dot-portion of the name (such as .com or .org), many domains (such as co.uk) + function as though they were TLDs, allocating any number of more specific, + essentially unrelated names beneath them. For example, .uk is a TLD, but + nobody is allowed to register a domain directly under .uk; the "effective" + TLDs are ac.uk, co.uk, and so on. 
We wouldn't want to allow any site in + *.co.uk to set a cookie for the entire co.uk domain, so it's important to be + able to identify which higher-level domains function as effective TLDs and + which can be registered. + + The service obtains its information about effective TLDs from a text resource + that must be in the following format: + + * It should use plain ASCII. + * It should contain one domain rule per line, terminated with \n, with nothing + else on the line. (The last rule in the file may omit the ending \n.) + * Rules should have been normalized using the same canonicalization that GURL + applies. For ASCII, that means they're not case-sensitive, among other + things; other normalizations are applied for other characters. + * Each rule should list the entire TLD-like domain name, with any subdomain + portions separated by dots (.) as usual. + * Rules should neither begin nor end with a dot. + * If a hostname matches more than one rule, the most specific rule (that is, + the one with more dot-levels) will be used. + * Other than in the case of wildcards (see below), rules do not implicitly + include their subcomponents. For example, "bar.baz.uk" does not imply + "baz.uk", and if "bar.baz.uk" is the only rule in the list, "foo.bar.baz.uk" + will match, but "baz.uk" and "qux.baz.uk" won't. + * The wildcard character '*' will match any valid sequence of characters. + * Wildcards may only appear as the entire most specific level of a rule. That + is, a wildcard must come at the beginning of a line and must be followed by + a dot. (You may not use a wildcard as the entire rule.) + * A wildcard rule implies a rule for the entire non-wildcard portion. For + example, the rule "*.foo.bar" implies the rule "foo.bar" (but not the rule + "bar"). This is typically important in the case of exceptions (see below). + * The exception character '!' before a rule marks an exception to a wildcard + rule. 
If your rules are "*.tokyo.jp" and "!pref.tokyo.jp", then + "a.b.tokyo.jp" has an effective TLD of "b.tokyo.jp", but "a.pref.tokyo.jp" + has an effective TLD of "tokyo.jp" (the exception prevents the wildcard + match, and we thus fall through to matching on the implied "tokyo.jp" rule + from the wildcard). + * If you use an exception rule without a corresponding wildcard rule, the + behavior is undefined. + + Firefox has a very similar service, and it's their data file we use to + construct our resource. However, the data expected by this implementation + differs from the Mozilla file in several important ways: + (1) We require that all single-level TLDs (com, edu, etc.) be explicitly + listed. As of this writing, Mozilla's file includes the single-level + TLDs too, but that might change. + (2) Our data is expected to be in pure ASCII: all UTF-8 or otherwise encoded + items must already have been normalized. + (3) We do not allow comments, rule notes, blank lines, or line endings other + than LF. + Rules are also expected to be syntactically valid. + + The utility application tld_cleanup.exe converts a Mozilla-style file into a + Chrome one, making sure that single-level TLDs are explicitly listed, using + GURL to normalize rules, and validating the rules. +*/ + +#ifndef NET_BASE_REGISTRY_CONTROLLED_DOMAIN_H__ +#define NET_BASE_REGISTRY_CONTROLLED_DOMAIN_H__ + +#include <map> +#include <string> + +#include "base/basictypes.h" + +class GURL; + +
// This class is a singleton.  All public entry points are static and route
// through the lazily created instance.
class RegistryControlledDomainService {
 public:
  // Returns the registered, organization-identifying host and all its registry
  // information, but no subdomains, from the given GURL.  Returns an empty
  // string if the GURL is invalid, has no host (e.g. a file: URL), has multiple
  // trailing dots, is an IP address, has only one subcomponent (i.e. no dots
  // other than leading/trailing ones), or is itself a recognized registry
  // identifier.  If no matching rule is found in the effective-TLD data (or in
  // the default data, if the resource failed to load), the last subcomponent of
  // the host is assumed to be the registry.
  //
  // Examples:
  //   http://www.google.com/file.html -> "google.com"  (com)
  //   http://..google.com/file.html   -> "google.com"  (com)
  //   http://google.com./file.html    -> "google.com." (com)
  //   http://a.b.co.uk/file.html      -> "b.co.uk"     (co.uk)
  //   file:///C:/bar.html             -> ""            (no host)
  //   http://foo.com../file.html      -> ""            (multiple trailing dots)
  //   http://192.168.0.1/file.html    -> ""            (IP address)
  //   http://bar/file.html            -> ""            (no subcomponents)
  //   http://co.uk/file.html          -> ""            (host is a registry)
  //   http://foo.bar/file.html        -> "foo.bar"     (no rule; assume bar)
  static std::string GetDomainAndRegistry(const GURL& gurl);

  // Like the GURL version, but takes a host (which is canonicalized internally)
  // instead of a full GURL.
  static std::string GetDomainAndRegistry(const std::string& host);
  static std::string GetDomainAndRegistry(const std::wstring& host);

  // This convenience function returns true if the two GURLs both have hosts
  // and one of the following is true:
  // * They each have a known domain and registry, and it is the same for both
  //   URLs.  Note that this means the trailing dot, if any, must match too.
  // * They don't have known domains/registries, but the hosts are identical.
  // Effectively, callers can use this function to check whether the input URLs
  // represent hosts "on the same site".
  static bool SameDomainOrHost(const GURL& gurl1, const GURL& gurl2);

  // Finds the length in bytes of the registrar portion of the host in the
  // given GURL.  Returns std::string::npos if the GURL is invalid or has no
  // host (e.g. a file: URL).  Returns 0 if the GURL has multiple trailing dots,
  // is an IP address, has no subcomponents, or is itself a recognized registry
  // identifier.  If no matching rule is found in the effective-TLD data (or in
  // the default data, if the resource failed to load), returns 0 if
  // |allow_unknown_registries| is false, or the length of the last subcomponent
  // if |allow_unknown_registries| is true.
  //
  // Examples:
  //   http://www.google.com/file.html -> 3                 (com)
  //   http://..google.com/file.html   -> 3                 (com)
  //   http://google.com./file.html    -> 4                 (com)
  //   http://a.b.co.uk/file.html      -> 5                 (co.uk)
  //   file:///C:/bar.html             -> std::string::npos (no host)
  //   http://foo.com../file.html      -> 0                 (multiple trailing
  //                                                         dots)
  //   http://192.168.0.1/file.html    -> 0                 (IP address)
  //   http://bar/file.html            -> 0                 (no subcomponents)
  //   http://co.uk/file.html          -> 0                 (host is a registry)
  //   http://foo.bar/file.html        -> 0 or 3, depending (no rule; assume
  //                                                         bar)
  static size_t GetRegistryLength(const GURL& gurl,
                                  bool allow_unknown_registries);

  // Like the GURL version, but takes a host (which is canonicalized internally)
  // instead of a full GURL.
  static size_t GetRegistryLength(const std::string& host,
                                  bool allow_unknown_registries);
  static size_t GetRegistryLength(const std::wstring& host,
                                  bool allow_unknown_registries);

 protected:
  // The entire protected API is only for unit testing.  I mean it.  Don't make
  // me come over there!
  RegistryControlledDomainService() { }
  ~RegistryControlledDomainService() { }

  // Clears the static singleton instance.  This is used by unit tests to
  // create a new instance for each test, to help ensure test independence.
  // The next call that needs the instance will build a fresh one.
  static void ResetInstance() {
    delete instance_;
    instance_ = NULL;
  }

  // Sets the domain_data_ of the current instance (creating one, if necessary),
  // then parses it.  |data| is copied, and the previous rule map is discarded
  // and rebuilt.
  static void UseDomainData(const std::string& data);

 private:
  // Using the StringSegment class, we can compare portions of strings without
  // needing to allocate or copy them.  A segment only stores a pointer, an
  // offset and a length; it never copies the characters it refers to.
  class StringSegment {
   public:
    StringSegment() : data_(0), begin_(0), len_(0) { }
    ~StringSegment() { }

    // Points this segment at the |len| characters starting at data[begin].
    // Only the pointer is stored.
    void Set(const char* data, size_t begin, size_t len) {
      data_ = data;
      begin_ = begin;
      len_ = len;
    }

    // Returns the character at the given offset from the start of the segment,
    // or '\0' if the offset lies outside the segment.
    char CharAt(size_t offset) const {
      return (offset < len_) ? data_[begin_ + offset] : '\0';
    }

    // Removes a maximum of |trimmed| number of characters, up to the length of
    // the segment, from the start of the StringSegment.
    void TrimFromStart(size_t trimmed) {
      if (trimmed > len_)
        trimmed = len_;
      begin_ += trimmed;
      len_ -= trimmed;
    }

    // Returns the base pointer passed to Set(), not the start of the segment.
    const char* data() const { return data_; }

    // This comparator is needed by std::map.  Note that since we don't care
    // about the exact sorting, we use a somewhat less intuitive, but efficient,
    // comparison: a shorter segment sorts before a longer one, and only
    // segments of equal length have their characters compared.
    bool operator<(const StringSegment& other) const;

   private:
    const char* data_;  // Base of the underlying character data (not owned).
    size_t begin_;      // Offset of the segment's first character in data_.
    size_t len_;        // Number of characters in the segment.
  };

  // The full domain rule data, loaded from a resource or set by a unit test.
  std::string domain_data_;

  // An entry in the map of domain specifications, describing the properties
  // that apply to that domain rule.
  struct DomainEntry {
    DomainEntry() : exception(false), wildcard(false) { }
    bool exception;  // A rule written with a leading '!' exists for this name.
    bool wildcard;   // A rule written with a leading "*." exists for this name.
  };
  typedef std::map<StringSegment, DomainEntry> DomainMap;

  // A map from a StringSegment holding a domain name (rule) to its DomainEntry.
  // The StringSegments in the domain_map_ hold pointers to the domain_data_
  // data; that's cheaper than copying the string data itself.
  // TODO(pamg): Since all the domain_map_ entries have the same data_, it's
  // redundant.  Is it worth subclassing StringSegment to avoid that?
  DomainMap domain_map_;

  // Parses a list of effective-TLD rules, building the domain_map_.  Rules are
  // assumed to be syntactically valid.  Any existing map contents are cleared
  // first.
  void ParseDomainData();

  // The class's singleton instance.
  static RegistryControlledDomainService* instance_;

  // Returns the singleton instance, after attempting to initialize it.
  // NOTE that if the effective-TLD data resource can't be found (comes back
  // empty), Init() falls back to the small built-in rule list
  // kDefaultDomainData defined in the .cc file, so the instance operates on
  // that list rather than on an empty domain_map_.
  static RegistryControlledDomainService* GetInstance();

  // Loads and parses the effective-TLD data resource, or the built-in default
  // rule list when the resource is empty.
  void Init();

  // Adds one rule, assumed to be valid, to the domain_map_.
  // WARNING: As implied by the non-const status of the incoming rule, this
  // method may MODIFY that rule (in particular, change its start and length).
  // This is a performance optimization.
  void AddRule(StringSegment* rule);

  // Internal workings of the static public methods.  See above.
  // Both expect a non-empty, already canonicalized host (they DCHECK on empty
  // input).
  static std::string GetDomainAndRegistryImpl(const std::string& host);
  size_t GetRegistryLengthImpl(const std::string& host,
                               bool allow_unknown_registries);

  DISALLOW_EVIL_CONSTRUCTORS(RegistryControlledDomainService);
};

#endif  // NET_BASE_REGISTRY_CONTROLLED_DOMAIN_H__
diff --git a/net/base/registry_controlled_domain_unittest.cc b/net/base/registry_controlled_domain_unittest.cc new file mode 100644 index 0000000..cd74fc5 --- /dev/null +++ b/net/base/registry_controlled_domain_unittest.cc @@ -0,0 +1,296 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc.
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "googleurl/src/gurl.h" +#include "net/base/registry_controlled_domain.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace { + +class TestRegistryControlledDomainService; +static TestRegistryControlledDomainService* test_instance_; + +class TestRegistryControlledDomainService : + public RegistryControlledDomainService { + public: + + // Deletes the instance so a new one will be created. + static void ResetInstance() { + RegistryControlledDomainService::ResetInstance(); + } + + // Sets and parses the given data. 
+ static void UseDomainData(const std::string& data) { + RegistryControlledDomainService::UseDomainData(data); + } + + private: + TestRegistryControlledDomainService::TestRegistryControlledDomainService() { } + TestRegistryControlledDomainService::~TestRegistryControlledDomainService() { + } +}; + +class RegistryControlledDomainTest : public testing::Test { + protected: + virtual void SetUp() { + TestRegistryControlledDomainService::ResetInstance(); + } +}; + +// Convenience functions to shorten the names for repeated use below. +void SetTestData(const std::string& data) { + TestRegistryControlledDomainService::UseDomainData(data); +} + +std::string GetDomainFromURL(const std::string& url) { + return TestRegistryControlledDomainService::GetDomainAndRegistry(GURL(url)); +} + +std::string GetDomainFromHost(const std::wstring& host) { + return TestRegistryControlledDomainService::GetDomainAndRegistry(host); +} + +size_t GetRegistryLengthFromURL(const std::string& url, + bool allow_unknown_registries) { + return TestRegistryControlledDomainService::GetRegistryLength(GURL(url), + allow_unknown_registries); +} + +size_t GetRegistryLengthFromHost(const std::wstring& host, + bool allow_unknown_registries) { + return TestRegistryControlledDomainService::GetRegistryLength(host, + allow_unknown_registries); +} + +bool CompareDomains(const std::string& url1, const std::string& url2) { + GURL g1 = GURL(url1); + GURL g2 = GURL(url2); + return TestRegistryControlledDomainService::SameDomainOrHost(g1, g2); +} + +} // namespace + +TEST_F(RegistryControlledDomainTest, TestParsing) { + // Ensure that various simple and pathological cases parse without hanging or + // crashing. Testing the correctness of the parsing directly would require + // opening the singleton class up more. 
+ SetTestData("com"); + SetTestData("abc.com\n"); + SetTestData("abc.com\ndef.com\n*.abc.com\n!foo.abc.com"); + SetTestData("abc.com.\n"); + SetTestData(""); + SetTestData("*."); + SetTestData("!"); + SetTestData("."); +} + +static const char kTestData[] = "jp\n" // 1 + "ac.jp\n" // 2 + "*.bar.jp\n" // 3 + "*.baz.bar.jp\n" // 4 + "*.foo.bar.jp\n" // 5 + "!foo.bar.jp\n" // 6 + "!pref.bar.jp\n" // 7 + "bar.baz.com\n" // 8 + "*.c\n" // 9 + "!b.c"; // 10 + +TEST_F(RegistryControlledDomainTest, TestGetDomainAndRegistry) { + SetTestData(kTestData); + + // Test GURL version of GetDomainAndRegistry(). + EXPECT_EQ("baz.jp", GetDomainFromURL("http://a.baz.jp/file.html")); // 1 + EXPECT_EQ("baz.jp.", GetDomainFromURL("http://a.baz.jp./file.html")); // 1 + EXPECT_EQ("", GetDomainFromURL("http://ac.jp")); // 2 + EXPECT_EQ("", GetDomainFromURL("http://a.bar.jp")); // 3 + EXPECT_EQ("", GetDomainFromURL("http://bar.jp")); // 3 + EXPECT_EQ("", GetDomainFromURL("http://baz.bar.jp")); // 3 4 + EXPECT_EQ("a.b.baz.bar.jp", GetDomainFromURL("http://a.b.baz.bar.jp")); + // 4 + EXPECT_EQ("foo.bar.jp", GetDomainFromURL("http://foo.bar.jp")); // 3 5 6 + EXPECT_EQ("pref.bar.jp", GetDomainFromURL("http://baz.pref.bar.jp")); // 7 + EXPECT_EQ("b.bar.baz.com.", GetDomainFromURL("http://a.b.bar.baz.com.")); + // 8 + EXPECT_EQ("a.d.c", GetDomainFromURL("http://a.d.c")); // 9 + EXPECT_EQ("a.d.c", GetDomainFromURL("http://.a.d.c")); // 9 + EXPECT_EQ("a.d.c", GetDomainFromURL("http://..a.d.c")); // 9 + EXPECT_EQ("b.c", GetDomainFromURL("http://a.b.c")); // 9 10 + EXPECT_EQ("baz.com", GetDomainFromURL("http://baz.com")); // none + EXPECT_EQ("baz.com.", GetDomainFromURL("http://baz.com.")); // none + + EXPECT_EQ("", GetDomainFromURL("")); + EXPECT_EQ("", GetDomainFromURL("http://")); + EXPECT_EQ("", GetDomainFromURL("file:///C:/file.html")); + EXPECT_EQ("", GetDomainFromURL("http://foo.com..")); + EXPECT_EQ("", GetDomainFromURL("http://...")); + EXPECT_EQ("", GetDomainFromURL("http://192.168.0.1")); + 
EXPECT_EQ("", GetDomainFromURL("http://localhost")); + EXPECT_EQ("", GetDomainFromURL("http://localhost.")); + EXPECT_EQ("", GetDomainFromURL("http:////Comment")); + + // Test std::wstring version of GetDomainAndRegistry(). Uses the same + // underpinnings as the GURL version, so this is really more of a check of + // CanonicalizeHost(). + EXPECT_EQ("baz.jp", GetDomainFromHost(L"a.baz.jp")); // 1 + EXPECT_EQ("baz.jp.", GetDomainFromHost(L"a.baz.jp.")); // 1 + EXPECT_EQ("", GetDomainFromHost(L"ac.jp")); // 2 + EXPECT_EQ("", GetDomainFromHost(L"a.bar.jp")); // 3 + EXPECT_EQ("", GetDomainFromHost(L"bar.jp")); // 3 + EXPECT_EQ("", GetDomainFromHost(L"baz.bar.jp")); // 3 4 + EXPECT_EQ("a.b.baz.bar.jp", GetDomainFromHost(L"a.b.baz.bar.jp")); // 3 4 + EXPECT_EQ("foo.bar.jp", GetDomainFromHost(L"foo.bar.jp")); // 3 5 6 + EXPECT_EQ("pref.bar.jp", GetDomainFromHost(L"baz.pref.bar.jp")); // 7 + EXPECT_EQ("b.bar.baz.com.", GetDomainFromHost(L"a.b.bar.baz.com.")); // 8 + EXPECT_EQ("a.d.c", GetDomainFromHost(L"a.d.c")); // 9 + EXPECT_EQ("a.d.c", GetDomainFromHost(L".a.d.c")); // 9 + EXPECT_EQ("a.d.c", GetDomainFromHost(L"..a.d.c")); // 9 + EXPECT_EQ("b.c", GetDomainFromHost(L"a.b.c")); // 9 10 + EXPECT_EQ("baz.com", GetDomainFromHost(L"baz.com")); // none + EXPECT_EQ("baz.com.", GetDomainFromHost(L"baz.com.")); // none + + EXPECT_EQ("", GetDomainFromHost(L"")); + EXPECT_EQ("", GetDomainFromHost(L"foo.com..")); + EXPECT_EQ("", GetDomainFromHost(L"...")); + EXPECT_EQ("", GetDomainFromHost(L"192.168.0.1")); + EXPECT_EQ("", GetDomainFromHost(L"localhost.")); + EXPECT_EQ("", GetDomainFromHost(L".localhost.")); +} + +TEST_F(RegistryControlledDomainTest, TestGetRegistryLength) { + SetTestData(kTestData); + + // Test GURL version of GetRegistryLength(). 
+ EXPECT_EQ(2, GetRegistryLengthFromURL("http://a.baz.jp/file.html", false)); + // 1 + EXPECT_EQ(3, GetRegistryLengthFromURL("http://a.baz.jp./file.html", false)); + // 1 + EXPECT_EQ(0, GetRegistryLengthFromURL("http://ac.jp", false)); // 2 + EXPECT_EQ(0, GetRegistryLengthFromURL("http://a.bar.jp", false)); // 3 + EXPECT_EQ(0, GetRegistryLengthFromURL("http://bar.jp", false)); // 3 + EXPECT_EQ(0, GetRegistryLengthFromURL("http://baz.bar.jp", false)); // 3 4 + EXPECT_EQ(12, GetRegistryLengthFromURL("http://a.b.baz.bar.jp", false)); + // 4 + EXPECT_EQ(6, GetRegistryLengthFromURL("http://foo.bar.jp", false)); // 3 5 6 + EXPECT_EQ(6, GetRegistryLengthFromURL("http://baz.pref.bar.jp", false)); + // 7 + EXPECT_EQ(11, GetRegistryLengthFromURL("http://a.b.bar.baz.com", false)); + // 8 + EXPECT_EQ(3, GetRegistryLengthFromURL("http://a.d.c", false)); // 9 + EXPECT_EQ(3, GetRegistryLengthFromURL("http://.a.d.c", false)); // 9 + EXPECT_EQ(3, GetRegistryLengthFromURL("http://..a.d.c", false)); // 9 + EXPECT_EQ(1, GetRegistryLengthFromURL("http://a.b.c", false)); // 9 10 + EXPECT_EQ(0, GetRegistryLengthFromURL("http://baz.com", false)); // none + EXPECT_EQ(0, GetRegistryLengthFromURL("http://baz.com.", false)); // none + EXPECT_EQ(3, GetRegistryLengthFromURL("http://baz.com", true)); // none + EXPECT_EQ(4, GetRegistryLengthFromURL("http://baz.com.", true)); // none + + EXPECT_EQ(std::string::npos, GetRegistryLengthFromURL("", false)); + EXPECT_EQ(std::string::npos, GetRegistryLengthFromURL("http://", false)); + EXPECT_EQ(std::string::npos, + GetRegistryLengthFromURL("file:///C:/file.html", false)); + EXPECT_EQ(0, GetRegistryLengthFromURL("http://foo.com..", false)); + EXPECT_EQ(0, GetRegistryLengthFromURL("http://...", false)); + EXPECT_EQ(0, GetRegistryLengthFromURL("http://192.168.0.1", false)); + EXPECT_EQ(0, GetRegistryLengthFromURL("http://localhost", false)); + EXPECT_EQ(0, GetRegistryLengthFromURL("http://localhost", true)); + EXPECT_EQ(0, 
GetRegistryLengthFromURL("http://localhost.", false)); + EXPECT_EQ(0, GetRegistryLengthFromURL("http://localhost.", true)); + EXPECT_EQ(0, GetRegistryLengthFromURL("http:////Comment", false)); + + // Test std::wstring version of GetRegistryLength(). Uses the same + // underpinnings as the GURL version, so this is really more of a check of + // CanonicalizeHost(). + EXPECT_EQ(2, GetRegistryLengthFromHost(L"a.baz.jp", false)); // 1 + EXPECT_EQ(3, GetRegistryLengthFromHost(L"a.baz.jp.", false)); // 1 + EXPECT_EQ(0, GetRegistryLengthFromHost(L"ac.jp", false)); // 2 + EXPECT_EQ(0, GetRegistryLengthFromHost(L"a.bar.jp", false)); // 3 + EXPECT_EQ(0, GetRegistryLengthFromHost(L"bar.jp", false)); // 3 + EXPECT_EQ(0, GetRegistryLengthFromHost(L"baz.bar.jp", false)); // 3 4 + EXPECT_EQ(12, GetRegistryLengthFromHost(L"a.b.baz.bar.jp", false)); // 4 + EXPECT_EQ(6, GetRegistryLengthFromHost(L"foo.bar.jp", false)); // 3 5 6 + EXPECT_EQ(6, GetRegistryLengthFromHost(L"baz.pref.bar.jp", false)); // 7 + EXPECT_EQ(11, GetRegistryLengthFromHost(L"a.b.bar.baz.com", false)); // 8 + EXPECT_EQ(3, GetRegistryLengthFromHost(L"a.d.c", false)); // 9 + EXPECT_EQ(3, GetRegistryLengthFromHost(L".a.d.c", false)); // 9 + EXPECT_EQ(3, GetRegistryLengthFromHost(L"..a.d.c", false)); // 9 + EXPECT_EQ(1, GetRegistryLengthFromHost(L"a.b.c", false)); // 9 10 + EXPECT_EQ(0, GetRegistryLengthFromHost(L"baz.com", false)); // none + EXPECT_EQ(0, GetRegistryLengthFromHost(L"baz.com.", false)); // none + EXPECT_EQ(3, GetRegistryLengthFromHost(L"baz.com", true)); // none + EXPECT_EQ(4, GetRegistryLengthFromHost(L"baz.com.", true)); // none + + EXPECT_EQ(std::string::npos, GetRegistryLengthFromHost(L"", false)); + EXPECT_EQ(0, GetRegistryLengthFromHost(L"foo.com..", false)); + EXPECT_EQ(0, GetRegistryLengthFromHost(L"..", false)); + EXPECT_EQ(0, GetRegistryLengthFromHost(L"192.168.0.1", false)); + EXPECT_EQ(0, GetRegistryLengthFromHost(L"localhost", false)); + EXPECT_EQ(0, GetRegistryLengthFromHost(L"localhost", 
true)); + EXPECT_EQ(0, GetRegistryLengthFromHost(L"localhost.", false)); + EXPECT_EQ(0, GetRegistryLengthFromHost(L"localhost.", true)); +} + +TEST_F(RegistryControlledDomainTest, TestSameDomainOrHost) { + SetTestData("jp\nbar.jp"); + + EXPECT_EQ(true, CompareDomains("http://a.b.bar.jp/file.html", + "http://a.b.bar.jp/file.html")); // b.bar.jp + EXPECT_EQ(true, CompareDomains("http://a.b.bar.jp/file.html", + "http://b.b.bar.jp/file.html")); // b.bar.jp + EXPECT_EQ(false, CompareDomains("http://a.foo.jp/file.html", // foo.jp + "http://a.not.jp/file.html")); // not.jp + EXPECT_EQ(false, CompareDomains("http://a.foo.jp/file.html", // foo.jp + "http://a.foo.jp./file.html")); // foo.jp. + EXPECT_EQ(false, CompareDomains("http://a.com/file.html", // a.com + "http://b.com/file.html")); // b.com + EXPECT_EQ(true, CompareDomains("http://a.x.com/file.html", + "http://b.x.com/file.html")); // x.com + EXPECT_EQ(true, CompareDomains("http://a.x.com/file.html", + "http://.x.com/file.html")); // x.com + EXPECT_EQ(true, CompareDomains("http://a.x.com/file.html", + "http://..b.x.com/file.html")); // x.com + EXPECT_EQ(true, CompareDomains("http://intranet/file.html", + "http://intranet/file.html")); // intranet + EXPECT_EQ(true, CompareDomains("http://127.0.0.1/file.html", + "http://127.0.0.1/file.html")); // 127.0.0.1 + EXPECT_EQ(false, CompareDomains("http://192.168.0.1/file.html", // 192.168.0.1 + "http://127.0.0.1/file.html")); // 127.0.0.1 + EXPECT_EQ(false, CompareDomains("file:///C:/file.html", + "file:///C:/file.html")); // no host +} + +TEST_F(RegistryControlledDomainTest, TestDefaultData) { + // Note that no data is set: we're using the default rules. 
+ EXPECT_EQ(3, GetRegistryLengthFromURL("http://google.com", false)); + EXPECT_EQ(3, GetRegistryLengthFromURL("http://stanford.edu", false)); + EXPECT_EQ(3, GetRegistryLengthFromURL("http://ustreas.gov", false)); + EXPECT_EQ(3, GetRegistryLengthFromURL("http://icann.net", false)); + EXPECT_EQ(3, GetRegistryLengthFromURL("http://ferretcentral.org", false)); + EXPECT_EQ(0, GetRegistryLengthFromURL("http://nowhere.foo", false)); + EXPECT_EQ(3, GetRegistryLengthFromURL("http://nowhere.foo", true)); +} diff --git a/net/base/socket.h b/net/base/socket.h new file mode 100644 index 0000000..11f0906e --- /dev/null +++ b/net/base/socket.h @@ -0,0 +1,61 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef NET_BASE_SOCKET_H_ +#define NET_BASE_SOCKET_H_ + +#include "net/base/completion_callback.h" + +namespace net { + +// Represents a read/write socket. +class Socket { + public: + virtual ~Socket() {} + + // Read data, up to buf_len bytes, from the socket. The number of bytes read + // is returned, or an error is returned upon failure. Zero is returned to + // indicate end-of-file. ERR_IO_PENDING is returned if the operation could + // not be completed synchronously, in which case the result will be passed to + // the callback when available. + virtual int Read(char* buf, int buf_len, + CompletionCallback* callback) = 0; + + // Writes data, up to buf_len bytes, to the socket. Note: only part of the + // data may be written! The number of bytes written is returned, or an error + // is returned upon failure. ERR_IO_PENDING is returned if the operation + // could not be completed synchronously, in which case the result will be + // passed to the callback when available. + virtual int Write(const char* buf, int buf_len, + CompletionCallback* callback) = 0; +}; + +} // namespace net + +#endif // NET_BASE_SOCKET_H_ diff --git a/net/base/ssl_client_socket.cc b/net/base/ssl_client_socket.cc new file mode 100644 index 0000000..711a114 --- /dev/null +++ b/net/base/ssl_client_socket.cc @@ -0,0 +1,502 @@ +// Copyright 2008, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include "net/base/ssl_client_socket.h" + +#include <schnlsp.h> + +#include "base/singleton.h" +#include "base/string_util.h" +#include "net/base/net_errors.h" + +namespace net { + +//----------------------------------------------------------------------------- + +class SChannelLib { + public: + SecurityFunctionTable funcs; + + SChannelLib() { + memset(&funcs, 0, sizeof(funcs)); + lib_ = LoadLibrary(L"SCHANNEL.DLL"); + if (lib_) { + INIT_SECURITY_INTERFACE init_security_interface = + reinterpret_cast<INIT_SECURITY_INTERFACE>( + GetProcAddress(lib_, "InitSecurityInterfaceW")); + if (init_security_interface) { + PSecurityFunctionTable funcs_ptr = init_security_interface(); + if (funcs_ptr) + memcpy(&funcs, funcs_ptr, sizeof(funcs)); + } + } + } + + ~SChannelLib() { + FreeLibrary(lib_); + } + + private: + HMODULE lib_; +}; + +static inline SecurityFunctionTable& SChannel() { + return Singleton<SChannelLib>()->funcs; +} + +//----------------------------------------------------------------------------- + +static const int kRecvBufferSize = 0x10000; + +SSLClientSocket::SSLClientSocket(ClientSocket* transport_socket, + const std::string& hostname) +#pragma warning(suppress: 4355) + : io_callback_(this, &SSLClientSocket::OnIOComplete), + transport_(transport_socket), + hostname_(hostname), + user_callback_(NULL), + user_buf_(NULL), + user_buf_len_(0), + next_state_(STATE_NONE), + bytes_sent_(0), + bytes_received_(0), + completed_handshake_(false) { + memset(&stream_sizes_, 0, sizeof(stream_sizes_)); + memset(&send_buffer_, 0, sizeof(send_buffer_)); + memset(&creds_, 0, sizeof(creds_)); + memset(&ctxt_, 0, sizeof(ctxt_)); +} + +SSLClientSocket::~SSLClientSocket() { + Disconnect(); +} + +int SSLClientSocket::Connect(CompletionCallback* callback) { + DCHECK(transport_.get()); + DCHECK(next_state_ == STATE_NONE); + DCHECK(!user_callback_); + + next_state_ = STATE_CONNECT; + int rv = DoLoop(OK); + if (rv == ERR_IO_PENDING) + user_callback_ = callback; + return rv; +} + +int 
SSLClientSocket::ReconnectIgnoringLastError(CompletionCallback* callback) { + // TODO(darin): implement me! + return ERR_FAILED; +} + +void SSLClientSocket::Disconnect() { + transport_->Disconnect(); + + if (send_buffer_.pvBuffer) { + SChannel().FreeContextBuffer(send_buffer_.pvBuffer); + memset(&send_buffer_, 0, sizeof(send_buffer_)); + } + if (creds_.dwLower || creds_.dwUpper) { + SChannel().FreeCredentialsHandle(&creds_); + memset(&creds_, 0, sizeof(creds_)); + } + if (ctxt_.dwLower || ctxt_.dwUpper) { + SChannel().DeleteSecurityContext(&ctxt_); + memset(&ctxt_, 0, sizeof(ctxt_)); + } +} + +bool SSLClientSocket::IsConnected() const { + return completed_handshake_ && transport_->IsConnected(); +} + +int SSLClientSocket::Read(char* buf, int buf_len, + CompletionCallback* callback) { + DCHECK(completed_handshake_); + DCHECK(next_state_ == STATE_NONE); + DCHECK(!user_callback_); + + user_buf_ = buf; + user_buf_len_ = buf_len; + + next_state_ = STATE_PAYLOAD_READ; + int rv = DoLoop(OK); + if (rv == ERR_IO_PENDING) + user_callback_ = callback; + return rv; +} + +int SSLClientSocket::Write(const char* buf, int buf_len, + CompletionCallback* callback) { + DCHECK(completed_handshake_); + DCHECK(next_state_ == STATE_NONE); + DCHECK(!user_callback_); + + user_buf_ = const_cast<char*>(buf); + user_buf_len_ = buf_len; + + next_state_ = STATE_PAYLOAD_WRITE; + int rv = DoLoop(OK); + if (rv == ERR_IO_PENDING) + user_callback_ = callback; + return rv; +} + +void SSLClientSocket::DoCallback(int rv) { + DCHECK(rv != ERR_IO_PENDING); + DCHECK(user_callback_); + + // since Run may result in Read being called, clear callback_ up front. 
+ CompletionCallback* c = user_callback_; + user_callback_ = NULL; + c->Run(rv); +} + +void SSLClientSocket::OnIOComplete(int result) { + int rv = DoLoop(result); + if (rv != ERR_IO_PENDING) + DoCallback(rv); +} + +int SSLClientSocket::DoLoop(int last_io_result) { + DCHECK(next_state_ != STATE_NONE); + int rv = last_io_result; + do { + State state = next_state_; + next_state_ = STATE_NONE; + switch (state) { + case STATE_CONNECT: + rv = DoConnect(); + break; + case STATE_CONNECT_COMPLETE: + rv = DoConnectComplete(rv); + break; + case STATE_HANDSHAKE_READ: + rv = DoHandshakeRead(); + break; + case STATE_HANDSHAKE_READ_COMPLETE: + rv = DoHandshakeReadComplete(rv); + break; + case STATE_HANDSHAKE_WRITE: + rv = DoHandshakeWrite(); + break; + case STATE_HANDSHAKE_WRITE_COMPLETE: + rv = DoHandshakeWriteComplete(rv); + break; + case STATE_PAYLOAD_READ: + rv = DoPayloadRead(); + break; + case STATE_PAYLOAD_READ_COMPLETE: + rv = DoPayloadReadComplete(rv); + break; + case STATE_PAYLOAD_WRITE: + rv = DoPayloadWrite(); + break; + case STATE_PAYLOAD_WRITE_COMPLETE: + rv = DoPayloadWriteComplete(rv); + break; + default: + rv = ERR_FAILED; + NOTREACHED() << "unexpected state"; + } + } while (rv != ERR_IO_PENDING && next_state_ != STATE_NONE); + return rv; +} + +int SSLClientSocket::DoConnect() { + next_state_ = STATE_CONNECT_COMPLETE; + return transport_->Connect(&io_callback_); +} + +int SSLClientSocket::DoConnectComplete(int result) { + if (result < 0) + return result; + + memset(&ctxt_, 0, sizeof(ctxt_)); + memset(&creds_, 0, sizeof(creds_)); + + SCHANNEL_CRED schannel_cred = {0}; + schannel_cred.dwVersion = SCHANNEL_CRED_VERSION; + schannel_cred.dwFlags |= SCH_CRED_NO_DEFAULT_CREDS | + SCH_CRED_NO_SYSTEM_MAPPER | + SCH_CRED_REVOCATION_CHECK_CHAIN; + TimeStamp expiry; + SECURITY_STATUS status; + + status = SChannel().AcquireCredentialsHandle( + NULL, + UNISP_NAME, + SECPKG_CRED_OUTBOUND, + NULL, + &schannel_cred, + NULL, + NULL, + &creds_, + &expiry); + if (status != SEC_E_OK) 
{ + DLOG(ERROR) << "AcquireCredentialsHandle failed: " << status; + return ERR_FAILED; + } + + SecBufferDesc buffer_desc; + DWORD out_flags; + DWORD flags = ISC_REQ_SEQUENCE_DETECT | + ISC_REQ_REPLAY_DETECT | + ISC_REQ_CONFIDENTIALITY | + ISC_RET_EXTENDED_ERROR | + ISC_REQ_ALLOCATE_MEMORY | + ISC_REQ_STREAM; + + send_buffer_.pvBuffer = NULL; + send_buffer_.BufferType = SECBUFFER_TOKEN; + send_buffer_.cbBuffer = 0; + + buffer_desc.cBuffers = 1; + buffer_desc.pBuffers = &send_buffer_; + buffer_desc.ulVersion = SECBUFFER_VERSION; + + status = SChannel().InitializeSecurityContext( + &creds_, + NULL, + const_cast<wchar_t*>(ASCIIToWide(hostname_).c_str()), + flags, + 0, + SECURITY_NATIVE_DREP, + NULL, + 0, + &ctxt_, + &buffer_desc, + &out_flags, + &expiry); + if (status != SEC_I_CONTINUE_NEEDED) { + DLOG(ERROR) << "InitializeSecurityContext failed: " << status; + return ERR_FAILED; + } + + next_state_ = STATE_HANDSHAKE_WRITE; + return OK; +} + +int SSLClientSocket::DoHandshakeRead() { + next_state_ = STATE_HANDSHAKE_READ_COMPLETE; + + if (!recv_buffer_.get()) + recv_buffer_.reset(new char[kRecvBufferSize]); + + char* buf = recv_buffer_.get() + bytes_received_; + int buf_len = kRecvBufferSize - bytes_received_; + + if (buf_len <= 0) { + NOTREACHED() << "Receive buffer is too small!"; + return ERR_FAILED; + } + + return transport_->Read(buf, buf_len, &io_callback_); +} + +int SSLClientSocket::DoHandshakeReadComplete(int result) { + if (result < 0) + return result; + if (result == 0) + return ERR_FAILED; // Incomplete response :( + + bytes_received_ += result; + + // Process the contents of recv_buffer_. 
+ SECURITY_STATUS status; + TimeStamp expiry; + DWORD out_flags; + + DWORD flags = ISC_REQ_SEQUENCE_DETECT | + ISC_REQ_REPLAY_DETECT | + ISC_REQ_CONFIDENTIALITY | + ISC_RET_EXTENDED_ERROR | + ISC_REQ_ALLOCATE_MEMORY | + ISC_REQ_STREAM; + + SecBufferDesc in_buffer_desc, out_buffer_desc; + SecBuffer in_buffers[2]; + + in_buffer_desc.cBuffers = 2; + in_buffer_desc.pBuffers = in_buffers; + in_buffer_desc.ulVersion = SECBUFFER_VERSION; + + in_buffers[0].pvBuffer = &recv_buffer_[0]; + in_buffers[0].cbBuffer = bytes_received_; + in_buffers[0].BufferType = SECBUFFER_TOKEN; + + in_buffers[1].pvBuffer = NULL; + in_buffers[1].cbBuffer = 0; + in_buffers[1].BufferType = SECBUFFER_EMPTY; + + out_buffer_desc.cBuffers = 1; + out_buffer_desc.pBuffers = &send_buffer_; + out_buffer_desc.ulVersion = SECBUFFER_VERSION; + + send_buffer_.pvBuffer = NULL; + send_buffer_.BufferType = SECBUFFER_TOKEN; + send_buffer_.cbBuffer = 0; + + status = SChannel().InitializeSecurityContext( + &creds_, + &ctxt_, + NULL, + flags, + 0, + SECURITY_NATIVE_DREP, + &in_buffer_desc, + 0, + NULL, + &out_buffer_desc, + &out_flags, + &expiry); + + if (status == SEC_E_INCOMPLETE_MESSAGE) { + next_state_ = STATE_HANDSHAKE_READ; + return OK; + } + + // OK, all of the received data was consumed. + bytes_received_ = 0; + + if (send_buffer_.cbBuffer != 0 && + (status == SEC_E_OK || + status == SEC_I_CONTINUE_NEEDED || + FAILED(status) && (out_flags & ISC_RET_EXTENDED_ERROR))) { + next_state_ = STATE_HANDSHAKE_WRITE; + return OK; + } + + if (status == SEC_E_OK) { + if (in_buffers[1].BufferType == SECBUFFER_EXTRA) { + // TODO(darin) need to save this data for later. + NOTREACHED() << "should not occur for HTTPS traffic"; + } + return DidCompleteHandshake(); + } + + if (FAILED(status)) + return ERR_FAILED; + + next_state_ = STATE_HANDSHAKE_READ; + return OK; +} + +int SSLClientSocket::DoHandshakeWrite() { + next_state_ = STATE_HANDSHAKE_WRITE_COMPLETE; + + // We should have something to send. 
+ DCHECK(send_buffer_.pvBuffer); + DCHECK(send_buffer_.cbBuffer > 0); + + const char* buf = static_cast<char*>(send_buffer_.pvBuffer) + bytes_sent_; + int buf_len = send_buffer_.cbBuffer - bytes_sent_; + + return transport_->Write(buf, buf_len, &io_callback_); +} + +int SSLClientSocket::DoHandshakeWriteComplete(int result) { + if (result < 0) + return result; + + DCHECK(result != 0); + + // TODO(darin): worry about overflow? + bytes_sent_ += result; + DCHECK(bytes_sent_ <= static_cast<int>(send_buffer_.cbBuffer)); + + if (bytes_sent_ == static_cast<int>(send_buffer_.cbBuffer)) { + SChannel().FreeContextBuffer(send_buffer_.pvBuffer); + memset(&send_buffer_, 0, sizeof(send_buffer_)); + bytes_sent_ = 0; + next_state_ = STATE_HANDSHAKE_READ; + } else { + // Send the remaining bytes. + next_state_ = STATE_HANDSHAKE_WRITE; + } + + return OK; +} + +int SSLClientSocket::DoPayloadRead() { + next_state_ = STATE_PAYLOAD_READ_COMPLETE; + + return ERR_FAILED; +} + +int SSLClientSocket::DoPayloadReadComplete(int result) { + return ERR_FAILED; +} + +int SSLClientSocket::DoPayloadWrite() { + DCHECK(user_buf_); + DCHECK(user_buf_len_ > 0); + + next_state_ = STATE_PAYLOAD_WRITE_COMPLETE; + + size_t message_len = std::min( + stream_sizes_.cbMaximumMessage, static_cast<ULONG>(user_buf_len_)); + size_t alloc_len = + message_len + stream_sizes_.cbHeader + stream_sizes_.cbTrailer; + + /* + SecBuffer buffers[4]; + buffers[0]. 
+ + SecBufferDesc buffer_desc; + buffer_desc.cBuffers = 4; + buffer_desc.pBuffers = //XXX + buffer_desc.ulVersion = SECBUFFER_VERSION; + + SECURITY_STATUS status = SChannel().EncryptMessage( + &ctxt_, 0, &buffer_desc, 0); + */ + + return ERR_FAILED; +} + +int SSLClientSocket::DoPayloadWriteComplete(int result) { + return ERR_FAILED; +} + +int SSLClientSocket::DidCompleteHandshake() { + SECURITY_STATUS status = SChannel().QueryContextAttributes( + &ctxt_, SECPKG_ATTR_STREAM_SIZES, &stream_sizes_); + if (status != SEC_E_OK) { + DLOG(ERROR) << "QueryContextAttributes failed: " << status; + return ERR_FAILED; + } + + // We expect not to have to worry about message padding. + DCHECK(stream_sizes_.cbBlockSize == 1); + + completed_handshake_ = true; + return OK; +} + +} // namespace net diff --git a/net/base/ssl_client_socket.h b/net/base/ssl_client_socket.h new file mode 100644 index 0000000..599b488 --- /dev/null +++ b/net/base/ssl_client_socket.h @@ -0,0 +1,125 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
// NOTE: The SSL handshake occurs within the Connect method after a TCP
// connection is established. If a SSL error occurs during the handshake,
// Connect will fail. The consumer may choose to ignore certain SSL errors,
// such as a name mismatch, by calling ReconnectIgnoringLastError.
//
// The implementation is a state machine: each State value below has a
// matching Do* method, and |next_state_| records which one runs next.
class SSLClientSocket : public ClientSocket {
 public:
  // Takes ownership of the transport_socket, which may already be connected.
  // The given hostname will be compared with the name(s) in the server's
  // certificate during the SSL handshake.
  SSLClientSocket(ClientSocket* transport_socket, const std::string& hostname);
  ~SSLClientSocket();

  // ClientSocket methods:
  virtual int Connect(CompletionCallback* callback);
  virtual int ReconnectIgnoringLastError(CompletionCallback* callback);
  virtual void Disconnect();
  virtual bool IsConnected() const;

  // Socket methods:
  virtual int Read(char* buf, int buf_len, CompletionCallback* callback);
  virtual int Write(const char* buf, int buf_len, CompletionCallback* callback);

 private:
  // NOTE(review): presumably DoCallback runs |user_callback_| and
  // OnIOComplete is the target of |io_callback_|; their definitions are not
  // part of this declaration -- confirm in the .cc file.
  void DoCallback(int result);
  void OnIOComplete(int result);

  // State-machine steps.  Each returns a net error code (OK, ERR_FAILED, ...)
  // or, for the *Complete handlers, may pass through the I/O result given.
  int DoLoop(int last_io_result);
  int DoConnect();
  int DoConnectComplete(int result);
  int DoHandshakeRead();
  int DoHandshakeReadComplete(int result);
  int DoHandshakeWrite();
  int DoHandshakeWriteComplete(int result);
  int DoPayloadRead();
  int DoPayloadReadComplete(int result);
  int DoPayloadWrite();
  int DoPayloadWriteComplete(int result);

  // Queries the stream sizes for the security context and marks the
  // handshake as completed.
  int DidCompleteHandshake();

  // Completion callback handed to |transport_| for handshake writes.
  CompletionCallbackImpl<SSLClientSocket> io_callback_;
  // The underlying transport socket (owned).
  scoped_ptr<ClientSocket> transport_;
  std::string hostname_;

  CompletionCallback* user_callback_;

  // Used by both Read and Write functions.
  char* user_buf_;
  int user_buf_len_;

  enum State {
    STATE_NONE,
    STATE_CONNECT,
    STATE_CONNECT_COMPLETE,
    STATE_HANDSHAKE_READ,
    STATE_HANDSHAKE_READ_COMPLETE,
    STATE_HANDSHAKE_WRITE,
    STATE_HANDSHAKE_WRITE_COMPLETE,
    STATE_PAYLOAD_WRITE,
    STATE_PAYLOAD_WRITE_COMPLETE,
    STATE_PAYLOAD_READ,
    STATE_PAYLOAD_READ_COMPLETE,
  };
  State next_state_;

  // Filled in by DidCompleteHandshake() via QueryContextAttributes.
  SecPkgContext_StreamSizes stream_sizes_;

  CredHandle creds_;
  CtxtHandle ctxt_;
  // Handshake data waiting to be written to |transport_|, and how many of its
  // bytes have been written so far.
  SecBuffer send_buffer_;
  int bytes_sent_;

  // NOTE(review): presumably the receive-side counterpart of |send_buffer_| /
  // |bytes_sent_| -- confirm against the handshake-read code.
  scoped_array<char> recv_buffer_;
  int bytes_received_;

  // Set to true by DidCompleteHandshake() on success.
  bool completed_handshake_;
};
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "net/base/address_list.h" +#include "net/base/net_errors.h" +#include "net/base/host_resolver.h" +#include "net/base/ssl_client_socket.h" +#include "net/base/tcp_client_socket.h" +#include "net/base/test_completion_callback.h" +#include "testing/gtest/include/gtest/gtest.h" + +//----------------------------------------------------------------------------- + +namespace { + +class SSLClientSocketTest : public testing::Test { +}; + +} // namespace + +//----------------------------------------------------------------------------- + +TEST_F(SSLClientSocketTest, Connect) { + net::AddressList addr; + net::HostResolver resolver; + TestCompletionCallback callback; + + std::string hostname = "www.verisign.com"; + int rv = resolver.Resolve(hostname, 443, &addr, NULL); + EXPECT_EQ(net::OK, rv); + + net::SSLClientSocket sock(new net::TCPClientSocket(addr), hostname); + + EXPECT_FALSE(sock.IsConnected()); + + rv = sock.Connect(&callback); + ASSERT_EQ(net::ERR_IO_PENDING, rv); + + rv = callback.WaitForResult(); + EXPECT_EQ(net::OK, rv); + + EXPECT_TRUE(sock.IsConnected()); + + sock.Disconnect(); + EXPECT_FALSE(sock.IsConnected()); +} + +#if 0 +TEST_F(SSLClientSocketTest, Read) { + net::AddressList addr; + net::HostResolver resolver; + TestCompletionCallback callback; + + std::string hostname = "www.google.com"; + int rv = resolver.Resolve(hostname, 443, &addr, &callback); + EXPECT_EQ(rv, net::ERR_IO_PENDING); + + rv = 
// Connects a plain TCP socket, sends a minimal HTTP/1.0 request, performs a
// single read and then returns while the response may still be arriving, so
// that the socket is torn down mid-transfer.
//
// NOTE(review): this test sits inside the file's "#if 0" region, so it is not
// compiled.  It also names the fixture TCPClientSocketTest, which this file
// does not appear to define -- presumably it was carried over from the TCP
// socket unit test; confirm before enabling.
TEST_F(TCPClientSocketTest, Read_Interrupted) {
  net::AddressList addr;
  net::HostResolver resolver;
  TestCompletionCallback callback;

  int rv = resolver.Resolve("www.google.com", 80, &addr, NULL);
  EXPECT_EQ(rv, net::OK);

  net::TCPClientSocket sock(addr);

  rv = sock.Connect(&callback);
  ASSERT_EQ(rv, net::ERR_IO_PENDING);

  rv = callback.WaitForResult();
  EXPECT_EQ(rv, net::OK);

  // The trailing NUL of the literal is not sent, hence the "-1".
  const char request_text[] = "GET / HTTP/1.0\r\n\r\n";
  rv = sock.Write(request_text, arraysize(request_text)-1, &callback);
  EXPECT_TRUE(rv >= 0 || rv == net::ERR_IO_PENDING);

  if (rv == net::ERR_IO_PENDING) {
    rv = callback.WaitForResult();
    EXPECT_EQ(rv, arraysize(request_text)-1);
  }

  // Do a partial read and then exit. This test should not crash!
  char buf[512];
  rv = sock.Read(buf, sizeof(buf), &callback);
  EXPECT_TRUE(rv >= 0 || rv == net::ERR_IO_PENDING);

  if (rv == net::ERR_IO_PENDING)
    rv = callback.WaitForResult();

  // A result of 0 would mean the read returned no data at all.
  EXPECT_TRUE(rv != 0);
}
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "net/base/ssl_config_service.h" + +#include "base/registry.h" + +namespace net { + +static const int kConfigUpdateInterval = 10; // seconds + +static const wchar_t kInternetSettingsSubKeyName[] = + L"Software\\Microsoft\\Windows\\CurrentVersion\\Internet Settings"; + +static const wchar_t kRevocationValueName[] = L"CertificateRevocation"; + +static const wchar_t kProtocolsValueName[] = L"SecureProtocols"; + +// In SecureProtocols, each SSL version is represented by a bit: +// SSL 2.0: 0x08 +// SSL 3.0: 0x20 +// TLS 1.0: 0x80 +// The bits are OR'ed to form the DWORD value. So 0xa0 means SSL 3.0 and +// TLS 1.0. +enum { + SSL2 = 0x08, + SSL3 = 0x20, + TLS1 = 0x80 +}; + +// If CertificateRevocation or SecureProtocols is missing, IE uses a default +// value. Unfortunately the default is IE version specific. We use WinHTTP's +// default. 
+enum { + REVOCATION_DEFAULT = 0, + PROTOCOLS_DEFAULT = SSL3 | TLS1 +}; + +SSLConfigService::SSLConfigService() { + UpdateConfig(TimeTicks::Now()); +} + +SSLConfigService::SSLConfigService(TimeTicks now) { + UpdateConfig(now); +} + +void SSLConfigService::GetSSLConfigAt(SSLConfig* config, TimeTicks now) { + if (now - config_time_ > TimeDelta::FromSeconds(kConfigUpdateInterval)) + UpdateConfig(now); + *config = config_info_; +} + +// static +bool SSLConfigService::GetSSLConfigNow(SSLConfig* config) { + RegKey internet_settings; + if (!internet_settings.Open(HKEY_CURRENT_USER, kInternetSettingsSubKeyName, + KEY_READ)) + return false; + + DWORD revocation; + if (!internet_settings.ReadValueDW(kRevocationValueName, &revocation)) + revocation = REVOCATION_DEFAULT; + + DWORD protocols; + if (!internet_settings.ReadValueDW(kProtocolsValueName, &protocols)) + protocols = PROTOCOLS_DEFAULT; + + config->rev_checking_enabled = (revocation != 0); + config->ssl2_enabled = ((protocols & SSL2) != 0); + config->ssl3_enabled = ((protocols & SSL3) != 0); + config->tls1_enabled = ((protocols & TLS1) != 0); + + return true; +} + +// static +void SSLConfigService::SetRevCheckingEnabled(bool enabled) { + DWORD value = enabled; + RegKey internet_settings(HKEY_CURRENT_USER, kInternetSettingsSubKeyName, + KEY_WRITE); + internet_settings.WriteValue(kRevocationValueName, value); +} + +// static +void SSLConfigService::SetSSL2Enabled(bool enabled) { + RegKey internet_settings(HKEY_CURRENT_USER, kInternetSettingsSubKeyName, + KEY_READ | KEY_WRITE); + DWORD value; + if (!internet_settings.ReadValueDW(kProtocolsValueName, &value)) + value = PROTOCOLS_DEFAULT; + if (enabled) + value |= SSL2; + else + value &= ~SSL2; + internet_settings.WriteValue(kProtocolsValueName, value); +} + +void SSLConfigService::UpdateConfig(TimeTicks now) { + GetSSLConfigNow(&config_info_); + config_time_ = now; +} + +} // namespace net diff --git a/net/base/ssl_config_service.h b/net/base/ssl_config_service.h new file 
namespace net {

// A collection of SSL-related configuration settings.
struct SSLConfig {
  // Default to no revocation checking.
  // Default to SSL 2.0 off, SSL 3.0 on, and TLS 1.0 on.
  SSLConfig()
      : rev_checking_enabled(false), ssl2_enabled(false),
        ssl3_enabled(true), tls1_enabled(true) {
  }

  bool rev_checking_enabled;  // True if server certificate revocation
                              // checking is enabled.
  bool ssl2_enabled;  // True if SSL 2.0 is enabled.
  bool ssl3_enabled;  // True if SSL 3.0 is enabled.
  bool tls1_enabled;  // True if TLS 1.0 is enabled.
};

// This class is responsible for getting and setting the SSL configuration.
//
// We think the SSL configuration settings should apply to all applications
// used by the user. We consider IE's Internet Options as the de facto
// system-wide network configuration settings, so we just use the values
// from IE's Internet Settings registry key.
//
// The static methods go to the settings store on every call; the instance
// methods serve a cached copy that is refreshed when it becomes stale.
class SSLConfigService {
 public:
  SSLConfigService();
  explicit SSLConfigService(TimeTicks now);  // Used for testing.
  ~SSLConfigService() { }

  // Get the current SSL configuration settings. Can be called on any
  // thread.
  // The bool result reports whether the settings could be obtained.
  static bool GetSSLConfigNow(SSLConfig* config);

  // Setters. Can be called on any thread.
  static void SetRevCheckingEnabled(bool enabled);
  static void SetSSL2Enabled(bool enabled);

  // Get the (cached) SSL configuration settings that are fresh within 10
  // seconds. This is cheaper than GetSSLConfigNow and is suitable when
  // we don't need the absolutely current configuration settings. This
  // method is not thread-safe, so it must be called on the same thread.
  void GetSSLConfig(SSLConfig* config) {
    GetSSLConfigAt(config, TimeTicks::Now());
  }

  // Used for testing.
  // Same as GetSSLConfig, but with the caller supplying the current time.
  void GetSSLConfigAt(SSLConfig* config, TimeTicks now);

 private:
  // NOTE(review): presumably re-reads the settings and stamps them with
  // |now|; the definition is not in this header -- confirm in the .cc file.
  void UpdateConfig(TimeTicks now);

  // We store the IE SSL config and the time that we fetched it.
  SSLConfig config_info_;
  TimeTicks config_time_;

  DISALLOW_EVIL_CONSTRUCTORS(SSLConfigService);
};

}  // namespace net
+ +#include "net/base/ssl_config_service.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace { + +class SSLConfigServiceTest : public testing::Test { +}; + +} // namespace + +TEST(SSLConfigServiceTest, GetNowTest) { + // Verify that the constructor sets the correct default values. + net::SSLConfig config; + EXPECT_EQ(false, config.rev_checking_enabled); + EXPECT_EQ(false, config.ssl2_enabled); + EXPECT_EQ(true, config.ssl3_enabled); + EXPECT_EQ(true, config.tls1_enabled); + + bool rv = net::SSLConfigService::GetSSLConfigNow(&config); + EXPECT_TRUE(rv); +} + +TEST(SSLConfigServiceTest, SetTest) { + // Save the current settings so we can restore them after the tests. + net::SSLConfig config_save; + bool rv = net::SSLConfigService::GetSSLConfigNow(&config_save); + EXPECT_TRUE(rv); + + net::SSLConfig config; + + // Test SetRevCheckingEnabled. + net::SSLConfigService::SetRevCheckingEnabled(true); + rv = net::SSLConfigService::GetSSLConfigNow(&config); + EXPECT_TRUE(rv); + EXPECT_TRUE(config.rev_checking_enabled); + + net::SSLConfigService::SetRevCheckingEnabled(false); + rv = net::SSLConfigService::GetSSLConfigNow(&config); + EXPECT_TRUE(rv); + EXPECT_FALSE(config.rev_checking_enabled); + + net::SSLConfigService::SetRevCheckingEnabled( + config_save.rev_checking_enabled); + + // Test SetSSL2Enabled. 
+ net::SSLConfigService::SetSSL2Enabled(true); + rv = net::SSLConfigService::GetSSLConfigNow(&config); + EXPECT_TRUE(rv); + EXPECT_TRUE(config.ssl2_enabled); + + net::SSLConfigService::SetSSL2Enabled(false); + rv = net::SSLConfigService::GetSSLConfigNow(&config); + EXPECT_TRUE(rv); + EXPECT_FALSE(config.ssl2_enabled); + + net::SSLConfigService::SetSSL2Enabled(config_save.ssl2_enabled); +} + +TEST(SSLConfigServiceTest, GetTest) { + TimeTicks now = TimeTicks::Now(); + TimeTicks now_1 = now + TimeDelta::FromSeconds(1); + TimeTicks now_11 = now + TimeDelta::FromSeconds(11); + + net::SSLConfig config, config_1, config_11; + net::SSLConfigService config_service(now); + config_service.GetSSLConfigAt(&config, now); + + // Flip rev_checking_enabled. + net::SSLConfigService::SetRevCheckingEnabled(!config.rev_checking_enabled); + + config_service.GetSSLConfigAt(&config_1, now_1); + EXPECT_EQ(config.rev_checking_enabled, config_1.rev_checking_enabled); + + config_service.GetSSLConfigAt(&config_11, now_11); + EXPECT_EQ(!config.rev_checking_enabled, config_11.rev_checking_enabled); + + // Restore the original value. + net::SSLConfigService::SetRevCheckingEnabled(config.rev_checking_enabled); +} diff --git a/net/base/ssl_info.h b/net/base/ssl_info.h new file mode 100644 index 0000000..07655eb --- /dev/null +++ b/net/base/ssl_info.h @@ -0,0 +1,102 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef NET_BASE_SSL_INFO_H__ +#define NET_BASE_SSL_INFO_H__ + +#include "net/base/cert_status_flags.h" +#include "net/base/net_errors.h" +#include "net/base/x509_certificate.h" + +namespace net { + +// SSL connection info. +// This is really a struct. All members are public. +class SSLInfo { + public: + SSLInfo() : cert_status(0), security_bits(-1) { } + + void Reset() { + cert = NULL; + security_bits = -1; + cert_status = 0; + } + + bool is_valid() const { return cert != NULL; } + + // Adds the specified |error| to the cert status. 
+ void SetCertError(int error) { + int error_flag = 0; + switch (error) { + case ERR_CERT_COMMON_NAME_INVALID: + error_flag = CERT_STATUS_COMMON_NAME_INVALID; + break; + case ERR_CERT_DATE_INVALID: + error_flag = CERT_STATUS_DATE_INVALID; + break; + case ERR_CERT_AUTHORITY_INVALID: + error_flag = CERT_STATUS_AUTHORITY_INVALID; + break; + case ERR_CERT_NO_REVOCATION_MECHANISM: + error_flag = CERT_STATUS_NO_REVOCATION_MECHANISM; + break; + case ERR_CERT_UNABLE_TO_CHECK_REVOCATION: + error_flag = CERT_STATUS_UNABLE_TO_CHECK_REVOCATION; + break; + case ERR_CERT_REVOKED: + error_flag = CERT_STATUS_REVOKED; + break; + case ERR_CERT_CONTAINS_ERRORS: + case ERR_CERT_INVALID: + error_flag = CERT_STATUS_INVALID; + break; + default: + NOTREACHED(); + return; + } + cert_status |= error_flag; + } + + // The SSL certificate. + scoped_refptr<X509Certificate> cert; + + // Bitmask of status info of |cert|, representing, for example, known errors + // and extended validation (EV) status. + // See cert_status_flags.h for values. + int cert_status; + + // The security strength, in bits, of the SSL cipher suite. + // 0 means the connection is not encrypted. + // -1 means the security strength is unknown. + int security_bits; +}; + +} // namespace net + +#endif // NET_BASE_SSL_INFO_H__ diff --git a/net/base/tcp_client_socket.cc b/net/base/tcp_client_socket.cc new file mode 100644 index 0000000..fad4f2f0 --- /dev/null +++ b/net/base/tcp_client_socket.cc @@ -0,0 +1,281 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "net/base/tcp_client_socket.h" + +#include "net/base/net_errors.h" +#include "net/base/winsock_init.h" + +namespace net { + +//----------------------------------------------------------------------------- + +static int MapWinsockError(DWORD err) { + // There are numerous Winsock error codes, but these are the ones we thus far + // find interesting. 
+ switch (err) { + case WSAENETDOWN: + return ERR_INTERNET_DISCONNECTED; + case WSAETIMEDOUT: + return ERR_TIMED_OUT; + case WSAECONNRESET: + case WSAENETRESET: + return ERR_CONNECTION_RESET; + case WSAECONNABORTED: + return ERR_CONNECTION_ABORTED; + case WSAECONNREFUSED: + return ERR_CONNECTION_REFUSED; + case WSAEDISCON: + return ERR_CONNECTION_CLOSED; + case WSAEHOSTUNREACH: + case WSAENETUNREACH: + return ERR_ADDRESS_UNREACHABLE; + case WSAEADDRNOTAVAIL: + return ERR_ADDRESS_INVALID; + case ERROR_SUCCESS: + return OK; + default: + return ERR_FAILED; + } +} + +//----------------------------------------------------------------------------- + +TCPClientSocket::TCPClientSocket(const AddressList& addresses) + : socket_(INVALID_SOCKET), + addresses_(addresses), + current_ai_(addresses_.head()), + wait_state_(NOT_WAITING) { + memset(&overlapped_, 0, sizeof(overlapped_)); + EnsureWinsockInit(); +} + +TCPClientSocket::~TCPClientSocket() { + Disconnect(); +} + +int TCPClientSocket::Connect(CompletionCallback* callback) { + // If already connected, then just return OK. + if (socket_ != INVALID_SOCKET) + return OK; + + const struct addrinfo* ai = current_ai_; + DCHECK(ai); + + int rv = CreateSocket(ai); + if (rv != OK) + return rv; + + if (!connect(socket_, ai->ai_addr, static_cast<int>(ai->ai_addrlen))) { + // Connected without waiting! + return OK; + } + + DWORD err = WSAGetLastError(); + if (err != WSAEWOULDBLOCK) { + LOG(ERROR) << "connect failed: " << err; + return MapWinsockError(err); + } + + overlapped_.hEvent = WSACreateEvent(); + WSAEventSelect(socket_, overlapped_.hEvent, FD_CONNECT); + + MessageLoop::current()->WatchObject(overlapped_.hEvent, this); + wait_state_ = WAITING_CONNECT; + callback_ = callback; + return ERR_IO_PENDING; +} + +int TCPClientSocket::ReconnectIgnoringLastError(CompletionCallback* callback) { + // No ignorable errors! 
+ return ERR_FAILED; +} + +void TCPClientSocket::Disconnect() { + if (socket_ == INVALID_SOCKET) + return; + + // Make sure the message loop is not watching this object anymore. + MessageLoop::current()->WatchObject(overlapped_.hEvent, NULL); + + // This cancels any pending IO. + closesocket(socket_); + socket_ = INVALID_SOCKET; + + WSACloseEvent(overlapped_.hEvent); + overlapped_.hEvent = NULL; + + // Reset for next time. + current_ai_ = addresses_.head(); +} + +bool TCPClientSocket::IsConnected() const { + if (socket_ == INVALID_SOCKET || wait_state_ == WAITING_CONNECT) + return false; + + // Check if connection is alive. + char c; + int rv = recv(socket_, &c, 1, MSG_PEEK); + if (rv == 0) + return false; + if (rv == SOCKET_ERROR && WSAGetLastError() != WSAEWOULDBLOCK) + return false; + + return true; +} + +int TCPClientSocket::Read(char* buf, int buf_len, CompletionCallback* callback) { + DCHECK(socket_ != INVALID_SOCKET); + DCHECK(wait_state_ == NOT_WAITING); + DCHECK(!callback_); + + buffer_.len = buf_len; + buffer_.buf = buf; + + DWORD num, flags = 0; + int rv = WSARecv(socket_, &buffer_, 1, &num, &flags, &overlapped_, NULL); + if (rv == 0) + return static_cast<int>(num); + if (rv == SOCKET_ERROR && WSAGetLastError() == WSA_IO_PENDING) { + MessageLoop::current()->WatchObject(overlapped_.hEvent, this); + wait_state_ = WAITING_READ; + callback_ = callback; + return ERR_IO_PENDING; + } + return MapWinsockError(WSAGetLastError()); +} + +int TCPClientSocket::Write(const char* buf, int buf_len, CompletionCallback* callback) { + DCHECK(socket_ != INVALID_SOCKET); + DCHECK(wait_state_ == NOT_WAITING); + DCHECK(!callback_); + + buffer_.len = buf_len; + buffer_.buf = const_cast<char*>(buf); + + DWORD num; + int rv = WSASend(socket_, &buffer_, 1, &num, 0, &overlapped_, NULL); + if (rv == 0) + return static_cast<int>(num); + if (rv == SOCKET_ERROR && WSAGetLastError() == WSA_IO_PENDING) { + MessageLoop::current()->WatchObject(overlapped_.hEvent, this); + wait_state_ = 
WAITING_WRITE; + callback_ = callback; + return ERR_IO_PENDING; + } + return MapWinsockError(WSAGetLastError()); +} + +int TCPClientSocket::CreateSocket(const struct addrinfo* ai) { + socket_ = WSASocket(ai->ai_family, ai->ai_socktype, ai->ai_protocol, NULL, 0, + WSA_FLAG_OVERLAPPED); + if (socket_ == INVALID_SOCKET) { + LOG(ERROR) << "WSASocket failed: " << WSAGetLastError(); + return ERR_FAILED; + } + + // Configure non-blocking mode. + u_long non_blocking_mode = 1; + if (ioctlsocket(socket_, FIONBIO, &non_blocking_mode)) { + LOG(ERROR) << "ioctlsocket failed: " << WSAGetLastError(); + return ERR_FAILED; + } + + return OK; +} + +void TCPClientSocket::DoCallback(int rv) { + DCHECK(rv != ERR_IO_PENDING); + DCHECK(callback_); + + // since Run may result in Read being called, clear callback_ up front. + CompletionCallback* c = callback_; + callback_ = NULL; + c->Run(rv); +} + +void TCPClientSocket::DidCompleteConnect() { + int result; + + wait_state_ = NOT_WAITING; + + WSANETWORKEVENTS events; + WSAEnumNetworkEvents(socket_, overlapped_.hEvent, &events); + if (events.lNetworkEvents & FD_CONNECT) { + wait_state_ = NOT_WAITING; + DWORD error_code = static_cast<DWORD>(events.iErrorCode[FD_CONNECT_BIT]); + if (current_ai_->ai_next && ( + error_code == WSAEADDRNOTAVAIL || + error_code == WSAEAFNOSUPPORT || + error_code == WSAECONNREFUSED || + error_code == WSAENETUNREACH || + error_code == WSAEHOSTUNREACH || + error_code == WSAETIMEDOUT)) { + // Try using the next address. + const struct addrinfo* next = current_ai_->ai_next; + Disconnect(); + current_ai_ = next; + result = Connect(callback_); + } else { + result = MapWinsockError(error_code); + } + } else { + NOTREACHED(); + result = ERR_FAILED; + } + + if (result != ERR_IO_PENDING) + DoCallback(result); +} + +void TCPClientSocket::DidCompleteIO() { + DWORD num_bytes, flags; + BOOL ok = WSAGetOverlappedResult( + socket_, &overlapped_, &num_bytes, FALSE, &flags); + wait_state_ = NOT_WAITING; + DoCallback(ok ? 
num_bytes : MapWinsockError(WSAGetLastError())); +} + +void TCPClientSocket::OnObjectSignaled(HANDLE object) { + DCHECK(object == overlapped_.hEvent); + + MessageLoop::current()->WatchObject(overlapped_.hEvent, NULL); + + switch (wait_state_) { + case WAITING_CONNECT: + DidCompleteConnect(); + break; + case WAITING_READ: + case WAITING_WRITE: + DidCompleteIO(); + break; + } +} + +} // namespace net diff --git a/net/base/tcp_client_socket.h b/net/base/tcp_client_socket.h new file mode 100644 index 0000000..1ed37a7 --- /dev/null +++ b/net/base/tcp_client_socket.h @@ -0,0 +1,92 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef NET_BASE_TCP_CLIENT_SOCKET_H_ +#define NET_BASE_TCP_CLIENT_SOCKET_H_ + +#include <ws2tcpip.h> + +#include "base/message_loop.h" +#include "net/base/address_list.h" +#include "net/base/client_socket.h" + +namespace net { + +class TCPClientSocket : public ClientSocket, public MessageLoop::Watcher { + public: + // The IP address(es) and port number to connect to. The TCP socket will try + // each IP address in the list until it succeeds in establishing a + // connection. + TCPClientSocket(const AddressList& addresses); + + ~TCPClientSocket(); + + // ClientSocket methods: + virtual int Connect(CompletionCallback* callback); + virtual int ReconnectIgnoringLastError(CompletionCallback* callback); + virtual void Disconnect(); + virtual bool IsConnected() const; + + // Socket methods: + virtual int Read(char* buf, int buf_len, CompletionCallback* callback); + virtual int Write(const char* buf, int buf_len, CompletionCallback* callback); + + private: + int CreateSocket(const struct addrinfo* ai); + void DoCallback(int rv); + void DidCompleteConnect(); + void DidCompleteIO(); + + virtual void OnObjectSignaled(HANDLE object); + + SOCKET socket_; + OVERLAPPED overlapped_; + WSABUF buffer_; + + CompletionCallback* callback_; + + // Stored outside of the context so we can both lazily construct the context + // as well as construct a new one if Connect is called after Close. 
+ AddressList addresses_; + + // The addrinfo that we are attempting to use or NULL if uninitialized. + const struct addrinfo* current_ai_; + + enum WaitState { + NOT_WAITING, + WAITING_CONNECT, + WAITING_READ, + WAITING_WRITE + }; + WaitState wait_state_; +}; + +} // namespace net + +#endif // NET_BASE_TCP_CLIENT_SOCKET_H_ diff --git a/net/base/tcp_client_socket_unittest.cc b/net/base/tcp_client_socket_unittest.cc new file mode 100644 index 0000000..b0de697 --- /dev/null +++ b/net/base/tcp_client_socket_unittest.cc @@ -0,0 +1,185 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "net/base/address_list.h" +#include "net/base/net_errors.h" +#include "net/base/host_resolver.h" +#include "net/base/tcp_client_socket.h" +#include "net/base/test_completion_callback.h" +#include "testing/gtest/include/gtest/gtest.h" + +//----------------------------------------------------------------------------- + +namespace { + +class TCPClientSocketTest : public testing::Test { +}; + +} // namespace + +//----------------------------------------------------------------------------- + +TEST_F(TCPClientSocketTest, Connect) { + net::AddressList addr; + net::HostResolver resolver; + TestCompletionCallback callback; + + int rv = resolver.Resolve("www.google.com", 80, &addr, NULL); + EXPECT_EQ(rv, net::OK); + + net::TCPClientSocket sock(addr); + + EXPECT_FALSE(sock.IsConnected()); + + rv = sock.Connect(&callback); + ASSERT_EQ(rv, net::ERR_IO_PENDING); + + rv = callback.WaitForResult(); + EXPECT_EQ(rv, net::OK); + + EXPECT_TRUE(sock.IsConnected()); + + sock.Disconnect(); + EXPECT_FALSE(sock.IsConnected()); +} + +TEST_F(TCPClientSocketTest, Read) { + net::AddressList addr; + net::HostResolver resolver; + TestCompletionCallback callback; + + int rv = resolver.Resolve("www.google.com", 80, &addr, &callback); + EXPECT_EQ(rv, net::ERR_IO_PENDING); + + rv = callback.WaitForResult(); + EXPECT_EQ(rv, net::OK); + + net::TCPClientSocket sock(addr); + + rv = sock.Connect(&callback); + ASSERT_EQ(rv, net::ERR_IO_PENDING); + + rv = 
callback.WaitForResult(); + EXPECT_EQ(rv, net::OK); + + const char request_text[] = "GET / HTTP/1.0\r\n\r\n"; + rv = sock.Write(request_text, arraysize(request_text)-1, &callback); + EXPECT_TRUE(rv >= 0 || rv == net::ERR_IO_PENDING); + + if (rv == net::ERR_IO_PENDING) { + rv = callback.WaitForResult(); + EXPECT_EQ(rv, arraysize(request_text)-1); + } + + char buf[4096]; + for (;;) { + rv = sock.Read(buf, sizeof(buf), &callback); + EXPECT_TRUE(rv >= 0 || rv == net::ERR_IO_PENDING); + + if (rv == net::ERR_IO_PENDING) + rv = callback.WaitForResult(); + + if (rv == 0) + break; + } +} + +TEST_F(TCPClientSocketTest, Read_SmallChunks) { + net::AddressList addr; + net::HostResolver resolver; + TestCompletionCallback callback; + + int rv = resolver.Resolve("www.google.com", 80, &addr, NULL); + EXPECT_EQ(rv, net::OK); + + net::TCPClientSocket sock(addr); + + rv = sock.Connect(&callback); + ASSERT_EQ(rv, net::ERR_IO_PENDING); + + rv = callback.WaitForResult(); + EXPECT_EQ(rv, net::OK); + + const char request_text[] = "GET / HTTP/1.0\r\n\r\n"; + rv = sock.Write(request_text, arraysize(request_text)-1, &callback); + EXPECT_TRUE(rv >= 0 || rv == net::ERR_IO_PENDING); + + if (rv == net::ERR_IO_PENDING) { + rv = callback.WaitForResult(); + EXPECT_EQ(rv, arraysize(request_text)-1); + } + + char buf[1]; + for (;;) { + rv = sock.Read(buf, sizeof(buf), &callback); + EXPECT_TRUE(rv >= 0 || rv == net::ERR_IO_PENDING); + + if (rv == net::ERR_IO_PENDING) + rv = callback.WaitForResult(); + + if (rv == 0) + break; + } +} + +TEST_F(TCPClientSocketTest, Read_Interrupted) { + net::AddressList addr; + net::HostResolver resolver; + TestCompletionCallback callback; + + int rv = resolver.Resolve("www.google.com", 80, &addr, NULL); + EXPECT_EQ(rv, net::OK); + + net::TCPClientSocket sock(addr); + + rv = sock.Connect(&callback); + ASSERT_EQ(rv, net::ERR_IO_PENDING); + + rv = callback.WaitForResult(); + EXPECT_EQ(rv, net::OK); + + const char request_text[] = "GET / HTTP/1.0\r\n\r\n"; + rv = 
sock.Write(request_text, arraysize(request_text)-1, &callback); + EXPECT_TRUE(rv >= 0 || rv == net::ERR_IO_PENDING); + + if (rv == net::ERR_IO_PENDING) { + rv = callback.WaitForResult(); + EXPECT_EQ(rv, arraysize(request_text)-1); + } + + // Do a partial read and then exit. This test should not crash! + char buf[512]; + rv = sock.Read(buf, sizeof(buf), &callback); + EXPECT_TRUE(rv >= 0 || rv == net::ERR_IO_PENDING); + + if (rv == net::ERR_IO_PENDING) + rv = callback.WaitForResult(); + + EXPECT_TRUE(rv != 0); +} diff --git a/net/base/telnet_server.cc b/net/base/telnet_server.cc new file mode 100644 index 0000000..75d9361 --- /dev/null +++ b/net/base/telnet_server.cc @@ -0,0 +1,275 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// winsock2.h must be included first in order to ensure it is included before +// windows.h. +#include <winsock2.h> + +#include "net/base/telnet_server.h" + +#define READ_BUF_SIZE 200 + +// Telnet protocol constants. +class TelnetProtocol { + public: + // Telnet command definitions (from arpa/telnet.h). + enum Commands { + IAC = 255, // Interpret as command. + DONT = 254, // You are not to use option. + DO = 253, // Please, you use option. + WONT = 252, // I won't use option. + WILL = 251, // I will use option. + SB = 250, // Interpret as subnegotiation. + GA = 249, // You may reverse the line. + EL = 248, // Erase the current line. + EC = 247, // Erase the current character. + AYT = 246, // Are you there. + AO = 245, // Abort output--but let prog finish. + IP = 244, // Interrupt process--permanently. + BREAK = 243, // Break. + DM = 242, // Data mark--for connect. cleaning. + NOP = 241, // Nop. + SE = 240, // End sub negotiation. + EOR = 239, // End of record (transparent mode). + ABORT = 238, // Abort process. + SUSP = 237, // Suspend process. + XEOF = 236 // End of file: EOF is already used... + }; + + // Telnet options (from arpa/telnet.h). + enum Options { + BINARY = 0, // 8-bit data path. + ECHO = 1, // Echo. + SGA = 3, // Suppress go ahead. + NAWS = 31, // Window size. + LFLOW = 33 // Remote flow control. + }; + + // Fixed character definitions mentioned in RFC 854. 
+ enum Characters { + NUL = 0x00, + LF = 0x0A, + CR = 0x0D, + BELL = 0x07, + BS = 0x08, + HT = 0x09, + VT = 0x0B, + FF = 0x0C, + DEL = 0x7F, + ESC = 0x1B + }; +}; + + +/////////////////////// + +// must run in the IO thread +TelnetServer::TelnetServer(SOCKET s, ListenSocketDelegate* del, MessageLoop *l) + : ListenSocket(s, del, l) { + input_state_ = NOT_IN_IAC_OR_ESC_SEQUENCE; +} + +// must run in the IO thread +TelnetServer::~TelnetServer() { +} + +void TelnetServer::SendIAC(int command, int option) { + char data[3]; + data[0] = static_cast<unsigned char>(TelnetProtocol::IAC); + data[1] = static_cast<unsigned char>(command); + data[2] = option; + Send(data, 3); +} + +// always fixup \n to \r\n +void TelnetServer::SendInternal(const char* data, int len) { + int begin_index = 0; + for (int i = 0; i < len; i++) { + if (data[i] == TelnetProtocol::LF) { + // Send CR before LF if missing before. + if (i == 0 || data[i - 1] != TelnetProtocol::CR) { + // Send til before LF. + ListenSocket::SendInternal(data + begin_index, i - begin_index); + // Send CRLF. + ListenSocket::SendInternal("\r\n", 2); + // Continue after LF. 
+ begin_index = i + 1; + } + } + } + // Send what is left (the whole string is sent here if CRLF was ok) + ListenSocket::SendInternal(data + begin_index, len - begin_index); +} + +void TelnetServer::Accept() { + SOCKET conn = ListenSocket::Accept(socket_); + if (conn == INVALID_SOCKET) { + // TODO + } else { + scoped_refptr<TelnetServer> sock = + new TelnetServer(conn, socket_delegate_, loop_); + + // Set up the way we want to communicate + sock->SendIAC(TelnetProtocol::DO, TelnetProtocol::ECHO); + sock->SendIAC(TelnetProtocol::DO, TelnetProtocol::NAWS); + sock->SendIAC(TelnetProtocol::DO, TelnetProtocol::LFLOW); + sock->SendIAC(TelnetProtocol::WILL, TelnetProtocol::ECHO); + sock->SendIAC(TelnetProtocol::WILL, TelnetProtocol::SGA); + + // it's up to the delegate to AddRef if it wants to keep it around + socket_delegate_->DidAccept(this, sock); + } +} + +TelnetServer* TelnetServer::Listen(std::string ip, int port, + ListenSocketDelegate *del, MessageLoop* l) { + SOCKET s = ListenSocket::Listen(ip, port); + if (s == INVALID_SOCKET) { + // TODO + } else { + TelnetServer *serv = new TelnetServer(s, del, l); + serv->Listen(); + return serv; + } + return NULL; +} + +void TelnetServer::StateMachineStep(unsigned char c) { + switch (input_state_) { + case NOT_IN_IAC_OR_ESC_SEQUENCE: + if (c == TelnetProtocol::IAC) { + // Expect IAC command + input_state_ = EXPECTING_COMMAND; + } else if (c == TelnetProtocol::ESC) { + // Expect the first character after ESC (left square bracket or 'O') + input_state_ = EXPECTING_FIRST_ESC_CHARACTER; + } else { + char data[1]; + data[0] = c; + // handle backspace specially + if (c == TelnetProtocol::DEL) { + if (!command_line_.empty()) { + command_line_.erase(--command_line_.end()); + Send(data, 1); + } + } else { + // Collect command + if (c >= ' ') + command_line_ += c; + // Echo character to client (for now ignore control characters). 
+ if (c >= ' ' || c == TelnetProtocol::CR) { + Send(data, 1); + } + // Check for line termination + if (c == TelnetProtocol::CR) + input_state_ = EXPECTING_NEW_LINE; + } + } + break; + case EXPECTING_NEW_LINE: + if (c == TelnetProtocol::LF) { + Send("\n", 1); + socket_delegate_->DidRead(this, command_line_); + command_line_ = ""; + } + input_state_ = NOT_IN_IAC_OR_ESC_SEQUENCE; + break; + case EXPECTING_COMMAND: + // Read command, expect option. + iac_command_ = c; + input_state_ = EXPECTING_OPTION; + break; + case EXPECTING_OPTION: + // Read option + iac_option_ = c; + // An SB command starts a subnegotiation; any other command ends the IAC sequence here. + if (iac_command_ != TelnetProtocol::SB) { + input_state_ = NOT_IN_IAC_OR_ESC_SEQUENCE; + } else { + input_state_ = SUBNEGOTIATION_EXPECTING_IAC; + } + break; + case SUBNEGOTIATION_EXPECTING_IAC: + // Currently ignore content of subnegotiation. + if (c == TelnetProtocol::IAC) + input_state_ = SUBNEGOTIATION_EXPECTING_SE; + break; + case SUBNEGOTIATION_EXPECTING_SE: + // The character is expected to be SE but is not checked; the subnegotiation is treated as finished. + input_state_ = NOT_IN_IAC_OR_ESC_SEQUENCE; + break; + case EXPECTING_FIRST_ESC_CHARACTER: + if (c == '[') { + // Expect ESC sequence content. + input_state_ = EXPECTING_NUMBER_SEMICOLON_OR_END; + } else if (c == 'O') { + // VT100 "ESC O" sequence. + input_state_ = EXPECTING_SECOND_ESC_CHARACTER; + } else { + // Unknown ESC sequence - ignore. NOTE(review): input_state_ is not reset in this branch, so the parser stays in EXPECTING_FIRST_ESC_CHARACTER and keeps discarding input until '[' or 'O' arrives. + } + break; + case EXPECTING_SECOND_ESC_CHARACTER: + // Ignore ESC sequence content for now. + input_state_ = NOT_IN_IAC_OR_ESC_SEQUENCE; + break; + case EXPECTING_NUMBER_SEMICOLON_OR_END: + if (isdigit(c) || c ==';') { + // Ignore ESC sequence content for now. + } else { + // Final character in ESC sequence. 
+ input_state_ = NOT_IN_IAC_OR_ESC_SEQUENCE; + } + break; + } +} + +void TelnetServer::Read() { + char buf[READ_BUF_SIZE]; + int len; + do { + len = recv(socket_, buf, READ_BUF_SIZE, 0); + if (len == SOCKET_ERROR) { + int err = WSAGetLastError(); + if (err == WSAEWOULDBLOCK) { + break; + } else { + // TODO - error + break; + } + } else { + const char *data = buf; + for (int i = 0; i < len; ++i) { + unsigned char c = static_cast<unsigned char>(*data); + StateMachineStep(c); + data++; + } + } + } while (len == READ_BUF_SIZE); +} diff --git a/net/base/telnet_server.h b/net/base/telnet_server.h new file mode 100644 index 0000000..8b509be --- /dev/null +++ b/net/base/telnet_server.h @@ -0,0 +1,78 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef NET_BASE_TELNET_SERVER_H_ +#define NET_BASE_TELNET_SERVER_H_ + +#include "net/base/listen_socket.h" + +// Implements the telnet protocol on top of the raw socket interface. +// DidRead calls to the delegate are buffered on a line by line basis. +// (for now this means that basic line editing is handled in this object) +class TelnetServer : public ListenSocket { +public: + static TelnetServer* Listen(std::string ip, int port, + ListenSocketDelegate *del, + MessageLoop* loop); + virtual ~TelnetServer(); + +protected: + void Listen() { ListenSocket::Listen(); } + virtual void Read(); + virtual void Accept(); + virtual void SendInternal(const char* bytes, int len); + +private: + enum TelnetInputState { + NOT_IN_IAC_OR_ESC_SEQUENCE, // Currently not processing any IAC or ESC sequence. + EXPECTING_NEW_LINE, // Received carriage return (CR), expecting new line (LF). + EXPECTING_COMMAND, // Processing IAC, expecting command. + EXPECTING_OPTION, // Processing IAC, expecting option. + SUBNEGOTIATION_EXPECTING_IAC, // Inside subnegotiation; IAC, SE will end it. + SUBNEGOTIATION_EXPECTING_SE, // Ending subnegotiation, expecting SE. + EXPECTING_FIRST_ESC_CHARACTER, // Processing ESC sequence. + EXPECTING_SECOND_ESC_CHARACTER, // Processing ESC sequence with two characters + EXPECTING_NUMBER_SEMICOLON_OR_END // Processing "ESC [" sequence. 
+ }; + + TelnetServer(SOCKET s, ListenSocketDelegate* del, MessageLoop* loop); + + // telnet commands + void SendIAC(int command, int option); + void StateMachineStep(unsigned char c); + + TelnetInputState input_state_; + int iac_command_; // Last command read. + int iac_option_; // Last option read. + std::string command_line_; // Characters collected for the current input line. + + DISALLOW_EVIL_CONSTRUCTORS(TelnetServer); +}; + +#endif // NET_BASE_TELNET_SERVER_H_
\ No newline at end of file diff --git a/net/base/telnet_server_unittest.cc b/net/base/telnet_server_unittest.cc new file mode 100644 index 0000000..2ec1164 --- /dev/null +++ b/net/base/telnet_server_unittest.cc @@ -0,0 +1,99 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Tests TelnetServer. 
+ +#include "net/base/listen_socket_unittest.h" +#include "net/base/telnet_server.h" + +namespace { + +const std::string CRLF("\r\n"); + +class TelnetServerTester : public ListenSocketTester { +public: + virtual ListenSocket* DoListen() { + return TelnetServer::Listen("127.0.0.1", TEST_PORT, this, loop_); + } + + virtual void SetUp() { + ListenSocketTester::SetUp(); + // With TelnetServer, there's some control codes sent at connect time, + // so we need to eat those to avoid affecting the subsequent tests. + // TODO(erikkay): Unfortunately, without the sleep, we don't seem to + // reliably get the 15 bytes without an EWOULDBLOCK. It would be nice if + // there were a more reliable mechanism here. + Sleep(10); + ASSERT_EQ(ClearTestSocket(), 15); + } + + virtual bool Send(SOCKET sock, const std::string& str) { + if (ListenSocketTester::Send(sock, str)) { + // TelnetServer currently calls DidRead after a CRLF, so we need to + // append one to the end of the data that we send. + if (ListenSocketTester::Send(sock, CRLF)) { + return true; + } + } + return false; + } +}; + +class TelnetServerTest: public testing::Test { +protected: + TelnetServerTest() { + tester_ = NULL; + } + + virtual void SetUp() { + tester_ = new TelnetServerTester(); + tester_->SetUp(); + } + + virtual void TearDown() { + tester_->TearDown(); + tester_ = NULL; + } + + scoped_refptr<TelnetServerTester> tester_; +}; + +} // namespace + +TEST_F(TelnetServerTest, ServerClientSend) { + tester_->TestClientSend(); +} + +TEST_F(TelnetServerTest, ClientSendLong) { + tester_->TestClientSendLong(); +} + +TEST_F(TelnetServerTest, ServerSend) { + tester_->TestServerSend(); +} diff --git a/net/base/test_completion_callback.h b/net/base/test_completion_callback.h new file mode 100644 index 0000000..7d1f8a2 --- /dev/null +++ b/net/base/test_completion_callback.h @@ -0,0 +1,79 @@ +// Copyright 2008, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef NET_BASE_TEST_COMPLETION_CALLBACK_H_ +#define NET_BASE_TEST_COMPLETION_CALLBACK_H_ + +#include "base/message_loop.h" +#include "net/base/completion_callback.h" + +//----------------------------------------------------------------------------- +// completion callback helper + +// A helper class for completion callbacks, designed to make it easy to run +// tests involving asynchronous operations. 
Just call WaitForResult to wait +// for the asynchronous operation to complete. +// +// NOTE: Since this runs a message loop to wait for the completion callback, +// there could be other side-effects resulting from WaitForResult. For this +// reason, this class is probably not ideal for a general application. +// +class TestCompletionCallback : public CallbackRunner< Tuple1<int> > { + public: + TestCompletionCallback() + : result_(0), + have_result_(false), + waiting_for_result_(false) { + } + + int WaitForResult() { + DCHECK(!waiting_for_result_); + while (!have_result_) { + waiting_for_result_ = true; + MessageLoop::current()->Run(); + waiting_for_result_ = false; + } + have_result_ = false; // auto-reset for next callback + return result_; + } + + private: + virtual void RunWithParams(const Tuple1<int>& params) { + result_ = params.a; + have_result_ = true; + if (waiting_for_result_) + MessageLoop::current()->Quit(); + } + + int result_; + bool have_result_; + bool waiting_for_result_; +}; + +#endif // NET_BASE_TEST_COMPLETION_CALLBACK_H_ diff --git a/net/base/upload_data.cc b/net/base/upload_data.cc new file mode 100644 index 0000000..ec15435 --- /dev/null +++ b/net/base/upload_data.cc @@ -0,0 +1,71 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <windows.h> + +#include "net/base/upload_data.h" + +namespace net { + +uint64 UploadData::GetContentLength() const { + uint64 len = 0; + std::vector<Element>::const_iterator it = elements_.begin(); + for (; it != elements_.end(); ++it) + len += (*it).GetContentLength(); + return len; +} + +uint64 UploadData::Element::GetContentLength() const { + if (type_ == TYPE_BYTES) + return static_cast<uint64>(bytes_.size()); + + DCHECK(type_ == TYPE_FILE); + + // NOTE: wininet is unable to upload files larger than 4GB, but we'll let the + // http layer worry about that. + // TODO(darin): This size calculation could be out of sync with the state of + // the file when we get around to reading it. We should probably find a way + // to lock the file or somehow protect against this error condition. 
+ + WIN32_FILE_ATTRIBUTE_DATA info; + if (!GetFileAttributesEx(file_path_.c_str(), GetFileExInfoStandard, &info)) { + DLOG(WARNING) << "GetFileAttributesEx failed: " << GetLastError(); + return 0; + } + + uint64 length = static_cast<uint64>(info.nFileSizeHigh) << 32 | + info.nFileSizeLow; + if (file_range_offset_ >= length) + return 0; // range is beyond eof + + // compensate for the offset and clip file_range_length_ to eof + return std::min(length - file_range_offset_, file_range_length_); +} + +} // namespace net diff --git a/net/base/upload_data.h b/net/base/upload_data.h new file mode 100644 index 0000000..71b8f3d --- /dev/null +++ b/net/base/upload_data.h @@ -0,0 +1,125 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef NET_BASE_UPLOAD_DATA_H__ +#define NET_BASE_UPLOAD_DATA_H__ + +#include <vector> + +#include "base/basictypes.h" +#include "base/ref_counted.h" + +namespace net { + +class UploadData : public base::RefCounted<UploadData> { + public: + UploadData() {} + + enum Type { + TYPE_BYTES, + TYPE_FILE + }; + + class Element { + public: + Element() : type_(TYPE_BYTES), file_range_offset_(0), + file_range_length_(kuint64max) { + } + + Type type() const { return type_; } + const std::vector<char>& bytes() const { return bytes_; } + const std::wstring& file_path() const { return file_path_; } + uint64 file_range_offset() const { return file_range_offset_; } + uint64 file_range_length() const { return file_range_length_; } + + void SetToBytes(const char* bytes, int bytes_len) { + type_ = TYPE_BYTES; + bytes_.assign(bytes, bytes + bytes_len); + } + + void SetToFilePath(const std::wstring& path) { + SetToFilePathRange(path, 0, kuint64max); + } + + void SetToFilePathRange(const std::wstring& path, + uint64 offset, uint64 length) { + type_ = TYPE_FILE; + file_path_ = path; + file_range_offset_ = offset; + file_range_length_ = length; + } + + // Returns the byte-length of the element. For files that do not exist, 0 + // is returned. This is done for consistency with Mozilla. 
+ uint64 GetContentLength() const; + + private: + Type type_; + std::vector<char> bytes_; + std::wstring file_path_; + uint64 file_range_offset_; + uint64 file_range_length_; + }; + + void AppendBytes(const char* bytes, int bytes_len) { + if (bytes_len > 0) { + elements_.push_back(Element()); + elements_.back().SetToBytes(bytes, bytes_len); + } + } + + void AppendFile(const std::wstring& file_path) { + elements_.push_back(Element()); + elements_.back().SetToFilePath(file_path); + } + + void AppendFileRange(const std::wstring& file_path, + uint64 offset, uint64 length) { + elements_.push_back(Element()); + elements_.back().SetToFilePathRange(file_path, offset, length); + } + + // Returns the total size in bytes of the data to upload. + uint64 GetContentLength() const; + + const std::vector<Element>& elements() const { + return elements_; + } + + void set_elements(const std::vector<Element>& elements) { + elements_ = elements; + } + + private: + std::vector<Element> elements_; +}; + +} // namespace net + +#endif // NET_BASE_UPLOAD_DATA_H__ diff --git a/net/base/upload_data_stream.cc b/net/base/upload_data_stream.cc new file mode 100644 index 0000000..18ed05e --- /dev/null +++ b/net/base/upload_data_stream.cc @@ -0,0 +1,151 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "net/base/upload_data_stream.h" + +namespace net { + +UploadDataStream::UploadDataStream(const UploadData* data) + : data_(data), + next_element_handle_(INVALID_HANDLE_VALUE), + total_size_(data->GetContentLength()) { + Reset(); + FillBuf(); +} + +UploadDataStream::~UploadDataStream() { + if (next_element_handle_ != INVALID_HANDLE_VALUE) + CloseHandle(next_element_handle_); +} + +void UploadDataStream::DidConsume(size_t num_bytes) { + DCHECK(num_bytes <= buf_len_); + + buf_len_ -= num_bytes; + if (buf_len_) + memmove(buf_, buf_ + num_bytes, buf_len_); + + FillBuf(); + + current_position_ += num_bytes; +} + +void UploadDataStream::Reset() { + if (next_element_handle_ != INVALID_HANDLE_VALUE) { + CloseHandle(next_element_handle_); + next_element_handle_ = INVALID_HANDLE_VALUE; + } + buf_len_ = 0; + next_element_ = data_->elements().begin(); + next_element_offset_ = 0; + next_element_remaining_ = 0; + current_position_ = 0; +} + +void UploadDataStream::FillBuf() { + std::vector<UploadData::Element>::const_iterator 
end = + data_->elements().end(); + + while (buf_len_ < kBufSize && next_element_ != end) { + bool advance_to_next_element = false; + + size_t size_remaining = kBufSize - buf_len_; + if ((*next_element_).type() == UploadData::TYPE_BYTES) { + const std::vector<char>& d = (*next_element_).bytes(); + size_t count = d.size() - next_element_offset_; + + size_t bytes_copied = std::min(count, size_remaining); + + memcpy(buf_ + buf_len_, &d[next_element_offset_], bytes_copied); + buf_len_ += bytes_copied; + + if (bytes_copied == count) { + advance_to_next_element = true; + } else { + next_element_offset_ += bytes_copied; + } + } else { + DCHECK((*next_element_).type() == UploadData::TYPE_FILE); + + if (next_element_handle_ == INVALID_HANDLE_VALUE) { + next_element_handle_ = CreateFile((*next_element_).file_path().c_str(), + GENERIC_READ, + FILE_SHARE_READ | FILE_SHARE_WRITE, + NULL, OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL, NULL); + // If the file does not exist, that's technically okay.. we'll just + // upload an empty file. This is for consistency with Mozilla. + DLOG_IF(WARNING, next_element_handle_ == INVALID_HANDLE_VALUE) << + "Unable to open file \"" << (*next_element_).file_path() << + "\" for reading: " << GetLastError(); + + next_element_remaining_ = (*next_element_).file_range_length(); + + if ((*next_element_).file_range_offset()) { + LARGE_INTEGER offset; + offset.QuadPart = (*next_element_).file_range_offset(); + if (!SetFilePointerEx(next_element_handle_, offset, + NULL, FILE_BEGIN)) { + DLOG(WARNING) << + "Unable to set file position for file \"" << + (*next_element_).file_path() << "\": " << GetLastError(); + next_element_remaining_ = 0; + } + } + } + + // ReadFile will happily fail if given an invalid handle. 
+ BOOL ok = FALSE; + DWORD bytes_read = 0; + uint64 amount_to_read = std::min(static_cast<uint64>(size_remaining), + next_element_remaining_); + if ((amount_to_read > 0) && + (ok = ReadFile(next_element_handle_, buf_ + buf_len_, + static_cast<DWORD>(amount_to_read), &bytes_read, + NULL))) { + buf_len_ += bytes_read; + next_element_remaining_ -= bytes_read; + } + + if (!ok || bytes_read == 0) + advance_to_next_element = true; + } + + if (advance_to_next_element) { + ++next_element_; + next_element_offset_ = 0; + if (next_element_handle_ != INVALID_HANDLE_VALUE) { + CloseHandle(next_element_handle_); + next_element_handle_ = INVALID_HANDLE_VALUE; + } + } + } +} + +} // namespace net diff --git a/net/base/upload_data_stream.h b/net/base/upload_data_stream.h new file mode 100644 index 0000000..53b237c --- /dev/null +++ b/net/base/upload_data_stream.h @@ -0,0 +1,95 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef NET_BASE_UPLOAD_DATA_STREAM_H_ +#define NET_BASE_UPLOAD_DATA_STREAM_H_ + +#include "net/base/upload_data.h" + +namespace net { + +class UploadDataStream { + public: + UploadDataStream(const UploadData* data); + ~UploadDataStream(); + + // Returns the stream's buffer and buffer length. + const char* buf() const { return buf_; } + size_t buf_len() const { return buf_len_; } + + // Call to indicate that a portion of the stream's buffer was consumed. This + // call modifies the stream's buffer so that it contains the next segment of + // the upload data to be consumed. + void DidConsume(size_t num_bytes); + + // Call to reset the stream position to the beginning. + void Reset(); + + // Returns the total size of the data stream and the current position. + uint64 size() const { return total_size_; } + uint64 position() const { return current_position_; } + + private: + void FillBuf(); + + const UploadData* data_; + + // This buffer is filled with data to be uploaded. The data to be sent is + // always at the front of the buffer. If we cannot send all of the buffer at + // once, then we memmove the remaining portion and back-fill the buffer for + // the next "write" call. buf_len_ indicates how much data is in the buffer. + enum { kBufSize = 16384 }; + char buf_[kBufSize]; + size_t buf_len_; + + // Iterator to the upload element to be written to the send buffer next. 
+ std::vector<UploadData::Element>::const_iterator next_element_; + + // The byte offset into next_element_'s data buffer if the next element is + // a TYPE_BYTES element. + size_t next_element_offset_; + + // A handle to the currently open file (or INVALID_HANDLE_VALUE) for + // next_element_ if the next element is a TYPE_FILE element. + HANDLE next_element_handle_; + + // The number of bytes remaining to be read from the currently open file + // if the next element is of TYPE_FILE. + uint64 next_element_remaining_; + + // Size and current read position within the stream. + uint64 total_size_; + uint64 current_position_; + + DISALLOW_EVIL_CONSTRUCTORS(UploadDataStream); +}; + +} // namespace net + +#endif // NET_BASE_UPLOAD_DATA_STREAM_H_ diff --git a/net/base/wininet_util.cc b/net/base/wininet_util.cc new file mode 100644 index 0000000..2c68891 --- /dev/null +++ b/net/base/wininet_util.cc @@ -0,0 +1,95 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "net/base/wininet_util.h" + +#include "base/logging.h" +#include "net/base/net_errors.h" + +namespace net { + +// static +int WinInetUtil::OSErrorToNetError(DWORD os_error) { + // Optimize the common case. + if (os_error == ERROR_IO_PENDING) + return net::ERR_IO_PENDING; + + switch (os_error) { + case ERROR_SUCCESS: + return net::OK; + case ERROR_FILE_NOT_FOUND: + case ERROR_PATH_NOT_FOUND: + return net::ERR_FILE_NOT_FOUND; + case ERROR_HANDLE_EOF: // TODO(wtc): return net::OK? 
+ return net::ERR_CONNECTION_CLOSED; + case ERROR_INVALID_HANDLE: + return net::ERR_INVALID_HANDLE; + case ERROR_INVALID_PARAMETER: + return net::ERR_INVALID_ARGUMENT; + + case ERROR_INTERNET_CANNOT_CONNECT: + return net::ERR_CONNECTION_FAILED; + case ERROR_INTERNET_CONNECTION_RESET: + return net::ERR_CONNECTION_RESET; + case ERROR_INTERNET_DISCONNECTED: + return net::ERR_INTERNET_DISCONNECTED; + case ERROR_INTERNET_INVALID_URL: + return net::ERR_INVALID_URL; + case ERROR_INTERNET_NAME_NOT_RESOLVED: + return net::ERR_NAME_NOT_RESOLVED; + case ERROR_INTERNET_OPERATION_CANCELLED: + return net::ERR_ABORTED; + case ERROR_INTERNET_UNRECOGNIZED_SCHEME: + return net::ERR_UNKNOWN_URL_SCHEME; + + // SSL certificate errors + case ERROR_INTERNET_SEC_CERT_CN_INVALID: + return net::ERR_CERT_COMMON_NAME_INVALID; + case ERROR_INTERNET_SEC_CERT_DATE_INVALID: + return net::ERR_CERT_DATE_INVALID; + case ERROR_INTERNET_INVALID_CA: + return net::ERR_CERT_AUTHORITY_INVALID; + case ERROR_INTERNET_SEC_CERT_NO_REV: + return net::ERR_CERT_NO_REVOCATION_MECHANISM; + case ERROR_INTERNET_SEC_CERT_REV_FAILED: + return net::ERR_CERT_UNABLE_TO_CHECK_REVOCATION; + case ERROR_INTERNET_SEC_CERT_REVOKED: + return net::ERR_CERT_REVOKED; + case ERROR_INTERNET_SEC_CERT_ERRORS: + return net::ERR_CERT_CONTAINS_ERRORS; + case ERROR_INTERNET_SEC_INVALID_CERT: + return net::ERR_CERT_INVALID; + + case ERROR_INTERNET_EXTENDED_ERROR: + default: + return net::ERR_FAILED; + } +} + +} // namespace net diff --git a/net/base/wininet_util.h b/net/base/wininet_util.h new file mode 100644 index 0000000..9db9433 --- /dev/null +++ b/net/base/wininet_util.h @@ -0,0 +1,50 @@ +// Copyright 2008, Google Inc. +// All rights reserved. 
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef NET_BASE_WININET_UTIL_H__
#define NET_BASE_WININET_UTIL_H__

#include <windows.h>
#include <wininet.h>

#include <string>

namespace net {

// Global functions and variables for using WinInet.  The class only groups
// static helpers; it declares no instance state.
class WinInetUtil {
 public:
  // Maps Windows error codes (returned by GetLastError()) to net::ERR_xxx
  // error codes.
  //
  // |os_error| is the Windows error code to translate.  Returns the matching
  // net error code as an int.
  static int OSErrorToNetError(DWORD os_error);
};

}  // namespace net

#endif  // NET_BASE_WININET_UTIL_H__
diff --git a/net/base/wininet_util_unittest.cc b/net/base/wininet_util_unittest.cc new file mode 100644 index 0000000..affe1c4 --- /dev/null +++ b/net/base/wininet_util_unittest.cc @@ -0,0 +1,64 @@
// Copyright 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ +#include <windows.h> +#include <wininet.h> + +#include "net/base/net_errors.h" +#include "net/base/wininet_util.h" +#include "testing/gtest/include/gtest/gtest.h" + +using net::WinInetUtil; + +namespace { + class WinInetUtilTest : public testing::Test { + }; +} + +TEST(WinInetUtilTest, ErrorCodeConversion) { + // a list of Windows error codes and the corresponding + // net::ERR_xxx error codes + static const struct { + DWORD os_error; + int net_error; + } error_cases[] = { + {ERROR_SUCCESS, net::OK}, + {ERROR_IO_PENDING, net::ERR_IO_PENDING}, + {ERROR_INTERNET_OPERATION_CANCELLED, net::ERR_ABORTED}, + {ERROR_INTERNET_CANNOT_CONNECT, net::ERR_CONNECTION_FAILED}, + {ERROR_INTERNET_NAME_NOT_RESOLVED, net::ERR_NAME_NOT_RESOLVED}, + {ERROR_INTERNET_INVALID_CA, net::ERR_CERT_AUTHORITY_INVALID}, + {999999, net::ERR_FAILED}, + }; + + for (int i = 0; i < arraysize(error_cases); i++) { + EXPECT_EQ(error_cases[i].net_error, + WinInetUtil::OSErrorToNetError(error_cases[i].os_error)); + } +} diff --git a/net/base/winsock_init.cc b/net/base/winsock_init.cc new file mode 100644 index 0000000..e164b92 --- /dev/null +++ b/net/base/winsock_init.cc @@ -0,0 +1,57 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <winsock2.h> + +#include "net/base/winsock_init.h" + +#include "base/singleton.h" + +WinsockInit::WinsockInit() : did_init_(false) { + did_init_ = Init(); +} + +bool WinsockInit::Init() { + WORD winsock_ver = MAKEWORD(2,2); + WSAData wsa_data; + return (WSAStartup(winsock_ver, &wsa_data) == 0); +} + +void WinsockInit::Cleanup() { + WSACleanup(); +} + +WinsockInit::~WinsockInit() { + if (did_init_) + Cleanup(); +} + +void EnsureWinsockInit() { + Singleton<WinsockInit>::get(); +} diff --git a/net/base/winsock_init.h b/net/base/winsock_init.h new file mode 100644 index 0000000..40d0557 --- /dev/null +++ b/net/base/winsock_init.h @@ -0,0 +1,56 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Winsock initialization must happen before any Winsock calls are made.  This
// class provides a wrapper for WSAStartup and WSACleanup. There are 3 ways to
// use it: either allocate a new WinsockInit object at startup and delete when
// shutting down, manually call Init and Cleanup, or use the EnsureWinsockInit
// method, which may be called multiple times.  In the second case, Cleanup
// should only be called if Init was successful.

#ifndef NET_BASE_WINSOCK_INIT_H_
#define NET_BASE_WINSOCK_INIT_H_

// Scoped wrapper around Winsock startup and cleanup: the constructor calls
// Init() and the destructor calls Cleanup() only if that Init() succeeded.
class WinsockInit {
 public:
  WinsockInit();
  ~WinsockInit();

  // Starts Winsock.  Returns true on success.
  static bool Init();

  // Shuts Winsock down.  Call only after a successful Init().
  static void Cleanup();

 private:
  // True when the constructor's Init() call succeeded, i.e. when the
  // destructor owes a Cleanup().
  bool did_init_;
};

// Force there to be a global WinsockInit object that gets created once and
// destroyed at application exit.  This may be called multiple times.
void EnsureWinsockInit();

#endif  // NET_BASE_WINSOCK_INIT_H_
diff --git a/net/base/x509_certificate.cc b/net/base/x509_certificate.cc new file mode 100644 index 0000000..445f128 --- /dev/null +++ b/net/base/x509_certificate.cc @@ -0,0 +1,569 @@
// Copyright 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ +#include "net/base/x509_certificate.h" + +#include <map> + +#include "base/histogram.h" +#include "base/lock.h" +#include "base/pickle.h" +#include "base/singleton.h" +#include "base/string_tokenizer.h" +#include "base/string_util.h" +#include "net/base/cert_status_flags.h" +#include "net/base/ev_root_ca_metadata.h" + +#pragma comment(lib, "crypt32.lib") + +namespace { + +// Returns true if this cert fingerprint is the null (all zero) fingerprint. +// We use this as a bogus fingerprint value. +bool IsNullFingerprint(const X509Certificate::Fingerprint& fingerprint) { + for (int i = 0; i < arraysize(fingerprint.data); ++i) { + if (fingerprint.data[i] != 0) + return false; + } + return true; +} + +// Calculates the SHA-1 fingerprint of the certificate. Returns an empty +// (all zero) fingerprint on failure. +X509Certificate::Fingerprint CalculateFingerprint(PCCERT_CONTEXT cert) { + DCHECK(NULL != cert->pbCertEncoded); + DCHECK(0 != cert->cbCertEncoded); + + BOOL rv; + X509Certificate::Fingerprint sha1; + DWORD sha1_size = sizeof(sha1.data); + rv = CryptHashCertificate(NULL, CALG_SHA1, 0, cert->pbCertEncoded, + cert->cbCertEncoded, sha1.data, &sha1_size); + DCHECK(rv && sha1_size == sizeof(sha1.data)); + if (!rv) + memset(sha1.data, 0, sizeof(sha1.data)); + return sha1; +} + +// Wrappers of malloc and free for CRYPT_DECODE_PARA, which requires the +// WINAPI calling convention. +void* WINAPI MyCryptAlloc(size_t size) { + return malloc(size); +} + +void WINAPI MyCryptFree(void* p) { + free(p); +} + +// Decodes the cert's subjectAltName extension into a CERT_ALT_NAME_INFO +// structure and stores it in *output. 
+void GetCertSubjectAltName(PCCERT_CONTEXT cert, + scoped_ptr_malloc<CERT_ALT_NAME_INFO>* output) { + PCERT_EXTENSION extension = CertFindExtension(szOID_SUBJECT_ALT_NAME2, + cert->pCertInfo->cExtension, + cert->pCertInfo->rgExtension); + if (!extension) + return; + + CRYPT_DECODE_PARA decode_para; + decode_para.cbSize = sizeof(decode_para); + decode_para.pfnAlloc = MyCryptAlloc; + decode_para.pfnFree = MyCryptFree; + CERT_ALT_NAME_INFO* alt_name_info = NULL; + DWORD alt_name_info_size = 0; + BOOL rv; + rv = CryptDecodeObjectEx(X509_ASN_ENCODING | PKCS_7_ASN_ENCODING, + szOID_SUBJECT_ALT_NAME2, + extension->Value.pbData, + extension->Value.cbData, + CRYPT_DECODE_ALLOC_FLAG | CRYPT_DECODE_NOCOPY_FLAG, + &decode_para, + &alt_name_info, + &alt_name_info_size); + if (rv) + output->reset(alt_name_info); +} + +/////////////////////////////////////////////////////////////////////////// +// +// Functions used by X509Certificate::IsEV +// +/////////////////////////////////////////////////////////////////////////// + +// Constructs a certificate chain starting from the end certificate +// 'cert_context', matching any of the certificate policies. +// +// Returns the certificate chain context on success, or NULL on failure. +// The caller is responsible for freeing the certificate chain context with +// CertFreeCertificateChain. 
+PCCERT_CHAIN_CONTEXT ConstructCertChain( + PCCERT_CONTEXT cert_context, + const char* const* policies, + int num_policies) { + CERT_CHAIN_PARA chain_para; + memset(&chain_para, 0, sizeof(chain_para)); + chain_para.cbSize = sizeof(chain_para); + chain_para.RequestedUsage.dwType = USAGE_MATCH_TYPE_AND; + chain_para.RequestedUsage.Usage.cUsageIdentifier = 0; + chain_para.RequestedUsage.Usage.rgpszUsageIdentifier = NULL; // LPSTR* + chain_para.RequestedIssuancePolicy.dwType = USAGE_MATCH_TYPE_OR; + chain_para.RequestedIssuancePolicy.Usage.cUsageIdentifier = num_policies; + chain_para.RequestedIssuancePolicy.Usage.rgpszUsageIdentifier = + const_cast<char**>(policies); + PCCERT_CHAIN_CONTEXT chain_context; + if (!CertGetCertificateChain( + NULL, // default chain engine, HCCE_CURRENT_USER + cert_context, + NULL, // current system time + cert_context->hCertStore, // search this store + &chain_para, + CERT_CHAIN_REVOCATION_CHECK_CHAIN_EXCLUDE_ROOT | + CERT_CHAIN_CACHE_END_CERT, + NULL, // reserved + &chain_context)) { + return NULL; + } + return chain_context; +} + +// Decodes the cert's certificatePolicies extension into a CERT_POLICIES_INFO +// structure and stores it in *output. 
+void GetCertPoliciesInfo(PCCERT_CONTEXT cert, + scoped_ptr_malloc<CERT_POLICIES_INFO>* output) { + PCERT_EXTENSION extension = CertFindExtension(szOID_CERT_POLICIES, + cert->pCertInfo->cExtension, + cert->pCertInfo->rgExtension); + if (!extension) + return; + + CRYPT_DECODE_PARA decode_para; + decode_para.cbSize = sizeof(decode_para); + decode_para.pfnAlloc = MyCryptAlloc; + decode_para.pfnFree = MyCryptFree; + CERT_POLICIES_INFO* policies_info = NULL; + DWORD policies_info_size = 0; + BOOL rv; + rv = CryptDecodeObjectEx(X509_ASN_ENCODING | PKCS_7_ASN_ENCODING, + szOID_CERT_POLICIES, + extension->Value.pbData, + extension->Value.cbData, + CRYPT_DECODE_ALLOC_FLAG | CRYPT_DECODE_NOCOPY_FLAG, + &decode_para, + &policies_info, + &policies_info_size); + if (rv) + output->reset(policies_info); +} + +// Returns true if the policy is in the array of CERT_POLICY_INFO in +// the CERT_POLICIES_INFO structure. +bool ContainsPolicy(const CERT_POLICIES_INFO* policies_info, + const char* policy) { + int num_policies = policies_info->cPolicyInfo; + for (int i = 0; i < num_policies; i++) { + if (!strcmp(policies_info->rgPolicyInfo[i].pszPolicyIdentifier, policy)) + return true; + } + return false; +} + +// This class wraps the CertFreeCertificateChain function in a class that can +// be passed as a template argument to scoped_ptr_malloc. 
+class ScopedPtrMallocFreeCertChain { + public: + void operator()(const CERT_CHAIN_CONTEXT* x) const { + CertFreeCertificateChain(x); + } +}; + +typedef scoped_ptr_malloc<const CERT_CHAIN_CONTEXT, + ScopedPtrMallocFreeCertChain> ScopedCertChainContext; + +} // namespace + +bool X509Certificate::FingerprintLessThan::operator()( + const Fingerprint& lhs, + const Fingerprint& rhs) const { + for (int i = 0; i < sizeof(lhs.data); ++i) { + if (lhs.data[i] < rhs.data[i]) + return true; + if (lhs.data[i] > rhs.data[i]) + return false; + } + return false; +} + +bool X509Certificate::LessThan::operator()(X509Certificate* lhs, + X509Certificate* rhs) const { + if (lhs == rhs) + return false; + + X509Certificate::FingerprintLessThan fingerprint_functor; + return fingerprint_functor(lhs->fingerprint_, rhs->fingerprint_); +} + +// A thread-safe cache for X509Certificate objects. +// +// The cache does not hold a reference to the certificate objects. The objects +// must |Remove| themselves from the cache upon destruction (or else the cache +// will be holding dead pointers to the objects). +class X509Certificate::Cache { + public: + // Get the singleton object for the cache. + static X509Certificate::Cache* GetInstance() { + return Singleton<X509Certificate::Cache>::get(); + } + + // Insert |cert| into the cache. The cache does NOT AddRef |cert|. The cache + // must not already contain a certificate with the same fingerprint. + void Insert(X509Certificate* cert) { + AutoLock lock(lock_); + + DCHECK(!IsNullFingerprint(cert->fingerprint())) << + "Only insert certs with real fingerprints."; + DCHECK(cache_.find(cert->fingerprint()) == cache_.end()); + cache_[cert->fingerprint()] = cert; + }; + + // Remove |cert| from the cache. The cache does not assume that |cert| is + // already in the cache. 
+ void Remove(X509Certificate* cert) { + AutoLock lock(lock_); + + CertMap::iterator pos(cache_.find(cert->fingerprint())); + if (pos == cache_.end()) + return; // It is not an error to remove a cert that is not in the cache. + cache_.erase(pos); + }; + + // Find a certificate in the cache with the given fingerprint. If one does + // not exist, this method returns NULL. + X509Certificate* Find(const Fingerprint& fingerprint) { + AutoLock lock(lock_); + + CertMap::iterator pos(cache_.find(fingerprint)); + if (pos == cache_.end()) + return NULL; + + return pos->second; + }; + + private: + typedef std::map<Fingerprint, X509Certificate*, FingerprintLessThan> CertMap; + + // Obtain an instance of X509Certificate::Cache via GetInstance(). + Cache() { } + friend DefaultSingletonTraits<X509Certificate::Cache>; + + // You must acquire this lock before using any private data of this object. + // You must not block while holding this lock. + Lock lock_; + + // The certificate cache. You must acquire |lock_| before using |cache_|. 
+ CertMap cache_; + + DISALLOW_EVIL_CONSTRUCTORS(X509Certificate::Cache); +}; + +void X509Certificate::Initialize() { + std::wstring subject_info; + std::wstring issuer_info; + DWORD name_size; + name_size = CertNameToStr(cert_handle_->dwCertEncodingType, + &cert_handle_->pCertInfo->Subject, + CERT_X500_NAME_STR | CERT_NAME_STR_CRLF_FLAG, + NULL, 0); + name_size = CertNameToStr(cert_handle_->dwCertEncodingType, + &cert_handle_->pCertInfo->Subject, + CERT_X500_NAME_STR | CERT_NAME_STR_CRLF_FLAG, + WriteInto(&subject_info, name_size), name_size); + name_size = CertNameToStr(cert_handle_->dwCertEncodingType, + &cert_handle_->pCertInfo->Issuer, + CERT_X500_NAME_STR | CERT_NAME_STR_CRLF_FLAG, + NULL, 0); + name_size = CertNameToStr(cert_handle_->dwCertEncodingType, + &cert_handle_->pCertInfo->Issuer, + CERT_X500_NAME_STR | CERT_NAME_STR_CRLF_FLAG, + WriteInto(&issuer_info, name_size), name_size); + ParsePrincipal(WideToUTF8(subject_info), &subject_); + ParsePrincipal(WideToUTF8(issuer_info), &issuer_); + + valid_start_ = Time::FromFileTime(cert_handle_->pCertInfo->NotBefore); + valid_expiry_ = Time::FromFileTime(cert_handle_->pCertInfo->NotAfter); + + fingerprint_ = CalculateFingerprint(cert_handle_); + + // Store the certificate in the cache in case we need it later. + X509Certificate::Cache::GetInstance()->Insert(this); +} + +// static +X509Certificate* X509Certificate::CreateFromHandle(OSCertHandle cert_handle) { + DCHECK(cert_handle); + + // Check if we already have this certificate in memory. + X509Certificate::Cache* cache = X509Certificate::Cache::GetInstance(); + X509Certificate* cert = cache->Find(CalculateFingerprint(cert_handle)); + if (cert) { + // We've found a certificate with the same fingerprint in our cache. We own + // the |cert_handle|, which makes it our job to free it. + CertFreeCertificateContext(cert_handle); + DHISTOGRAM_COUNTS(L"X509CertificateReuseCount", 1); + return cert; + } + // Otherwise, allocate a new object. 
+ return new X509Certificate(cert_handle); +} + +// static +X509Certificate* X509Certificate::CreateFromPickle(const Pickle& pickle, + void** pickle_iter) { + const char* data; + int length; + if (!pickle.ReadData(pickle_iter, &data, &length)) + return NULL; + + OSCertHandle cert_handle = NULL; + if (!CertAddSerializedElementToStore( + NULL, // the cert won't be persisted in any cert store + reinterpret_cast<const BYTE*>(data), length, + CERT_STORE_ADD_USE_EXISTING, 0, CERT_STORE_CERTIFICATE_CONTEXT_FLAG, + NULL, reinterpret_cast<const void **>(&cert_handle))) + return NULL; + + return CreateFromHandle(cert_handle); +} + +X509Certificate::X509Certificate(OSCertHandle cert_handle) + : cert_handle_(cert_handle) { + Initialize(); +} + +X509Certificate::X509Certificate(std::string subject, std::string issuer, + Time start_date, Time expiration_date) + : subject_(subject), + issuer_(issuer), + valid_start_(start_date), + valid_expiry_(expiration_date), + cert_handle_(NULL) { + memset(fingerprint_.data, 0, sizeof(fingerprint_.data)); +} + +void X509Certificate::Persist(Pickle* pickle) { + DWORD length; + if (!CertSerializeCertificateStoreElement(cert_handle_, 0, + NULL, &length)) { + NOTREACHED(); + return; + } + BYTE* data = reinterpret_cast<BYTE*>(pickle->BeginWriteData(length)); + if (!CertSerializeCertificateStoreElement(cert_handle_, 0, + data, &length)) { + NOTREACHED(); + length = 0; + } + pickle->TrimWriteData(length); +} + +X509Certificate::~X509Certificate() { + // We might not be in the cache, but it is safe to remove ourselves anyway. 
+ X509Certificate::Cache::GetInstance()->Remove(this); + if (cert_handle_) + CertFreeCertificateContext(cert_handle_); +} + +void X509Certificate::GetDNSNames(std::vector<std::string>* dns_names) const { + dns_names->clear(); + scoped_ptr_malloc<CERT_ALT_NAME_INFO> alt_name_info; + GetCertSubjectAltName(cert_handle_, &alt_name_info); + CERT_ALT_NAME_INFO* alt_name = alt_name_info.get(); + if (alt_name) { + int num_entries = alt_name->cAltEntry; + for (int i = 0; i < num_entries; i++) { + // dNSName is an ASN.1 IA5String representing a string of ASCII + // characters, so we can use WideToASCII here. + if (alt_name->rgAltEntry[i].dwAltNameChoice == CERT_ALT_NAME_DNS_NAME) + dns_names->push_back(WideToASCII(alt_name->rgAltEntry[i].pwszDNSName)); + } + } + if (dns_names->empty()) + dns_names->push_back(subject_.common_name); +} + +bool X509Certificate::HasExpired() const { + return Time::Now() > valid_expiry(); +} + +// Returns true if the certificate is an extended-validation certificate. +// +// The certificate has already been verified by the HTTP library. cert_status +// represents the result of that verification. This function performs +// additional checks of the certificatePolicies extensions of the certificates +// in the certificate chain according to Section 7 (pp. 11-12) of the EV +// Certificate Guidelines Version 1.0 at +// http://cabforum.org/EV_Certificate_Guidelines.pdf. 
+bool X509Certificate::IsEV(int cert_status) const { + if (net::IsCertStatusError(cert_status) || + (cert_status & net::CERT_STATUS_REV_CHECKING_ENABLED) == 0) + return false; + + net::EVRootCAMetadata* metadata = net::EVRootCAMetadata::GetInstance(); + + PCCERT_CHAIN_CONTEXT chain_context = ConstructCertChain(cert_handle_, + metadata->GetPolicyOIDs(), metadata->NumPolicyOIDs()); + if (!chain_context) + return false; + ScopedCertChainContext scoped_chain_context(chain_context); + + DCHECK(chain_context->cChain != 0); + // If the cert doesn't match any of the policies, the + // CERT_TRUST_IS_NOT_VALID_FOR_USAGE bit (0x10) in + // chain_context->TrustStatus.dwErrorStatus is set. + DWORD error_status = chain_context->TrustStatus.dwErrorStatus; + DWORD info_status = chain_context->TrustStatus.dwInfoStatus; + if (!chain_context->cChain || error_status != CERT_TRUST_NO_ERROR) + return false; + + // Check the end certificate simple chain (chain_context->rgpChain[0]). + // If the end certificate's certificatePolicies extension contains the + // EV policy OID of the root CA, return true. + PCERT_CHAIN_ELEMENT* element = chain_context->rgpChain[0]->rgpElement; + int num_elements = chain_context->rgpChain[0]->cElement; + if (num_elements < 2) + return false; + + // Look up the EV policy OID of the root CA. + PCCERT_CONTEXT root_cert = element[num_elements - 1]->pCertContext; + X509Certificate::Fingerprint fingerprint = CalculateFingerprint(root_cert); + std::string ev_policy_oid; + if (!metadata->GetPolicyOID(fingerprint, &ev_policy_oid)) + return false; + DCHECK(!ev_policy_oid.empty()); + + // Get the certificatePolicies extension of the end certificate. 
+ PCCERT_CONTEXT end_cert = element[0]->pCertContext; + scoped_ptr_malloc<CERT_POLICIES_INFO> policies_info; + GetCertPoliciesInfo(end_cert, &policies_info); + if (!policies_info.get()) + return false; + + return ContainsPolicy(policies_info.get(), ev_policy_oid.c_str()); +} + +// static +void X509Certificate::ParsePrincipal(const std::string& description, + Principal* principal) { + // The description of the principal is a string with each LDAP value on + // a separate line. + const std::string kDelimiters("\r\n"); + + std::vector<std::string> common_names, locality_names, state_names, + country_names, street_addresses; + + // TODO(jcampan): add business_category and serial_number. + const std::string kPrefixes[8] = { std::string("CN="), + std::string("L="), + std::string("S="), + std::string("C="), + std::string("STREET="), + std::string("O="), + std::string("OU="), + std::string("DC=") }; + + std::vector<std::string>* values[8] = { + &common_names, &locality_names, + &state_names, &country_names, + &(principal->street_addresses), + &(principal->organization_names), + &(principal->organization_unit_names), + &(principal->domain_components) }; + DCHECK(arraysize(kPrefixes) == arraysize(values)); + + StringTokenizer str_tok(description, kDelimiters); + while (str_tok.GetNext()) { + std::string entry = str_tok.token(); + for (int i = 0; i < arraysize(kPrefixes); i++) { + if (!entry.compare(0, kPrefixes[i].length(), kPrefixes[i])) { + std::string value = entry.substr(kPrefixes[i].length()); + // Remove enclosing double-quotes if any. + if (value.size() >= 2 && + value[0] == '"' && value[value.size() - 1] == '"') + value = value.substr(1, value.size() - 2); + values[i]->push_back(value); + break; + } + } + } + + // We don't expect to have more than one CN, L, S, and C. 
+ std::vector<std::string>* single_value_lists[4] = { + &common_names, &locality_names, &state_names, &country_names }; + std::string* single_values[4] = { + &principal->common_name, &principal->locality_name, + &principal->state_or_province_name, &principal->country_name }; + for (int i = 0; i < arraysize(single_value_lists); ++i) { + int length = static_cast<int>(single_value_lists[i]->size()); + DCHECK(single_value_lists[i]->size() <= 1); + if (single_value_lists[i]->size() > 0) + *(single_values[i]) = (*(single_value_lists[i]))[0]; + } +} + +X509Certificate::Policy::Judgment X509Certificate::Policy::Check( + X509Certificate* cert) const { + // It shouldn't matter which set we check first, but we check denied first + // in case something strange has happened. + + if (denied_.find(cert->fingerprint()) != denied_.end()) { + // DCHECK that the order didn't matter. + DCHECK(allowed_.find(cert->fingerprint()) == allowed_.end()); + return DENIED; + } + + if (allowed_.find(cert->fingerprint()) != allowed_.end()) { + // DCHECK that the order didn't matter. + DCHECK(denied_.find(cert->fingerprint()) == denied_.end()); + return ALLOWED; + } + + // We don't have a policy for this cert. + return UNKNOWN; +} + +void X509Certificate::Policy::Allow(X509Certificate* cert) { + // Put the cert in the allowed set and (maybe) remove it from the denied set. + denied_.erase(cert->fingerprint()); + allowed_.insert(cert->fingerprint()); +} + +void X509Certificate::Policy::Deny(X509Certificate* cert) { + // Put the cert in the denied set and (maybe) remove it from the allowed set. + allowed_.erase(cert->fingerprint()); + denied_.insert(cert->fingerprint()); +} diff --git a/net/base/x509_certificate.h b/net/base/x509_certificate.h new file mode 100644 index 0000000..217b331 --- /dev/null +++ b/net/base/x509_certificate.h @@ -0,0 +1,213 @@ +// Copyright 2008, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef CHROME_COMMON_NET_X509_CERTIFICATE_H__ +#define CHROME_COMMON_NET_X509_CERTIFICATE_H__ + +#include <windows.h> +#include <wincrypt.h> + +#include <set> +#include <string> +#include <vector> + +#include "base/ref_counted.h" +#include "base/time.h" + +class Pickle; + +// X509Certificate represents an X.509 certificate used by SSL. +class X509Certificate : public base::RefCountedThreadSafe<X509Certificate> { + public: + // SHA-1 fingerprint (160 bits) of a certificate. 
+ struct Fingerprint { + unsigned char data[20]; + }; + + class FingerprintLessThan + : public std::binary_function<Fingerprint, Fingerprint, bool> { + public: + bool operator() (const Fingerprint& lhs, const Fingerprint& rhs) const; + }; + + // Predicate functor used in maps when X509Certificate is used as the key. + class LessThan + : public std::binary_function<X509Certificate*, X509Certificate*, bool> { + public: + bool operator() (X509Certificate* lhs, X509Certificate* rhs) const; + }; + + typedef PCCERT_CONTEXT OSCertHandle; + + // Principal represent an X.509 principal. + struct Principal { + Principal() { } + explicit Principal(std::string name) : common_name(name) { } + + // The different attributes for a principal. They may be "". + // Note that some of them can have several values. + + std::string common_name; + std::string locality_name; + std::string state_or_province_name; + std::string country_name; + + std::vector<std::string> street_addresses; + std::vector<std::string> organization_names; + std::vector<std::string> organization_unit_names; + std::vector<std::string> domain_components; + }; + + // This class is useful for maintaining policies about which certificates are + // permitted or forbidden for a particular purpose. + class Policy { + public: + // The judgments this policy can reach. + enum Judgment { + // We don't have policy information for this certificate. + UNKNOWN, + + // This certificate is allowed. + ALLOWED, + + // This certificate is denied. + DENIED, + }; + + // Returns the judgment this policy makes about this certificate. + Judgment Check(X509Certificate* cert) const; + + // Causes the policy to allow this certificate. + void Allow(X509Certificate* cert); + + // Causes the policy to deny this certificate. + void Deny(X509Certificate* cert); + + private: + // The set of fingerprints of allowed certificates. + std::set<Fingerprint, FingerprintLessThan> allowed_; + + // The set of fingerprints of denied certificates. 
+ std::set<Fingerprint, FingerprintLessThan> denied_; + }; + + // Create an X509Certificate from a handle to the certificate object + // in the underlying crypto library. + static X509Certificate* CreateFromHandle(OSCertHandle cert_handle); + + // Create an X509Certificate from the representation stored in the given + // pickle. The data for this object is found relative to the given + // pickle_iter, which should be passed to the pickle's various Read* methods. + static X509Certificate* CreateFromPickle(const Pickle& pickle, + void** pickle_iter); + + // Creates a X509Certificate from the ground up. Used by tests that simulate + // SSL connections. + X509Certificate(std::string subject, std::string issuer, + Time start_date, Time expiration_date); + + // Appends a representation of this object to the given pickle. + void Persist(Pickle* pickle); + + // The subject of the certificate. For HTTPS server certificates, this + // represents the web server. The common name of the subject should match + // the host name of the web server. + const Principal& subject() const { return subject_; } + + // The issuer of the certificate. + const Principal& issuer() const { return issuer_; } + + // Time period during which the certificate is valid. More precisely, this + // certificate is invalid before the |valid_start| date and invalid after + // the |valid_expiry| date. + // If we were unable to parse either date from the certificate (or if the cert + // lacks either date), the date will be null (i.e., is_null() will be true). + const Time& valid_start() const { return valid_start_; } + const Time& valid_expiry() const { return valid_expiry_; } + + // The fingerprint of this certificate. + const Fingerprint& fingerprint() const { return fingerprint_; } + + // Gets the DNS names in the certificate. Pursuant to RFC 2818, Section 3.1 + // Server Identity, if the certificate has a subjectAltName extension of + // type dNSName, this method gets the DNS names in that extension. 
+ // Otherwise, it gets the common name in the subject field. + void GetDNSNames(std::vector<std::string>* dns_names) const; + + // Convenience method that returns whether this certificate has expired as of + // now. + bool HasExpired() const; + + // Returns true if the certificate is an extended-validation (EV) + // certificate. + bool IsEV(int cert_status) const; + + OSCertHandle os_cert_handle() const { return cert_handle_; } + + private: + // A cache of X509Certificate objects. + class Cache; + + // Construct an X509Certificate from a handle to the certificate object + // in the underlying crypto library. + explicit X509Certificate(OSCertHandle cert_handle); + + friend RefCountedThreadSafe<X509Certificate>; + ~X509Certificate(); + + // Common object initialization code. Called by the constructors only. + void Initialize(); + + // Helper function to parse a principal from a WinInet description of that + // principal. + static void ParsePrincipal(const std::string& description, + Principal* principal); + + // The subject of the certificate. + Principal subject_; + + // The issuer of the certificate. + Principal issuer_; + + // This certificate is not valid before |valid_start_| + Time valid_start_; + + // This certificate is not valid after |valid_expiry_| + Time valid_expiry_; + + // The fingerprint of this certificate. + Fingerprint fingerprint_; + + // A handle to the certificate object in the underlying crypto library. + OSCertHandle cert_handle_; + + DISALLOW_EVIL_CONSTRUCTORS(X509Certificate); +}; + +#endif // CHROME_COMMON_NET_X509_CERTIFICATE_H__ |