author | ben@chromium.org <ben@chromium.org> | 2009-01-16 21:47:27 +0000
committer | ben@chromium.org <ben@chromium.org> | 2009-01-16 21:47:27 +0000
commit | f870a32e61397ace3ed5d9da43963e7e1884d6be (patch)
tree | e3c58b9c9fe469d7bd87c0e4eff6eb8b84e0d883 /chrome/browser/net
parent | e7180497f2cb59414d06b944ccea5ceffd3c8e51 (diff)
Move url_* to net subdir
Review URL: http://codereview.chromium.org/18305
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@8224 0039d316-1c4b-4281-b951-d872f2087c98
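The practical effect for existing callers is a one-line include-path change, as the sdch_dictionary_fetcher.h hunk in the diff below shows:

    -#include "chrome/browser/url_fetcher.h"
    +#include "chrome/browser/net/url_fetcher.h"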
Diffstat (limited to 'chrome/browser/net')
-rw-r--r-- | chrome/browser/net/sdch_dictionary_fetcher.h | 2
-rw-r--r-- | chrome/browser/net/url_fetcher.cc | 182
-rw-r--r-- | chrome/browser/net/url_fetcher.h | 219
-rw-r--r-- | chrome/browser/net/url_fetcher_protect.cc | 173
-rw-r--r-- | chrome/browser/net/url_fetcher_protect.h | 145
-rw-r--r-- | chrome/browser/net/url_fetcher_unittest.cc | 479
-rw-r--r-- | chrome/browser/net/url_fixer_upper.cc | 439
-rw-r--r-- | chrome/browser/net/url_fixer_upper.h | 54
-rw-r--r-- | chrome/browser/net/url_fixer_upper_unittest.cc | 334
9 files changed, 2026 insertions(+), 1 deletion(-)
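Before reading the diff: the new url_fetcher.h documents the intended call pattern for URLFetcher. Here is a minimal sketch of a caller, assembled from the usage comments in that header; the delegate class, the logging, and the |delegate| and |context| variables are illustrative, not part of the change:

    // Sketch only: a delegate that logs the result. The callback runs on the
    // thread that created the URLFetcher.
    class ExampleDelegate : public URLFetcher::Delegate {
     public:
      virtual void OnURLFetchComplete(const URLFetcher* source,
                                      const GURL& url,
                                      const URLRequestStatus& status,
                                      int response_code,
                                      const ResponseCookies& cookies,
                                      const std::string& data) {
        // Copy |data| if it must outlive the URLFetcher instance.
        if (status.is_success() && response_code == 200)
          LOG(INFO) << "fetched " << data.size() << " bytes";
      }
    };

    // Typical call site; |delegate| and |context| (a live URLRequestContext*)
    // are assumed to be provided by the embedder.
    URLFetcher* fetcher = new URLFetcher(GURL("http://www.google.com/"),
                                         URLFetcher::GET, &delegate);
    fetcher->set_request_context(context);
    fetcher->Start();
    // Delete |fetcher| only after OnURLFetchComplete(); destroying it sooner
    // cancels the fetch and suppresses the callback.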
diff --git a/chrome/browser/net/sdch_dictionary_fetcher.h b/chrome/browser/net/sdch_dictionary_fetcher.h index b2169b8..b943a24 100644 --- a/chrome/browser/net/sdch_dictionary_fetcher.h +++ b/chrome/browser/net/sdch_dictionary_fetcher.h @@ -14,7 +14,7 @@ #include "base/compiler_specific.h" #include "base/task.h" -#include "chrome/browser/url_fetcher.h" +#include "chrome/browser/net/url_fetcher.h" #include "net/base/sdch_manager.h" class SdchDictionaryFetcher : public URLFetcher::Delegate, diff --git a/chrome/browser/net/url_fetcher.cc b/chrome/browser/net/url_fetcher.cc new file mode 100644 index 0000000..b7ea8cd --- /dev/null +++ b/chrome/browser/net/url_fetcher.cc @@ -0,0 +1,182 @@ +// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/net/url_fetcher.h" + +#include "base/compiler_specific.h" +#include "base/string_util.h" +#include "base/thread.h" +#include "chrome/browser/browser_process.h" +#include "chrome/browser/chrome_thread.h" +#include "googleurl/src/gurl.h" +#include "net/base/load_flags.h" + +URLFetcher::URLFetcher(const GURL& url, + RequestType request_type, + Delegate* d) + : ALLOW_THIS_IN_INITIALIZER_LIST( + core_(new Core(this, url, request_type, d))) { +} + +URLFetcher::~URLFetcher() { + core_->Stop(); +} + +URLFetcher::Core::Core(URLFetcher* fetcher, + const GURL& original_url, + RequestType request_type, + URLFetcher::Delegate* d) + : fetcher_(fetcher), + original_url_(original_url), + request_type_(request_type), + delegate_(d), + delegate_loop_(MessageLoop::current()), + io_loop_(ChromeThread::GetMessageLoop(ChromeThread::IO)), + request_(NULL), + load_flags_(net::LOAD_NORMAL), + response_code_(-1), + protect_entry_(URLFetcherProtectManager::GetInstance()->Register( + original_url_.host())), + num_retries_(0) { +} + +void URLFetcher::Core::Start() { + DCHECK(delegate_loop_); + DCHECK(io_loop_); + DCHECK(request_context_) << "We need an URLRequestContext!"; + io_loop_->PostDelayedTask(FROM_HERE, NewRunnableMethod( + this, &Core::StartURLRequest), + protect_entry_->UpdateBackoff(URLFetcherProtectEntry::SEND)); +} + +void URLFetcher::Core::Stop() { + DCHECK_EQ(MessageLoop::current(), delegate_loop_); + delegate_ = NULL; + io_loop_->PostTask(FROM_HERE, NewRunnableMethod( + this, &Core::CancelURLRequest)); +} + +void URLFetcher::Core::OnResponseStarted(URLRequest* request) { + DCHECK(request == request_); + DCHECK(MessageLoop::current() == io_loop_); + if (request_->status().is_success()) { + response_code_ = request_->GetResponseCode(); + response_headers_ = request_->response_headers(); + } + + int bytes_read = 0; + // Some servers may treat HEAD requests as GET requests. To free up the + // network connection as soon as possible, signal that the request has + // completed immediately, without trying to read any data back (all we care + // about is the response code and headers, which we already have). 
+ if (request_->status().is_success() && (request_type_ != HEAD)) + request_->Read(buffer_, sizeof(buffer_), &bytes_read); + OnReadCompleted(request_, bytes_read); +} + +void URLFetcher::Core::OnReadCompleted(URLRequest* request, int bytes_read) { + DCHECK(request == request_); + DCHECK(MessageLoop::current() == io_loop_); + + url_ = request->url(); + + do { + if (!request_->status().is_success() || bytes_read <= 0) + break; + data_.append(buffer_, bytes_read); + } while (request_->Read(buffer_, sizeof(buffer_), &bytes_read)); + + if (request_->status().is_success()) + request_->GetResponseCookies(&cookies_); + + // See comments re: HEAD requests in OnResponseStarted(). + if (!request_->status().is_io_pending() || (request_type_ == HEAD)) { + delegate_loop_->PostTask(FROM_HERE, NewRunnableMethod( + this, &Core::OnCompletedURLRequest, request_->status())); + delete request_; + request_ = NULL; + } +} + +void URLFetcher::Core::StartURLRequest() { + DCHECK(MessageLoop::current() == io_loop_); + DCHECK(!request_); + + request_ = new URLRequest(original_url_, this); + request_->set_load_flags( + request_->load_flags() | net::LOAD_DISABLE_INTERCEPT | load_flags_); + request_->set_context(request_context_.get()); + + switch (request_type_) { + case GET: + break; + + case POST: + DCHECK(!upload_content_.empty()); + DCHECK(!upload_content_type_.empty()); + + request_->set_method("POST"); + if (!extra_request_headers_.empty()) + extra_request_headers_ += "\r\n"; + StringAppendF(&extra_request_headers_, + "Content-Type: %s", upload_content_type_.c_str()); + request_->AppendBytesToUpload(upload_content_.data(), + static_cast<int>(upload_content_.size())); + break; + + case HEAD: + request_->set_method("HEAD"); + break; + + default: + NOTREACHED(); + } + + if (!extra_request_headers_.empty()) + request_->SetExtraRequestHeaders(extra_request_headers_); + + request_->Start(); +} + +void URLFetcher::Core::CancelURLRequest() { + DCHECK(MessageLoop::current() == io_loop_); + if (request_) { + request_->Cancel(); + delete request_; + request_ = NULL; + } + // Release the reference to the request context. There could be multiple + // references to URLFetcher::Core at this point so it may take a while to + // delete the object, but we cannot delay the destruction of the request + // context. + request_context_ = NULL; +} + +void URLFetcher::Core::OnCompletedURLRequest(const URLRequestStatus& status) { + DCHECK(MessageLoop::current() == delegate_loop_); + + // Check the response from the server. + if (response_code_ >= 500) { + // When encountering a server error, we will send the request again + // after the backoff time. + const int wait = + protect_entry_->UpdateBackoff(URLFetcherProtectEntry::FAILURE); + ++num_retries_; + // Restarts the request if we still need to notify the delegate. + if (delegate_) { + if (num_retries_ <= protect_entry_->max_retries()) { + io_loop_->PostDelayedTask(FROM_HERE, NewRunnableMethod( + this, &Core::StartURLRequest), wait); + } else { + delegate_->OnURLFetchComplete(fetcher_, url_, status, response_code_, + cookies_, data_); + } + } + } else { + protect_entry_->UpdateBackoff(URLFetcherProtectEntry::SUCCESS); + if (delegate_) + delegate_->OnURLFetchComplete(fetcher_, url_, status, response_code_, + cookies_, data_); + } +} diff --git a/chrome/browser/net/url_fetcher.h b/chrome/browser/net/url_fetcher.h new file mode 100644 index 0000000..7e4c132 --- /dev/null +++ b/chrome/browser/net/url_fetcher.h @@ -0,0 +1,219 @@ +// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// This file contains URLFetcher, a wrapper around URLRequest that handles +// low-level details like thread safety, ref counting, and incremental buffer +// reading. This is useful for callers who simply want to get the data from a +// URL and don't care about all the nitty-gritty details. + +#ifndef CHROME_BROWSER_URL_FETCHER_H__ +#define CHROME_BROWSER_URL_FETCHER_H__ + +#include "base/message_loop.h" +#include "base/ref_counted.h" +#include "chrome/browser/net/url_fetcher_protect.h" +#include "net/url_request/url_request.h" + +class URLRequestContext; + +// To use this class, create an instance with the desired URL and a pointer to +// the object to be notified when the URL has been loaded: +// URLFetcher* fetcher = new URLFetcher("http://www.google.com", this); +// +// Then, optionally set properties on this object, like the request context or +// extra headers: +// fetcher->SetExtraRequestHeaders("X-Foo: bar"); +// +// Finally, start the request: +// fetcher->Start(); +// +// The object you supply as a delegate must inherit from URLFetcher::Delegate; +// when the fetch is completed, OnURLFetchComplete() will be called with the +// resulting status and (if applicable) HTTP response code. From that point +// until the original URLFetcher instance is destroyed, you may examine the +// provided status and data for the URL. (You should copy these objects if you +// need them to live longer than the URLFetcher instance.) If the URLFetcher +// instance is destroyed before the callback happens, the fetch will be +// canceled and no callback will occur. +// +// You may create the URLFetcher instance on any thread; OnURLFetchComplete() +// will be called back on the same thread you use to create the instance. +// +// NOTE: Take extra care when using URLFetcher in services that live on the +// BrowserProcess; all URLFetcher instances need to be destroyed before +// the IO thread goes away, since the URLFetcher destructor requests an +// InvokeLater operation on that thread. +// +// NOTE: URLFetcher requests will NOT be intercepted. + +class URLFetcher { + public: + enum RequestType { + GET, + POST, + HEAD, + }; + + class Delegate { + public: + // This will be called when the URL has been fetched, successfully or not. + // |response_code| is the HTTP response code (200, 404, etc.) if + // applicable. |url|, |status| and |data| are all valid until the + // URLFetcher instance is destroyed. + virtual void OnURLFetchComplete(const URLFetcher* source, + const GURL& url, + const URLRequestStatus& status, + int response_code, + const ResponseCookies& cookies, + const std::string& data) = 0; + }; + + // |url| is the URL to send the request to. + // |request_type| is the type of request to make. + // |d| the object that will receive the callback on fetch completion. + URLFetcher(const GURL& url, RequestType request_type, Delegate* d); + + // This should only be used by unittests, where g_browser_process->io_thread() + // does not exist and we must specify an alternate loop. Unfortunately, we + // can't put it under #ifdef UNIT_TEST since some callers (which themselves + // should only be reached in unit tests) use this. See + // chrome/browser/feeds/feed_manager.cc. + void set_io_loop(MessageLoop* io_loop) { + core_->io_loop_ = io_loop; + } + + // Sets data only needed by POSTs. All callers making POST requests should + // call this before the request is started. 
|upload_content_type| is the MIME + // type of the content, while |upload_content| is the data to be sent (the + // Content-Length header value will be set to the length of this data). + void set_upload_data(const std::string& upload_content_type, + const std::string& upload_content) { + core_->upload_content_type_ = upload_content_type; + core_->upload_content_ = upload_content; + } + + // Set one or more load flags as defined in net/base/load_flags.h. Must be + // called before the request is started. + void set_load_flags(int load_flags) { + core_->load_flags_ = load_flags; + } + + // Set extra headers on the request. Must be called before the request + // is started. + void set_extra_request_headers(const std::string& extra_request_headers) { + core_->extra_request_headers_ = extra_request_headers; + } + + // Set the URLRequestContext on the request. Must be called before the + // request is started. + void set_request_context(URLRequestContext* request_context) { + core_->request_context_ = request_context; + } + + // Retrieve the response headers from the request. Must only be called after + // the OnURLFetchComplete callback has run. + net::HttpResponseHeaders* response_headers() const { + return core_->response_headers_; + } + + // Start the request. After this is called, you may not change any other + // settings. + void Start() { core_->Start(); } + + // Return the URL that this fetcher is processing. + const GURL& url() const { + return core_->url_; + } + + ~URLFetcher(); + + private: + // This class is the real guts of URLFetcher. + // + // When created, delegate_loop_ is set to the message loop of the current + // thread, while io_loop_ is set to the message loop of the IO thread. These + // are used to ensure that all handling of URLRequests happens on the IO + // thread (since that class is not currently threadsafe and relies on + // underlying Microsoft APIs that we don't know to be threadsafe), while + // keeping the delegate callback on the delegate's thread. + class Core : public base::RefCountedThreadSafe<URLFetcher::Core>, + public URLRequest::Delegate { + public: + // For POST requests, set |content_type| to the MIME type of the content + // and set |content| to the data to upload. |flags| are flags to apply to + // the load operation--these should be one or more of the LOAD_* flags + // defined in url_request.h. + Core(URLFetcher* fetcher, + const GURL& original_url, + RequestType request_type, + URLFetcher::Delegate* d); + + // Starts the load. It's important that this not happen in the constructor + // because it causes the IO thread to begin AddRef()ing and Release()ing + // us. If our caller hasn't had time to fully construct us and take a + // reference, the IO thread could interrupt things, run a task, Release() + // us, and destroy us, leaving the caller with an already-destroyed object + // when construction finishes. + void Start(); + + // Stops any in-progress load and ensures no callback will happen. It is + // safe to call this multiple times. + void Stop(); + + // URLRequest::Delegate implementations + virtual void OnReceivedRedirect(URLRequest* request, + const GURL& new_url) { } + virtual void OnResponseStarted(URLRequest* request); + virtual void OnReadCompleted(URLRequest* request, int bytes_read); + + private: + // Wrapper functions that allow us to ensure actions happen on the right + // thread. 
+ void StartURLRequest(); + void CancelURLRequest(); + void OnCompletedURLRequest(const URLRequestStatus& status); + + URLFetcher* fetcher_; // Corresponding fetcher object + GURL original_url_; // The URL we were asked to fetch + GURL url_; // The URL we eventually wound up at + RequestType request_type_; // What type of request is this? + URLFetcher::Delegate* delegate_; // Object to notify on completion + MessageLoop* delegate_loop_; // Message loop of the creating thread + MessageLoop* io_loop_; // Message loop of the IO thread + URLRequest* request_; // The actual request this wraps + int load_flags_; // Flags for the load operation + int response_code_; // HTTP status code for the request + std::string data_; // Results of the request + char buffer_[4096]; // Read buffer + scoped_refptr<URLRequestContext> request_context_; + // Cookie/cache info for the request + ResponseCookies cookies_; // Response cookies + std::string extra_request_headers_;// Extra headers for the request, if any + scoped_refptr<net::HttpResponseHeaders> response_headers_; + + std::string upload_content_; // HTTP POST payload + std::string upload_content_type_; // MIME type of POST payload + + // The overload protection entry for this URL. This is used to + // incrementally back off how rapidly we'll send requests to a particular + // URL, to avoid placing too much demand on the remote resource. We update + // this with the status of all requests as they return, and in turn use it + // to determine how long to wait before making another request. + URLFetcherProtectEntry* protect_entry_; + // |num_retries_| indicates how many times we've failed to successfully + // fetch this URL. Once this value exceeds the maximum number of retries + // specified by the protection manager, we'll give up. + int num_retries_; + + friend class URLFetcher; + DISALLOW_EVIL_CONSTRUCTORS(Core); + }; + + scoped_refptr<Core> core_; + + DISALLOW_EVIL_CONSTRUCTORS(URLFetcher); +}; + +#endif // CHROME_BROWSER_URL_FETCHER_H__ + diff --git a/chrome/browser/net/url_fetcher_protect.cc b/chrome/browser/net/url_fetcher_protect.cc new file mode 100644 index 0000000..5b7b208 --- /dev/null +++ b/chrome/browser/net/url_fetcher_protect.cc @@ -0,0 +1,173 @@ +// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/net/url_fetcher_protect.h" + +// URLFetcherProtectEntry ---------------------------------------------------- + +using base::TimeDelta; +using base::TimeTicks; + +// Default parameters. Time is in milliseconds. 
+// static +const int URLFetcherProtectEntry::kDefaultSlidingWindowPeriod = 2000; + +const int URLFetcherProtectEntry::kDefaultMaxSendThreshold = 20; +const int URLFetcherProtectEntry::kDefaultMaxRetries = 0; + +const int URLFetcherProtectEntry::kDefaultInitialTimeout = 100; +const double URLFetcherProtectEntry::kDefaultMultiplier = 2.0; +const int URLFetcherProtectEntry::kDefaultConstantFactor = 100; +const int URLFetcherProtectEntry::kDefaultMaximumTimeout = 60000; + + +URLFetcherProtectEntry::URLFetcherProtectEntry() + : sliding_window_period_(kDefaultSlidingWindowPeriod), + max_send_threshold_(kDefaultMaxSendThreshold), + max_retries_(kDefaultMaxRetries), + initial_timeout_(kDefaultInitialTimeout), + multiplier_(kDefaultMultiplier), + constant_factor_(kDefaultConstantFactor), + maximum_timeout_(kDefaultMaximumTimeout) { + ResetBackoff(); +} + +URLFetcherProtectEntry::URLFetcherProtectEntry(int sliding_window_period, + int max_send_threshold, + int max_retries, + int initial_timeout, + double multiplier, + int constant_factor, + int maximum_timeout) + : sliding_window_period_(sliding_window_period), + max_send_threshold_(max_send_threshold), + max_retries_(max_retries), + initial_timeout_(initial_timeout), + multiplier_(multiplier), + constant_factor_(constant_factor), + maximum_timeout_(maximum_timeout) { + ResetBackoff(); +} + +int URLFetcherProtectEntry::UpdateBackoff(EventType event_type) { + // request may be sent in different threads + AutoLock lock(lock_); + + TimeDelta t; + switch (event_type) { + case SEND: + t = AntiOverload(); + break; + case SUCCESS: + t = ResetBackoff(); + break; + case FAILURE: + t = IncreaseBackoff(); + break; + default: + NOTREACHED(); + } + + int wait = static_cast<int>(t.InMilliseconds()); + DCHECK(wait >= 0); + return wait; +} + +TimeDelta URLFetcherProtectEntry::AntiOverload() { + TimeDelta sw = TimeDelta::FromMilliseconds(sliding_window_period_); + TimeTicks now = TimeTicks::Now(); + // Estimate when the next request will be sent. + release_time_ = now; + if (send_log_.size() > 0) { + release_time_ = std::max(release_time_, send_log_.back()); + } + // Checks if there are too many send events in recent time. + if (send_log_.size() >= static_cast<unsigned>(max_send_threshold_)) { + release_time_ = std::max(release_time_, send_log_.front() + sw); + } + // Logs the new send event. + send_log_.push(release_time_); + // Drops the out-of-date events in the event list. + while (!send_log_.empty() && + (send_log_.front() + sw <= send_log_.back())) { + send_log_.pop(); + } + return release_time_ - now; +} + +TimeDelta URLFetcherProtectEntry::ResetBackoff() { + timeout_period_ = initial_timeout_; + release_time_ = TimeTicks::Now(); + return TimeDelta::FromMilliseconds(0); +} + +TimeDelta URLFetcherProtectEntry::IncreaseBackoff() { + TimeTicks now = TimeTicks::Now(); + + release_time_ = std::max(release_time_, now) + + TimeDelta::FromMilliseconds(timeout_period_); + + // Calculates the new backoff time. 
+ timeout_period_ = static_cast<int> + (multiplier_ * timeout_period_ + constant_factor_); + if (maximum_timeout_ && timeout_period_ > maximum_timeout_) + timeout_period_ = maximum_timeout_; + + return release_time_ - now; +} + +// URLFetcherProtectManager -------------------------------------------------- + +// static +scoped_ptr<URLFetcherProtectManager> URLFetcherProtectManager::protect_manager_; +Lock URLFetcherProtectManager::lock_; + +URLFetcherProtectManager::~URLFetcherProtectManager() { + // Deletes all entries + ProtectService::iterator i; + for (i = services_.begin(); i != services_.end(); ++i) { + if (i->second) + delete i->second; + } +} + +// static +URLFetcherProtectManager* URLFetcherProtectManager::GetInstance() { + AutoLock lock(lock_); + + if (protect_manager_.get() == NULL) { + protect_manager_.reset(new URLFetcherProtectManager()); + } + return protect_manager_.get(); +} + +URLFetcherProtectEntry* URLFetcherProtectManager::Register(std::string id) { + AutoLock lock(lock_); + + ProtectService::iterator i = services_.find(id); + + if (i != services_.end()) { + // The entry exists. + return i->second; + } + + // Creates a new entry. + URLFetcherProtectEntry* entry = new URLFetcherProtectEntry(); + services_[id] = entry; + return entry; +} + +URLFetcherProtectEntry* URLFetcherProtectManager::Register( + std::string id, URLFetcherProtectEntry* entry) { + AutoLock lock(lock_); + + ProtectService::iterator i = services_.find(id); + if (i != services_.end()) { + // The entry exists. + delete i->second; + } + + services_[id] = entry; + return entry; +} diff --git a/chrome/browser/net/url_fetcher_protect.h b/chrome/browser/net/url_fetcher_protect.h new file mode 100644 index 0000000..794418b --- /dev/null +++ b/chrome/browser/net/url_fetcher_protect.h @@ -0,0 +1,145 @@ +// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// This file implements backoff in the suggest system so that we don't +// DOS the Suggest servers when using URLFetcher. + +#ifndef CHROME_BROWSER_URL_FETCHER_PROTECT_H__ +#define CHROME_BROWSER_URL_FETCHER_PROTECT_H__ + +#include <map> +#include <queue> +#include <string> + +#include "base/lock.h" +#include "base/logging.h" +#include "base/scoped_ptr.h" +#include "base/time.h" + + +// This class is used to manage one service's rate protection. It maintains +// a queue of connection successes and failures and analyzes the requests +// over some period of time, in order to deduce the backoff time of every +// request. +// The backoff algorithm consists of two parts. Firstly, avoid too many +// send events in a sliding window. That will prevent traffic overload. +// Secondly, exponential backoff is used when receiving an error message +// from server. Exponential backoff period is calculated using the following +// formula: +// +// initial backoff time (the first time to receive error) +// backoff = k * current_backoff + c (the second, third, ... error) +// maximum backoff time (when backoff > maximum backoff time) +// +// where |k| is the multiplier, and |c| is the constant factor. 
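To make the formula concrete: with the defaults defined in url_fetcher_protect.cc above (initial timeout 100 ms, multiplier k = 2.0, constant factor c = 100 ms, 60000 ms cap), consecutive failures wait 100, 300, 700, 1500, 3100, ... ms until the cap is reached. A standalone sketch of the recurrence, illustrative only and independent of the Chromium types used above:

    // Models IncreaseBackoff()'s timeout update: next = k * current + c,
    // clamped to |maximum_ms| when a cap is set.
    int NextTimeout(int current_ms, double multiplier, int constant_ms,
                    int maximum_ms) {
      int next = static_cast<int>(multiplier * current_ms + constant_ms);
      return (maximum_ms && next > maximum_ms) ? maximum_ms : next;
    }
    // NextTimeout(100, 2.0, 100, 60000) == 300; iterating yields 700, 1500,
    // 3100, ... and eventually clamps at 60000.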
+class URLFetcherProtectEntry { + public: + enum EventType { + SEND, // request will be sent out + SUCCESS, // successful response + FAILURE // no response or error + }; + + URLFetcherProtectEntry(); + URLFetcherProtectEntry(int sliding_window_period, int max_send_threshold, + int max_retries, int initial_timeout, + double multiplier, int constant_factor, + int maximum_timeout); + + + virtual ~URLFetcherProtectEntry() { } + + // When a connection event happens, log it to the queue, and recalculate + // the timeout period. It returns the backoff time, in milliseconds, that + // indicates to the sender how long it should wait before sending the request. + // If the request is allowed to be sent immediately, the backoff time is 0. + int UpdateBackoff(EventType event_type); + + // Returns the max retries allowed. + int max_retries() const { + return max_retries_; + } + + private: + // When a request comes, calculate the release time for it. + // Returns the backoff time before sending. + base::TimeDelta AntiOverload(); + // Resets the backoff when the service is OK. + // Returns the backoff time before sending. + base::TimeDelta ResetBackoff(); + // Calculates the new backoff when encountering a failure. + // Returns the backoff time before sending. + base::TimeDelta IncreaseBackoff(); + + // Default parameters. Time is in milliseconds. + static const int kDefaultSlidingWindowPeriod; + static const int kDefaultMaxSendThreshold; + static const int kDefaultMaxRetries; + static const int kDefaultInitialTimeout; + static const double kDefaultMultiplier; + static const int kDefaultConstantFactor; + static const int kDefaultMaximumTimeout; + + // time to consider events when checking backoff + int sliding_window_period_; + + // maximum number of requests allowed in the sliding window period + int max_send_threshold_; + // maximum retries allowed + int max_retries_; + + // initial timeout on first failure + int initial_timeout_; + // factor by which to multiply on exponential backoff (e.g., 2.0) + double multiplier_; + // constant time term to add to each attempt + int constant_factor_; + // maximum amount of time between requests + int maximum_timeout_; + + // current exponential backoff period + int timeout_period_; + // time that protection is scheduled to end + base::TimeTicks release_time_; + + // A lock to ensure thread safety. + Lock lock_; + + // A list of the recent send events. We use them to decide whether + // there are too many requests sent in the sliding window. + std::queue<base::TimeTicks> send_log_; + + DISALLOW_COPY_AND_ASSIGN(URLFetcherProtectEntry); +}; + + +// This singleton class is used to manage all protect entries. +// Currently we use the host name as the service id. +class URLFetcherProtectManager { + public: + ~URLFetcherProtectManager(); + + // Returns the global instance of this class. + static URLFetcherProtectManager* GetInstance(); + + // Registers a new entry in this service. If the entry already exists, + // just returns it. + URLFetcherProtectEntry* Register(std::string id); + // Always registers the entry even when it exists.
+ URLFetcherProtectEntry* Register(std::string id, + URLFetcherProtectEntry* entry); + + private: + URLFetcherProtectManager() { } + + typedef std::map<const std::string, URLFetcherProtectEntry*> ProtectService; + + static Lock lock_; + static scoped_ptr<URLFetcherProtectManager> protect_manager_; + ProtectService services_; + + DISALLOW_COPY_AND_ASSIGN(URLFetcherProtectManager); +}; + +#endif // CHROME_BROWSER_URL_FETCHER_PROTECT_H__ diff --git a/chrome/browser/net/url_fetcher_unittest.cc b/chrome/browser/net/url_fetcher_unittest.cc new file mode 100644 index 0000000..50e24fe --- /dev/null +++ b/chrome/browser/net/url_fetcher_unittest.cc @@ -0,0 +1,479 @@ +// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/thread.h" +#include "base/time.h" +#include "chrome/browser/net/url_fetcher.h" +#include "chrome/browser/net/url_fetcher_protect.h" +#if defined(OS_LINUX) +// TODO(port): ugly hack for linux +namespace ChromePluginLib { + void UnloadAllPlugins() {} +} +#else +#include "chrome/common/chrome_plugin_lib.h" +#endif +#include "net/base/ssl_test_util.h" +#include "net/url_request/url_request_unittest.h" +#include "testing/gtest/include/gtest/gtest.h" + +using base::Time; +using base::TimeDelta; + +namespace { + +const wchar_t kDocRoot[] = L"chrome/test/data"; + +class URLFetcherTest : public testing::Test, public URLFetcher::Delegate { + public: + URLFetcherTest() : fetcher_(NULL) { } + + // Creates a URLFetcher, using the program's main thread to do IO. + virtual void CreateFetcher(const GURL& url); + + // URLFetcher::Delegate + virtual void OnURLFetchComplete(const URLFetcher* source, + const GURL& url, + const URLRequestStatus& status, + int response_code, + const ResponseCookies& cookies, + const std::string& data); + + protected: + virtual void SetUp() { + testing::Test::SetUp(); + + // Ensure that any plugin operations done by other tests are cleaned up. + ChromePluginLib::UnloadAllPlugins(); + } + + // URLFetcher is designed to run on the main UI thread, but in our tests + // we assume that the current thread is the IO thread where the URLFetcher + // dispatches its requests to. When we wish to simulate being used from + // a UI thread, we dispatch a worker thread to do so. + MessageLoopForIO io_loop_; + + URLFetcher* fetcher_; +}; + +// Version of URLFetcherTest that does a POST instead +class URLFetcherPostTest : public URLFetcherTest { + public: + virtual void CreateFetcher(const GURL& url); + + // URLFetcher::Delegate + virtual void OnURLFetchComplete(const URLFetcher* source, + const GURL& url, + const URLRequestStatus& status, + int response_code, + const ResponseCookies& cookies, + const std::string& data); +}; + +// Version of URLFetcherTest that tests headers. +class URLFetcherHeadersTest : public URLFetcherTest { + public: + // URLFetcher::Delegate + virtual void OnURLFetchComplete(const URLFetcher* source, + const GURL& url, + const URLRequestStatus& status, + int response_code, + const ResponseCookies& cookies, + const std::string& data); +}; + +// Version of URLFetcherTest that tests overload protection.
+class URLFetcherProtectTest : public URLFetcherTest { + public: + virtual void CreateFetcher(const GURL& url); + // URLFetcher::Delegate + virtual void OnURLFetchComplete(const URLFetcher* source, + const GURL& url, + const URLRequestStatus& status, + int response_code, + const ResponseCookies& cookies, + const std::string& data); + private: + Time start_time_; +}; + +// Version of URLFetcherTest that tests bad HTTPS requests. +class URLFetcherBadHTTPSTest : public URLFetcherTest { + public: + URLFetcherBadHTTPSTest(); + + // URLFetcher::Delegate + virtual void OnURLFetchComplete(const URLFetcher* source, + const GURL& url, + const URLRequestStatus& status, + int response_code, + const ResponseCookies& cookies, + const std::string& data); + + protected: + FilePath GetExpiredCertPath(); + SSLTestUtil util_; + + private: + FilePath cert_dir_; +}; + +// Version of URLFetcherTest that tests request cancellation on shutdown. +class URLFetcherCancelTest : public URLFetcherTest { + public: + virtual void CreateFetcher(const GURL& url); + // URLFetcher::Delegate + virtual void OnURLFetchComplete(const URLFetcher* source, + const GURL& url, + const URLRequestStatus& status, + int response_code, + const ResponseCookies& cookies, + const std::string& data); + + void CancelRequest(); + void TestContextReleased(); + + private: + base::OneShotTimer<URLFetcherCancelTest> timer_; + bool context_released_; +}; + +// Version of TestURLRequestContext that lets us know if the request context +// is properly released. +class CancelTestURLRequestContext : public TestURLRequestContext { + public: + explicit CancelTestURLRequestContext(bool* destructor_called) + : destructor_called_(destructor_called) { + *destructor_called_ = false; + } + + virtual ~CancelTestURLRequestContext() { + *destructor_called_ = true; + } + + private: + bool* destructor_called_; +}; + +// Wrapper that lets us call CreateFetcher() on a thread of our choice. We +// could make URLFetcherTest refcounted and use PostTask(FROM_HERE, ...) to call +// CreateFetcher() directly, but the ownership of the URLFetcherTest is a bit +// confusing in that case because GTest doesn't know about the refcounting. +// It's less confusing to just do it this way. +class FetcherWrapperTask : public Task { + public: + FetcherWrapperTask(URLFetcherTest* test, const GURL& url) + : test_(test), url_(url) { } + virtual void Run() { + test_->CreateFetcher(url_); + }; + + private: + URLFetcherTest* test_; + GURL url_; +}; + +void URLFetcherTest::CreateFetcher(const GURL& url) { + fetcher_ = new URLFetcher(url, URLFetcher::GET, this); + fetcher_->set_request_context(new TestURLRequestContext()); + fetcher_->set_io_loop(&io_loop_); + fetcher_->Start(); +} + +void URLFetcherTest::OnURLFetchComplete(const URLFetcher* source, + const GURL& url, + const URLRequestStatus& status, + int response_code, + const ResponseCookies& cookies, + const std::string& data) { + EXPECT_TRUE(status.is_success()); + EXPECT_EQ(200, response_code); // HTTP OK + EXPECT_FALSE(data.empty()); + + delete fetcher_; // Have to delete this here and not in the destructor, + // because the destructor won't necessarily run on the + // same thread that CreateFetcher() did. + + io_loop_.PostTask(FROM_HERE, new MessageLoop::QuitTask()); + // If MessageLoop::current() != io_loop_, it will be shut down when the + // main loop returns and this thread subsequently goes out of scope.
+} + +void URLFetcherPostTest::CreateFetcher(const GURL& url) { + fetcher_ = new URLFetcher(url, URLFetcher::POST, this); + fetcher_->set_request_context(new TestURLRequestContext()); + fetcher_->set_io_loop(&io_loop_); + fetcher_->set_upload_data("application/x-www-form-urlencoded", + "bobsyeruncle"); + fetcher_->Start(); +} + +void URLFetcherPostTest::OnURLFetchComplete(const URLFetcher* source, + const GURL& url, + const URLRequestStatus& status, + int response_code, + const ResponseCookies& cookies, + const std::string& data) { + EXPECT_EQ(std::string("bobsyeruncle"), data); + URLFetcherTest::OnURLFetchComplete(source, url, status, response_code, + cookies, data); +} + +void URLFetcherHeadersTest::OnURLFetchComplete( + const URLFetcher* source, + const GURL& url, + const URLRequestStatus& status, + int response_code, + const ResponseCookies& cookies, + const std::string& data) { + std::string header; + EXPECT_TRUE(source->response_headers()->GetNormalizedHeader("cache-control", + &header)); + EXPECT_EQ("private", header); + URLFetcherTest::OnURLFetchComplete(source, url, status, response_code, + cookies, data); +} + +void URLFetcherProtectTest::CreateFetcher(const GURL& url) { + fetcher_ = new URLFetcher(url, URLFetcher::GET, this); + fetcher_->set_request_context(new TestURLRequestContext()); + fetcher_->set_io_loop(&io_loop_); + start_time_ = Time::Now(); + fetcher_->Start(); +} + +void URLFetcherProtectTest::OnURLFetchComplete(const URLFetcher* source, + const GURL& url, + const URLRequestStatus& status, + int response_code, + const ResponseCookies& cookies, + const std::string& data) { + const TimeDelta one_second = TimeDelta::FromMilliseconds(1000); + if (response_code >= 500) { + // Now running ServerUnavailable test. + // It takes more than 1 second to finish all 11 requests. + EXPECT_TRUE(Time::Now() - start_time_ >= one_second); + EXPECT_TRUE(status.is_success()); + EXPECT_FALSE(data.empty()); + delete fetcher_; + io_loop_.Quit(); + } else { + // Now running Overload test. + static int count = 0; + count++; + if (count < 20) { + fetcher_->Start(); + } else { + // We have already sent 20 requests continuously, and we expect that + // it takes more than 1 second due to the overload protection settings. + EXPECT_TRUE(Time::Now() - start_time_ >= one_second); + URLFetcherTest::OnURLFetchComplete(source, url, status, response_code, + cookies, data); + } + } +} + +URLFetcherBadHTTPSTest::URLFetcherBadHTTPSTest() { + PathService::Get(base::DIR_SOURCE_ROOT, &cert_dir_); + cert_dir_ = cert_dir_.Append(FILE_PATH_LITERAL("chrome")); + cert_dir_ = cert_dir_.Append(FILE_PATH_LITERAL("test")); + cert_dir_ = cert_dir_.Append(FILE_PATH_LITERAL("data")); + cert_dir_ = cert_dir_.Append(FILE_PATH_LITERAL("ssl")); + cert_dir_ = cert_dir_.Append(FILE_PATH_LITERAL("certificates")); +} + +// The "server certificate expired" error should result in automatic +// cancellation of the request by +// URLRequest::Delegate::OnSSLCertificateError. +void URLFetcherBadHTTPSTest::OnURLFetchComplete( + const URLFetcher* source, + const GURL& url, + const URLRequestStatus& status, + int response_code, + const ResponseCookies& cookies, + const std::string& data) { + // This part is different from URLFetcherTest::OnURLFetchComplete + // because this test expects the request to be cancelled.
+ EXPECT_EQ(URLRequestStatus::CANCELED, status.status()); + EXPECT_EQ(net::ERR_ABORTED, status.os_error()); + EXPECT_EQ(-1, response_code); + EXPECT_TRUE(cookies.empty()); + EXPECT_TRUE(data.empty()); + + // The rest is the same as URLFetcherTest::OnURLFetchComplete. + delete fetcher_; + io_loop_.Quit(); +} + +FilePath URLFetcherBadHTTPSTest::GetExpiredCertPath() { + return cert_dir_.Append(FILE_PATH_LITERAL("expired_cert.pem")); +} + +void URLFetcherCancelTest::CreateFetcher(const GURL& url) { + fetcher_ = new URLFetcher(url, URLFetcher::GET, this); + fetcher_->set_request_context( + new CancelTestURLRequestContext(&context_released_)); + fetcher_->set_io_loop(&io_loop_); + fetcher_->Start(); + // Make sure we give the IO thread a chance to run. + timer_.Start(TimeDelta::FromMilliseconds(100), this, + &URLFetcherCancelTest::CancelRequest); +} + +void URLFetcherCancelTest::OnURLFetchComplete(const URLFetcher* source, + const GURL& url, + const URLRequestStatus& status, + int response_code, + const ResponseCookies& cookies, + const std::string& data) { + // We should have cancelled the request before completion. + ADD_FAILURE(); + delete fetcher_; + io_loop_.PostTask(FROM_HERE, new MessageLoop::QuitTask()); +} + +void URLFetcherCancelTest::CancelRequest() { + delete fetcher_; + timer_.Stop(); + // Make sure we give the IO thread a chance to run. + timer_.Start(TimeDelta::FromMilliseconds(100), this, + &URLFetcherCancelTest::TestContextReleased); +} + +void URLFetcherCancelTest::TestContextReleased() { + EXPECT_TRUE(context_released_); + timer_.Stop(); + io_loop_.PostTask(FROM_HERE, new MessageLoop::QuitTask()); +} + +} // namespace. + +TEST_F(URLFetcherTest, SameThreadsTest) { + // Create the fetcher on the main thread. Since IO will happen on the main + // thread, this will test URLFetcher's ability to do everything on one + // thread. + scoped_refptr<HTTPTestServer> server = + HTTPTestServer::CreateServer(kDocRoot); + ASSERT_TRUE(NULL != server.get()); + + CreateFetcher(GURL(server->TestServerPage("defaultresponse"))); + + MessageLoop::current()->Run(); +} + +TEST_F(URLFetcherTest, DifferentThreadsTest) { + scoped_refptr<HTTPTestServer> server = + HTTPTestServer::CreateServer(kDocRoot); + ASSERT_TRUE(NULL != server.get()); + // Create a separate thread that will create the URLFetcher. The current + // (main) thread will do the IO, and when the fetch is complete it will + // terminate the main thread's message loop; then the other thread's + // message loop will be shut down automatically as the thread goes out of + // scope. + base::Thread t("URLFetcher test thread"); + t.Start(); + t.message_loop()->PostTask(FROM_HERE, new FetcherWrapperTask(this, + GURL(server->TestServerPage("defaultresponse")))); + + MessageLoop::current()->Run(); +} + +TEST_F(URLFetcherPostTest, Basic) { + scoped_refptr<HTTPTestServer> server = + HTTPTestServer::CreateServer(kDocRoot); + ASSERT_TRUE(NULL != server.get()); + CreateFetcher(GURL(server->TestServerPage("echo"))); + MessageLoop::current()->Run(); +} + +TEST_F(URLFetcherHeadersTest, Headers) { + scoped_refptr<HTTPTestServer> server = + HTTPTestServer::CreateServer(L"net/data/url_request_unittest"); + ASSERT_TRUE(NULL != server.get()); + CreateFetcher(GURL(server->TestServerPage("files/with-headers.html"))); + MessageLoop::current()->Run(); + // The actual tests are in the URLFetcherHeadersTest fixture. 
+} + +TEST_F(URLFetcherProtectTest, Overload) { + scoped_refptr<HTTPTestServer> server = + HTTPTestServer::CreateServer(kDocRoot); + ASSERT_TRUE(NULL != server.get()); + GURL url = GURL(server->TestServerPage("defaultresponse")); + + // Registers an entry for test url. It only allows 3 requests to be sent + // in 200 milliseconds. + URLFetcherProtectManager* manager = URLFetcherProtectManager::GetInstance(); + URLFetcherProtectEntry* entry = + new URLFetcherProtectEntry(200, 3, 11, 1, 2.0, 0, 256); + manager->Register(url.host(), entry); + + CreateFetcher(url); + + MessageLoop::current()->Run(); +} + +TEST_F(URLFetcherProtectTest, ServerUnavailable) { + scoped_refptr<HTTPTestServer> server = + HTTPTestServer::CreateServer(L"chrome/test/data"); + ASSERT_TRUE(NULL != server.get()); + GURL url = GURL(server->TestServerPage("files/server-unavailable.html")); + + // Registers an entry for test url. The backoff time is calculated by: + // new_backoff = 2.0 * old_backoff + 0 + // and maximum backoff time is 256 milliseconds. + // Maximum retries allowed is set to 11. + URLFetcherProtectManager* manager = URLFetcherProtectManager::GetInstance(); + URLFetcherProtectEntry* entry = + new URLFetcherProtectEntry(200, 3, 11, 1, 2.0, 0, 256); + manager->Register(url.host(), entry); + + CreateFetcher(url); + + MessageLoop::current()->Run(); +} + +#if defined(OS_WIN) +TEST_F(URLFetcherBadHTTPSTest, BadHTTPSTest) { +#else +// TODO(port): Enable BadHTTPSTest. Currently asserts in +// URLFetcherBadHTTPSTest::OnURLFetchComplete don't pass. +TEST_F(URLFetcherBadHTTPSTest, DISABLED_BadHTTPSTest) { +#endif + scoped_refptr<HTTPSTestServer> server = + HTTPSTestServer::CreateServer(util_.kHostName, util_.kBadHTTPSPort, + kDocRoot, util_.GetExpiredCertPath().ToWStringHack()); + ASSERT_TRUE(NULL != server.get()); + + CreateFetcher(GURL(server->TestServerPage("defaultresponse"))); + + MessageLoop::current()->Run(); +} + +TEST_F(URLFetcherCancelTest, ReleasesContext) { + scoped_refptr<HTTPTestServer> server = + HTTPTestServer::CreateServer(L"chrome/test/data"); + ASSERT_TRUE(NULL != server.get()); + GURL url = GURL(server->TestServerPage("files/server-unavailable.html")); + + // Registers an entry for test url. The backoff time is calculated by: + // new_backoff = 2.0 * old_backoff + 0 + // The initial backoff is 2 seconds and maximum backoff is 4 seconds. + // Maximum retries allowed is set to 2. + URLFetcherProtectManager* manager = URLFetcherProtectManager::GetInstance(); + URLFetcherProtectEntry* entry = + new URLFetcherProtectEntry(200, 3, 2, 2000, 2.0, 0, 4000); + manager->Register(url.host(), entry); + + // Create a separate thread that will create the URLFetcher. The current + // (main) thread will do the IO, and when the fetch is complete it will + // terminate the main thread's message loop; then the other thread's + // message loop will be shut down automatically as the thread goes out of + // scope. + base::Thread t("URLFetcher test thread"); + t.Start(); + t.message_loop()->PostTask(FROM_HERE, new FetcherWrapperTask(this, url)); + + MessageLoop::current()->Run(); +} diff --git a/chrome/browser/net/url_fixer_upper.cc b/chrome/browser/net/url_fixer_upper.cc new file mode 100644 index 0000000..1aca150 --- /dev/null +++ b/chrome/browser/net/url_fixer_upper.cc @@ -0,0 +1,439 @@ +// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include <algorithm> +#include <windows.h> + +#include "chrome/browser/net/url_fixer_upper.h" + +#include "base/file_util.h" +#include "base/logging.h" +#include "base/string_util.h" +#include "chrome/common/gfx/text_elider.h" +#include "googleurl/src/gurl.h" +#include "googleurl/src/url_canon.h" +#include "googleurl/src/url_file.h" +#include "googleurl/src/url_parse.h" +#include "googleurl/src/url_util.h" +#include "net/base/escape.h" +#include "net/base/net_util.h" +#include "net/base/registry_controlled_domain.h" + +using namespace std; + +// does some basic fixes for input that we want to test for file-ness +static void PrepareStringForFileOps(const wstring& text, wstring* output) { + TrimWhitespace(text, TRIM_ALL, output); + replace(output->begin(), output->end(), '/', '\\'); +} + +// Tries to create a full path from |text|. If the result is valid and the +// file exists, returns true and sets |full_path| to the result. Otherwise, +// returns false and leaves |full_path| unchanged. +static bool ValidPathForFile(const wstring& text, wstring* full_path) { + wchar_t file_path[MAX_PATH]; + if (!_wfullpath(file_path, text.c_str(), MAX_PATH)) + return false; + + if (!file_util::PathExists(file_path)) + return false; + + full_path->assign(file_path); + return true; +} + +// Tries to create a file: URL from |text|, even if it doesn't resolve as a +// valid path or to an existing file. Fixes up a "|" drive separator to ":" +// and returns the (elided) file: URL if it is valid; otherwise returns |text| +// unchanged. +static wstring FixupPath(const wstring& text) { + DCHECK(text.length() >= 2); + + wstring filename; + PrepareStringForFileOps(text, &filename); + + if (filename[1] == '|') + filename[1] = ':'; + + // Here, we know the input looks like a file. + GURL file_url = net::FilePathToFileURL(filename); + if (file_url.is_valid()) + return gfx::ElideUrl(file_url, ChromeFont(), 0, std::wstring()); + + // Invalid file URL, just return the input. + return text; +} + +// Checks |domain| to see if a valid TLD is already present. If not, appends +// |desired_tld| to the domain, and prepends "www." unless it's already +// present. |domain| is modified in place to reflect the changes. +static void AddDesiredTLD(const wstring& desired_tld, + wstring* domain) { + if (desired_tld.empty() || domain->empty()) + return; + + // Check the TLD. If the return value is positive, we already have a TLD, so + // abort; if the return value is wstring::npos, there's no valid host (e.g. if + // the user pasted in garbage for which HistoryURLProvider is trying to + // suggest an exact match), so adding a TLD makes no sense. The only useful + // case is where the return value is 0 (there's a valid host with no known + // TLD). We disallow unknown registries here so users can input "mail.yahoo" + // and hit ctrl-enter to get "www.mail.yahoo.com". + const size_t registry_length = + net::RegistryControlledDomainService::GetRegistryLength(*domain, false); + if (registry_length != 0) + return; + + // Add the suffix at the end of the domain. + const size_t domain_length(domain->length()); + DCHECK(domain_length > 0); + DCHECK(desired_tld[0] != '.'); + if ((*domain)[domain_length - 1] != '.') + domain->push_back('.'); + domain->append(desired_tld); + + // Now, if the domain begins with "www.", stop.
+ const wstring prefix(L"www."); + if (domain->compare(0, prefix.length(), prefix) != 0) { + // Otherwise, add www. to the beginning of the URL. + domain->insert(0, prefix); + } +} + +static inline void FixupUsername(const wstring& text, + const url_parse::Component& part, + wstring* url) { + if (!part.is_valid()) + return; + + // We don't fix up the username at the moment. + url->append(text, part.begin, part.len); + // Do not append the trailing '@' because we might need to include the user's + // password. FixupURL itself will append the '@' for us. +} + +static inline void FixupPassword(const wstring& text, + const url_parse::Component& part, + wstring* url) { + if (!part.is_valid()) + return; + + // We don't fix up the password at the moment. + url->append(L":"); + url->append(text, part.begin, part.len); +} + +static void FixupHost(const wstring& text, + const url_parse::Component& part, + bool has_scheme, + const wstring& desired_tld, + wstring* url) { + if (!part.is_valid()) + return; + + // Make domain valid. + // Strip all leading dots and all but one trailing dot, unless the user only + // typed dots, in which case their input is totally invalid and we should just + // leave it unchanged. + wstring domain(text, part.begin, part.len); + const size_t first_nondot(domain.find_first_not_of('.')); + if (first_nondot != wstring::npos) { + domain.erase(0, first_nondot); + size_t last_nondot(domain.find_last_not_of('.')); + DCHECK(last_nondot != wstring::npos); + last_nondot += 2; // Point at second period in ending string + if (last_nondot < domain.length()) + domain.erase(last_nondot); + } + + // Add any user-specified TLD, if applicable. + AddDesiredTLD(desired_tld, &domain); + + url->append(domain); +} + +// Appends the port from |part| to |url|, including the initial colon. Any +// non-digit characters are stripped from the port ("::1337" becomes ":1337"); +// if nothing is left afterwards, no port is appended. +static void FixupPort(const wstring& text, + const url_parse::Component& part, + wstring* url) { + if (!part.is_valid()) + return; + + // Look for non-digit in port and strip if found. + wstring port(text, part.begin, part.len); + for (wstring::iterator i = port.begin(); i != port.end(); ) { + if (IsAsciiDigit(*i)) + ++i; + else + i = port.erase(i); + } + + if (port.empty()) + return; // Nothing to append. + + url->append(L":"); + url->append(port); +} + +static inline void FixupPath(const wstring& text, + const url_parse::Component& part, + wstring* url) { + if (!part.is_valid() || part.len == 0) { + // We should always have a path. + url->append(L"/"); + return; + } + + // Append the path as is. + url->append(text, part.begin, part.len); +} + +static inline void FixupQuery(const wstring& text, + const url_parse::Component& part, + wstring* url) { + if (!part.is_valid()) + return; + + // We don't fix up the query at the moment. + url->append(L"?"); + url->append(text, part.begin, part.len); +} + +static inline void FixupRef(const wstring& text, + const url_parse::Component& part, + wstring* url) { + if (!part.is_valid()) + return; + + // We don't fix up the ref at the moment. + url->append(L"#"); + url->append(text, part.begin, part.len); +} + +static void OffsetComponent(int offset, url_parse::Component* part) { + DCHECK(part); + + if (part->is_valid()) { + // Offset the location of this component. + part->begin += offset; + + // This part might not have existed in the original text.
+ if (part->begin < 0) + part->reset(); + } +} + +static bool HasPort(const std::wstring& original_text, + const url_parse::Component& scheme_component, + const std::wstring& scheme) { + // Find the range between the ":" and the "/". + size_t port_start = scheme_component.end() + 1; + size_t port_end = port_start; + while ((port_end < original_text.length()) && + !url_parse::IsAuthorityTerminator(original_text[port_end])) + ++port_end; + if (port_end == port_start) + return false; + + // Scan the range to see if it is entirely digits. + for (size_t i = port_start; i < port_end; ++i) { + if (!IsAsciiDigit(original_text[i])) + return false; + } + + return true; +} + +wstring URLFixerUpper::SegmentURL(const wstring& text, + url_parse::Parsed* parts) { + // Initialize the result. + *parts = url_parse::Parsed(); + + wstring trimmed; + TrimWhitespace(text, TRIM_ALL, &trimmed); + if (trimmed.empty()) + return wstring(); // Nothing to segment. + + int trimmed_length = static_cast<int>(trimmed.length()); + if (url_parse::DoesBeginWindowsDriveSpec(trimmed.data(), 0, trimmed_length) + || url_parse::DoesBeginUNCPath(trimmed.data(), 0, trimmed_length, false)) + return L"file"; + + // Otherwise, we need to look at things carefully. + wstring scheme; + if (url_parse::ExtractScheme(text.data(), + static_cast<int>(text.length()), + &parts->scheme)) { + // We were able to extract a scheme. Remember what we have, but we may + // decide to change our minds later. + scheme.assign(text.substr(parts->scheme.begin, parts->scheme.len)); + + if (parts->scheme.is_valid() && + // Valid schemes are ASCII-only. + (!IsStringASCII(scheme) || + // We need to fix up the segmentation for "www.example.com:/". For this + // case, we guess that schemes with a "." are not actually schemes. + (scheme.find(L".") != wstring::npos) || + // We need to fix up the segmentation for "www:123/". For this case, we + // will add an HTTP scheme later and make the URL parser happy. + // TODO(pkasting): Maybe we should try to use GURL's parser for this? + HasPort(text, parts->scheme, scheme))) + parts->scheme.reset(); + } + + // When we couldn't find a scheme in the input, we need to pick one. Normally + // we choose http, but if the URL starts with "ftp.", we match other browsers + // and choose ftp. + if (!parts->scheme.is_valid()) + scheme.assign(StartsWith(text, L"ftp.", false) ? L"ftp" : L"http"); + + // Canonicalize the scheme. + StringToLowerASCII(&scheme); + + // Not segmenting file schemes or nonstandard schemes. + if ((scheme == L"file") || + !url_util::IsStandard(scheme.c_str(), static_cast<int>(scheme.length()), + url_parse::Component(0, static_cast<int>(scheme.length())))) + return scheme; + + if (parts->scheme.is_valid()) { + // Have the GURL parser do the heavy lifting for us. + url_parse::ParseStandardURL(text.data(), static_cast<int>(text.length()), + parts); + return scheme; + } + + // We need to add a scheme in order for ParseStandardURL to be happy. + // Find the first non-whitespace character. + wstring::const_iterator first_nonwhite = text.begin(); + while ((first_nonwhite != text.end()) && IsWhitespace(*first_nonwhite)) + ++first_nonwhite; + + // Construct the text to parse by inserting the scheme. + wstring inserted_text(scheme); + inserted_text.append(L"://"); + wstring text_to_parse(text.begin(), first_nonwhite); + text_to_parse.append(inserted_text); + text_to_parse.append(first_nonwhite, text.end()); + + // Have the GURL parser do the heavy lifting for us.
+ url_parse::ParseStandardURL(text_to_parse.data(), + static_cast<int>(text_to_parse.length()), + parts); + + // Offset the results of the parse to match the original text. + const int offset = -static_cast<int>(inserted_text.length()); + OffsetComponent(offset, &parts->scheme); + OffsetComponent(offset, &parts->username); + OffsetComponent(offset, &parts->password); + OffsetComponent(offset, &parts->host); + OffsetComponent(offset, &parts->port); + OffsetComponent(offset, &parts->path); + OffsetComponent(offset, &parts->query); + OffsetComponent(offset, &parts->ref); + + return scheme; +} + +std::wstring URLFixerUpper::FixupURL(const wstring& text, + const wstring& desired_tld) { + wstring trimmed; + TrimWhitespace(text, TRIM_ALL, &trimmed); + if (trimmed.empty()) + return wstring(); // Nothing here. + + // Segment the URL. + url_parse::Parsed parts; + wstring scheme(SegmentURL(trimmed, &parts)); + + // We handle the file scheme separately. + if (scheme == L"file") + return (parts.scheme.is_valid() ? text : FixupPath(text)); + + // For some schemes whose layouts we understand, we rebuild it. + if (url_util::IsStandard(scheme.c_str(), static_cast<int>(scheme.length()), + url_parse::Component(0, static_cast<int>(scheme.length())))) { + wstring url(scheme); + url.append(L"://"); + + // We need to check whether the |username| is valid because it is our + // responsibility to append the '@' to delineate the user information from + // the host portion of the URL. + if (parts.username.is_valid()) { + FixupUsername(trimmed, parts.username, &url); + FixupPassword(trimmed, parts.password, &url); + url.append(L"@"); + } + + FixupHost(trimmed, parts.host, parts.scheme.is_valid(), desired_tld, &url); + FixupPort(trimmed, parts.port, &url); + FixupPath(trimmed, parts.path, &url); + FixupQuery(trimmed, parts.query, &url); + FixupRef(trimmed, parts.ref, &url); + + return url; + } + + // In the worst-case, we insert a scheme if the URL lacks one. + if (!parts.scheme.is_valid()) { + wstring fixed_scheme(scheme); + fixed_scheme.append(L"://"); + trimmed.insert(0, fixed_scheme); + } + + return trimmed; +} + +// The rules are different here than for regular fixup, since we need to handle +// input like "hello.html" and know to look in the current directory. Regular +// fixup will look for cues that it is actually a file path before trying to +// figure out what file it is. If our logic doesn't work, we will fall back on +// regular fixup. +wstring URLFixerUpper::FixupRelativeFile(const wstring& base_dir, + const wstring& text) { + wchar_t old_cur_directory[MAX_PATH]; + if (!base_dir.empty()) { + // save the old current directory before we move to the new one + // TODO: in the future, we may want to handle paths longer than MAX_PATH + GetCurrentDirectory(MAX_PATH, old_cur_directory); + SetCurrentDirectory(base_dir.c_str()); + } + + // allow funny input with extra whitespace and the wrong kind of slashes + wstring trimmed; + PrepareStringForFileOps(text, &trimmed); + + bool is_file = true; + wstring full_path; + if (!ValidPathForFile(trimmed, &full_path)) { + // Not a path as entered, try unescaping it in case the user has + // escaped things. We need to go through 8-bit since the escaped values + // only represent 8-bit values. + std::wstring unescaped = UTF8ToWide(UnescapeURLComponent( + WideToUTF8(trimmed), + UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS)); + if (!ValidPathForFile(unescaped, &full_path)) + is_file = false; + } + + // Put back the current directory if we saved it. 
+
+  // Put back the current directory if we saved it.
+  if (!base_dir.empty())
+    SetCurrentDirectory(old_cur_directory);
+
+  if (is_file) {
+    GURL file_url = net::FilePathToFileURL(full_path);
+    if (file_url.is_valid())
+      return gfx::ElideUrl(file_url, ChromeFont(), 0, std::wstring());
+    // Invalid files fall through to regular processing.
+  }
+
+  // Fall back on regular fixup for this input.
+  return FixupURL(text, L"");
+}
+
diff --git a/chrome/browser/net/url_fixer_upper.h b/chrome/browser/net/url_fixer_upper.h
new file mode 100644
index 0000000..c7f3f00
--- /dev/null
+++ b/chrome/browser/net/url_fixer_upper.h
@@ -0,0 +1,54 @@
+// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_NET_URL_FIXER_UPPER_H__
+#define CHROME_BROWSER_NET_URL_FIXER_UPPER_H__
+
+#include <string>
+
+#include "googleurl/src/url_parse.h"
+
+// These functions convert various types of input into URLs that we know are
+// valid; for example, user typing in the URL bar or command-line options.
+// This is NOT the place for converting between different types of URLs or
+// parsing them; see net_util.h for that.
+namespace URLFixerUpper {
+
+  // Segments the given text string into parts of a URL.  This is most useful
+  // for schemes such as http, https, and ftp, where |SegmentURL| will find
+  // many segments.  Currently does not segment "file" schemes.
+  std::wstring SegmentURL(const std::wstring& text, url_parse::Parsed* parts);
+
+  // Converts |text| to a fixed-up URL and returns it.  Attempts to make some
+  // "smart" adjustments to obviously-invalid input where possible.  |text|
+  // may be an absolute path to a file, which will get converted to a "file:"
+  // URL.
+  //
+  // The result will be a "more" valid URL than the input.  It may still not
+  // be valid; convert it to a GURL to check.
+  //
+  // If |desired_tld| is non-empty, it represents the TLD the user wishes to
+  // append in the case of an incomplete domain.  We check that this is not a
+  // file path and that there does not appear to be a valid TLD already, then
+  // append |desired_tld| to the domain and prepend "www." (unless it, or a
+  // scheme, is already present).  This TLD should not have a leading '.'
+  // (use "com" instead of ".com").
+  std::wstring FixupURL(const std::wstring& text,
+                        const std::wstring& desired_tld);
+
+  // Converts |text| to a fixed-up URL, allowing it to be a relative path on
+  // the local filesystem.  Begins searching in |base_dir|; if |base_dir| is
+  // empty, uses the current working directory.  If this resolves to a file on
+  // disk, returns it as a "file:" URL; otherwise, falls back to the behavior
+  // of FixupURL().
+  //
+  // For "regular" input, even if it is possibly a file with a full path, you
+  // should use FixupURL() directly.  This function should only be used when
+  // relative path handling is desired, as for command-line processing.
+  std::wstring FixupRelativeFile(const std::wstring& base_dir,
+                                 const std::wstring& text);
+}  // namespace URLFixerUpper
+
+#endif  // CHROME_BROWSER_NET_URL_FIXER_UPPER_H__
+
diff --git a/chrome/browser/net/url_fixer_upper_unittest.cc b/chrome/browser/net/url_fixer_upper_unittest.cc
new file mode 100644
index 0000000..024060c
--- /dev/null
+++ b/chrome/browser/net/url_fixer_upper_unittest.cc
@@ -0,0 +1,334 @@
+// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
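+//
+// A minimal usage sketch of the API under test (illustrative only; the TEST
+// bodies below are the authoritative behavior checks):
+//
+//   url_parse::Parsed parts;
+//   std::wstring scheme =
+//       URLFixerUpper::SegmentURL(L"www.google.com/", &parts);
+//   // scheme == L"http"; parts.host spans "www.google.com".
+//
+//   std::wstring url = URLFixerUpper::FixupURL(L"google", L"com");
+//   // url == L"http://www.google.com/"; the desired TLD was appended.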
+
+#include <stdlib.h>
+#include <windows.h>
+
+#include "base/basictypes.h"
+#include "base/logging.h"
+#include "base/path_service.h"
+#include "base/string_util.h"
+#include "chrome/browser/net/url_fixer_upper.h"
+#include "chrome/common/chrome_paths.h"
+#include "googleurl/src/gurl.h"
+#include "googleurl/src/url_parse.h"
+#include "net/base/net_util.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace {
+
+class URLFixerUpperTest : public testing::Test {
+};
+
+}  // namespace
+
+std::ostream& operator<<(std::ostream& os, const url_parse::Component& part) {
+  return os << "(begin=" << part.begin << ", len=" << part.len << ")";
+}
+
+struct segment_case {
+  const std::wstring input;
+  const std::wstring result;
+  const url_parse::Component scheme;
+  const url_parse::Component username;
+  const url_parse::Component password;
+  const url_parse::Component host;
+  const url_parse::Component port;
+  const url_parse::Component path;
+  const url_parse::Component query;
+  const url_parse::Component ref;
+};
+
+static const segment_case segment_cases[] = {
+  { L"http://www.google.com/", L"http",
+    url_parse::Component(0, 4),   // scheme
+    url_parse::Component(),       // username
+    url_parse::Component(),       // password
+    url_parse::Component(7, 14),  // host
+    url_parse::Component(),       // port
+    url_parse::Component(21, 1),  // path
+    url_parse::Component(),       // query
+    url_parse::Component(),       // ref
+  },
+  { L"aBoUt:vErSiOn", L"about",
+    url_parse::Component(0, 5),   // scheme
+    url_parse::Component(),       // username
+    url_parse::Component(),       // password
+    url_parse::Component(),       // host
+    url_parse::Component(),       // port
+    url_parse::Component(),       // path
+    url_parse::Component(),       // query
+    url_parse::Component(),       // ref
+  },
+  { L"    www.google.com:124?foo#", L"http",
+    url_parse::Component(),       // scheme
+    url_parse::Component(),       // username
+    url_parse::Component(),       // password
+    url_parse::Component(4, 14),  // host
+    url_parse::Component(19, 3),  // port
+    url_parse::Component(),       // path
+    url_parse::Component(23, 3),  // query
+    url_parse::Component(27, 0),  // ref
+  },
+  { L"user@www.google.com", L"http",
+    url_parse::Component(),       // scheme
+    url_parse::Component(0, 4),   // username
+    url_parse::Component(),       // password
+    url_parse::Component(5, 14),  // host
+    url_parse::Component(),       // port
+    url_parse::Component(),       // path
+    url_parse::Component(),       // query
+    url_parse::Component(),       // ref
+  },
+  { L"ftp:/user:P:a$$Wd@..ftp.google.com...::23///pub?foo#bar", L"ftp",
+    url_parse::Component(0, 3),   // scheme
+    url_parse::Component(5, 4),   // username
+    url_parse::Component(10, 7),  // password
+    url_parse::Component(18, 20), // host
+    url_parse::Component(39, 2),  // port
+    url_parse::Component(41, 6),  // path
+    url_parse::Component(48, 3),  // query
+    url_parse::Component(52, 3),  // ref
+  },
+};
+
+TEST(URLFixerUpperTest, SegmentURL) {
+  std::wstring result;
+  url_parse::Parsed parts;
+
+  for (size_t i = 0; i < arraysize(segment_cases); ++i) {
+    segment_case value = segment_cases[i];
+    result = URLFixerUpper::SegmentURL(value.input, &parts);
+    EXPECT_EQ(value.result, result);
+    EXPECT_EQ(value.scheme, parts.scheme);
+    EXPECT_EQ(value.username, parts.username);
+    EXPECT_EQ(value.password, parts.password);
+    EXPECT_EQ(value.host, parts.host);
+    EXPECT_EQ(value.port, parts.port);
+    EXPECT_EQ(value.path, parts.path);
+    EXPECT_EQ(value.query, parts.query);
+    EXPECT_EQ(value.ref, parts.ref);
+  }
+}
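+
+// (Note: the L"    www.google.com:124?foo#" case above exercises the
+// scheme-insertion path in SegmentURL(); the expected components are offsets
+// into the original, still-whitespace-prefixed input.)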
+
+// Creates a file with the given |file_name| in directory |dir|, and returns
+// the full path in |full_path|.  Example:
+//    dir = "c:\foo", file_name = "bar.txt"  =>  full_path = "c:\foo\bar.txt"
+static bool MakeTempFile(const std::wstring& dir,
+                         const std::wstring& file_name,
+                         std::wstring* full_path) {
+  *full_path = dir + L"\\" + file_name;
+
+  // CreateFile returns INVALID_HANDLE_VALUE on failure.
+  HANDLE hfile = CreateFile(full_path->c_str(), GENERIC_READ | GENERIC_WRITE,
+                            0, NULL, CREATE_ALWAYS, 0, NULL);
+  if (hfile == INVALID_HANDLE_VALUE)
+    return false;
+  CloseHandle(hfile);
+  return true;
+}
+
+// Returns true if the given URL is a file: URL that matches the given file.
+static bool IsMatchingFileURL(const std::wstring& url,
+                              const std::wstring& full_file_path) {
+  if (url.length() <= 8)
+    return false;
+  if (std::wstring(L"file:///") != url.substr(0, 8))
+    return false;  // No "file:///" prefix.
+  if (url.find('\\') != std::wstring::npos)
+    return false;  // Contains backslashes.
+
+  std::wstring derived_path;
+  net::FileURLToFilePath(GURL(url), &derived_path);
+  return (derived_path.length() == full_file_path.length()) &&
+      std::equal(derived_path.begin(), derived_path.end(),
+                 full_file_path.begin(), CaseInsensitiveCompare<wchar_t>());
+}
+
+struct fixup_case {
+  const std::wstring input;
+  const std::wstring desired_tld;
+  const std::wstring output;
+} fixup_cases[] = {
+  {L"www.google.com", L"", L"http://www.google.com/"},
+  {L" www.google.com ", L"", L"http://www.google.com/"},
+  {L" foo.com/asdf bar", L"", L"http://foo.com/asdf bar"},
+  {L"..www.google.com..", L"", L"http://www.google.com./"},
+  {L"http://......", L"", L"http://....../"},
+  {L"http://host.com:ninety-two/", L"", L"http://host.com/"},
+  {L"http://host.com:ninety-two?foo", L"", L"http://host.com/?foo"},
+  {L"google.com:123", L"", L"http://google.com:123/"},
+  {L"about:", L"", L"about:"},
+  {L"about:version", L"", L"about:version"},
+  {L"www:123", L"", L"http://www:123/"},
+  {L" www:123", L"", L"http://www:123/"},
+  {L"www.google.com?foo", L"", L"http://www.google.com/?foo"},
+  {L"www.google.com#foo", L"", L"http://www.google.com/#foo"},
+  {L"www.google.com?", L"", L"http://www.google.com/?"},
+  {L"www.google.com#", L"", L"http://www.google.com/#"},
+  {L"www.google.com:123?foo#bar", L"", L"http://www.google.com:123/?foo#bar"},
+  {L"user@www.google.com", L"", L"http://user@www.google.com/"},
+  {L"\x6C34.com", L"", L"http://\x6C34.com/"},
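+  // (The preceding entry is an IDN host: L"\x6C34" is the CJK character
+  // U+6C34.  Fixup leaves non-ASCII hostnames intact at this layer.)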
+ {L"user:passwd@www.google.com:8080/", L"", L"user:passwd@www.google.com:8080/"}, + //{L"file:///c:/foo/bar%20baz.txt", L"", L"file:///C:/foo/bar%20baz.txt"}, + {L"ftp.google.com", L"", L"ftp://ftp.google.com/"}, + {L" ftp.google.com", L"", L"ftp://ftp.google.com/"}, + {L"FTP.GooGle.com", L"", L"ftp://FTP.GooGle.com/"}, + {L"ftpblah.google.com", L"", L"http://ftpblah.google.com/"}, + {L"ftp", L"", L"http://ftp/"}, + {L"google.ftp.com", L"", L"http://google.ftp.com/"}, +}; + +TEST(URLFixerUpperTest, FixupURL) { + std::wstring output; + + for (int i = 0; i < arraysize(fixup_cases); ++i) { + fixup_case value = fixup_cases[i]; + output = URLFixerUpper::FixupURL(value.input, value.desired_tld); + EXPECT_EQ(value.output, output); + } + + // Check the TLD-appending functionality + fixup_case tld_cases[] = { + {L"google", L"com", L"http://www.google.com/"}, + {L"google.", L"com", L"http://www.google.com/"}, + {L"google..", L"com", L"http://www.google.com/"}, + {L".google", L"com", L"http://www.google.com/"}, + {L"www.google", L"com", L"http://www.google.com/"}, + {L"google.com", L"com", L"http://google.com/"}, + {L"http://google", L"com", L"http://www.google.com/"}, + {L"..google..", L"com", L"http://www.google.com/"}, + {L"http://www.google", L"com", L"http://www.google.com/"}, + {L"google/foo", L"com", L"http://www.google.com/foo"}, + {L"google.com/foo", L"com", L"http://google.com/foo"}, + {L"google/?foo=.com", L"com", L"http://www.google.com/?foo=.com"}, + {L"www.google/?foo=www.", L"com", L"http://www.google.com/?foo=www."}, + {L"google.com/?foo=.com", L"com", L"http://google.com/?foo=.com"}, + {L"http://www.google.com", L"com", L"http://www.google.com/"}, + {L"google:123", L"com", L"http://www.google.com:123/"}, + {L"http://google:123", L"com", L"http://www.google.com:123/"}, + }; + for (int i = 0; i < arraysize(tld_cases); ++i) { + fixup_case value = tld_cases[i]; + output = URLFixerUpper::FixupURL(value.input, value.desired_tld); + EXPECT_EQ(value.output, output); + } +} + +// Test different types of file inputs to URIFixerUpper::FixupURL. This +// doesn't go into the nice array of fixups above since the file input +// has to exist. +TEST(URLFixerUpperTest, FixupFile) { + // this "original" filename is the one we tweak to get all the variations + std::wstring dir; + std::wstring original; + ASSERT_TRUE(PathService::Get(chrome::DIR_APP, &dir)); + ASSERT_TRUE(MakeTempFile(dir, L"url fixer upper existing file.txt", + &original)); + + // reference path + std::wstring golden = + UTF8ToWide(net::FilePathToFileURL(original).spec()); + + // c:\foo\bar.txt -> file:///c:/foo/bar.txt (basic) + std::wstring fixedup = URLFixerUpper::FixupURL(original, L""); + EXPECT_EQ(golden, fixedup); + + // c|/foo\bar.txt -> file:///c:/foo/bar.txt (pipe allowed instead of colon) + std::wstring cur(original); + EXPECT_EQ(':', cur[1]); + cur[1] = '|'; + fixedup = URLFixerUpper::FixupURL(cur, L""); + EXPECT_EQ(golden, fixedup); + + fixup_case file_cases[] = { + // File URLs go through GURL, which tries to escape intelligently. + {L"c:\\This%20is a non-existent file.txt", L"", L"file:///C:/This%2520is%20a%20non-existent%20file.txt"}, + + // \\foo\bar.txt -> file://foo/bar.txt + // UNC paths, this file won't exist, but since there are no escapes, it + // should be returned just converted to a file: URL. 
+ {L"\\\\SomeNonexistentHost\\foo\\bar.txt", L"", L"file://somenonexistenthost/foo/bar.txt"}, + {L"//SomeNonexistentHost\\foo/bar.txt", L"", L"file://somenonexistenthost/foo/bar.txt"}, + {L"file:///C:/foo/bar", L"", L"file:///C:/foo/bar"}, + + // These are fixups we don't do, but could consider: + // + // {L"file://C:/foo/bar", L"", L"file:///C:/foo/bar"}, + // {L"file:c:", L"", L"file:///c:/"}, + // {L"file:c:WINDOWS", L"", L"file:///c:/WINDOWS"}, + // {L"file:c|Program Files", L"", L"file:///c:/Program Files"}, + // {L"file:///foo:/bar", L"", L"file://foo/bar"}, + // {L"file:/file", L"", L"file://file/"}, + // {L"file:////////c:\\foo", L"", L"file:///c:/foo"}, + // {L"file://server/folder/file", L"", L"file://server/folder/file"}, + // {L"file:/\\/server\\folder/file", L"", L"file://server/folder/file"}, + }; + for (int i = 0; i < arraysize(file_cases); i++) { + fixedup = URLFixerUpper::FixupURL(file_cases[i].input, + file_cases[i].desired_tld); + EXPECT_EQ(file_cases[i].output, fixedup); + } + + EXPECT_TRUE(DeleteFile(original.c_str())); +} + +TEST(URLFixerUpperTest, FixupRelativeFile) { + std::wstring full_path, dir; + std::wstring file_part(L"url_fixer_upper_existing_file.txt"); + ASSERT_TRUE(PathService::Get(chrome::DIR_APP, &dir)); + ASSERT_TRUE(MakeTempFile(dir, file_part, &full_path)); + + // make sure we pass through good URLs + std::wstring fixedup; + for (int i = 0; i < arraysize(fixup_cases); ++i) { + fixup_case value = fixup_cases[i]; + fixedup = URLFixerUpper::FixupRelativeFile(dir, value.input); + EXPECT_EQ(value.output, fixedup); + } + + // make sure the existing file got fixed-up to a file URL, and that there + // are no backslashes + fixedup = URLFixerUpper::FixupRelativeFile(dir, file_part); + EXPECT_PRED2(IsMatchingFileURL, fixedup, full_path); + EXPECT_TRUE(DeleteFile(full_path.c_str())); + + // create a filename we know doesn't exist and make sure it doesn't get + // fixed up to a file URL + std::wstring nonexistent_file(L"url_fixer_upper_nonexistent_file.txt"); + fixedup = URLFixerUpper::FixupRelativeFile(dir, nonexistent_file); + EXPECT_NE(std::wstring(L"file:///"), fixedup.substr(0, 8)); + EXPECT_FALSE(IsMatchingFileURL(fixedup, nonexistent_file)); + + // make a subdir to make sure relative paths with directories work, also + // test spaces: "app_dir\url fixer-upper dir\url fixer-upper existing file.txt" + std::wstring sub_dir(L"url fixer-upper dir"); + std::wstring sub_file(L"url fixer-upper existing file.txt"); + std::wstring new_dir = dir + L"\\" + sub_dir; + CreateDirectory(new_dir.c_str(), NULL); + ASSERT_TRUE(MakeTempFile(new_dir, sub_file, &full_path)); + + // test file in the subdir + std::wstring relative_file = sub_dir + L"\\" + sub_file; + fixedup = URLFixerUpper::FixupRelativeFile(dir, relative_file); + EXPECT_PRED2(IsMatchingFileURL, fixedup, full_path); + + // test file in the subdir with different slashes and escaping + relative_file = sub_dir + L"/" + sub_file; + ReplaceSubstringsAfterOffset(&relative_file, 0, L" ", L"%20"); + fixedup = URLFixerUpper::FixupRelativeFile(dir, relative_file); + EXPECT_PRED2(IsMatchingFileURL, fixedup, full_path); + + // test relative directories and duplicate slashes + // (should resolve to the same file as above) + relative_file = sub_dir + L"\\../" + sub_dir + L"\\\\\\.\\" + sub_file; + fixedup = URLFixerUpper::FixupRelativeFile(dir, relative_file); + EXPECT_PRED2(IsMatchingFileURL, fixedup, full_path); + + // done with the subdir + EXPECT_TRUE(DeleteFile(full_path.c_str())); + 
+
+  // Done with the subdir.
+  EXPECT_TRUE(DeleteFile(full_path.c_str()));
+  EXPECT_TRUE(RemoveDirectory(new_dir.c_str()));
+}