// Copyright (c) 2008 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "net/proxy/proxy_script_fetcher.h"
#include "base/compiler_specific.h"
#include "base/i18n/icu_string_conversions.h"
#include "base/logging.h"
#include "base/message_loop.h"
#include "base/ref_counted.h"
#include "base/string_util.h"
#include "base/utf_string_conversions.h"
#include "net/base/io_buffer.h"
#include "net/base/load_flags.h"
#include "net/base/net_errors.h"
#include "net/http/http_response_headers.h"
#include "net/url_request/url_request.h"
// TODO(eroman):
// - Support auth-prompts.
namespace net {
namespace {
// The maximum size (in bytes) allowed for a PAC script. Responses exceeding
// this will fail with ERR_FILE_TOO_BIG.
size_t max_response_bytes = 1048576;  // 1 megabyte
// The maximum duration (in milliseconds) allowed for fetching the PAC script.
// Responses exceeding this will fail with ERR_TIMED_OUT.
int max_duration_ms = 300000; // 5 minutes
// Returns true if |mime_type| is one of the known PAC mime types.
bool IsPacMimeType(const std::string& mime_type) {
static const char * const kSupportedPacMimeTypes[] = {
"application/x-ns-proxy-autoconfig",
"application/x-javascript-config",
};
for (size_t i = 0; i < arraysize(kSupportedPacMimeTypes); ++i) {
if (LowerCaseEqualsASCII(mime_type, kSupportedPacMimeTypes[i]))
return true;
}
return false;
}
// Convert |bytes| (which is encoded by |charset|) in place to UTF8.
// If |charset| is empty, then we don't know what it was and guess.
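// For example, a response served with a charset parameter such as Shift_JIS
// is decoded using that encoding, while a response with no charset parameter
// is treated as ISO-8859-1; either way the caller ends up with UTF-8.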
void ConvertResponseToUTF8(const std::string& charset, std::string* bytes) {
const char* codepage;
if (charset.empty()) {
// Assume ISO-8859-1 if no charset was specified.
codepage = base::kCodepageLatin1;
} else {
// Otherwise trust the charset that was provided.
codepage = charset.c_str();
}
  // We will be generous in the conversion -- if any characters lie outside of
  // |charset| (i.e. are invalid), we substitute them with U+FFFD rather than
  // failing the conversion.
std::wstring tmp_wide;
base::CodepageToWide(*bytes, codepage,
base::OnStringConversionError::SUBSTITUTE,
&tmp_wide);
// TODO(eroman): would be nice to have a CodepageToUTF8() function.
*bytes = WideToUTF8(tmp_wide);
}
} // namespace
class ProxyScriptFetcherImpl : public ProxyScriptFetcher,
public URLRequest::Delegate {
public:
// Creates a ProxyScriptFetcher that issues requests through
// |url_request_context|. |url_request_context| must remain valid for the
// lifetime of ProxyScriptFetcherImpl.
explicit ProxyScriptFetcherImpl(URLRequestContext* url_request_context);
virtual ~ProxyScriptFetcherImpl();
// ProxyScriptFetcher methods:
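  // In this implementation Fetch() always completes asynchronously: it
  // returns ERR_IO_PENDING and later runs |callback| with the final error
  // code, filling |bytes| with the UTF-8 converted response body on success.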
virtual int Fetch(const GURL& url, std::string* bytes,
CompletionCallback* callback);
virtual void Cancel();
virtual URLRequestContext* GetRequestContext();
// URLRequest::Delegate methods:
virtual void OnAuthRequired(URLRequest* request,
AuthChallengeInfo* auth_info);
virtual void OnSSLCertificateError(URLRequest* request, int cert_error,
X509Certificate* cert);
virtual void OnResponseStarted(URLRequest* request);
virtual void OnReadCompleted(URLRequest* request, int num_bytes);
virtual void OnResponseCompleted(URLRequest* request);
private:
// Read more bytes from the response.
void ReadBody(URLRequest* request);
// Called once the request has completed to notify the caller of
// |response_code_| and |response_bytes_|.
void FetchCompleted();
// Clear out the state for the current request.
void ResetCurRequestState();
// Callback for time-out task of request with id |id|.
void OnTimeout(int id);
// Factory for creating the time-out task. This takes care of revoking
// outstanding tasks when |this| is deleted.
ScopedRunnableMethodFactory<ProxyScriptFetcherImpl> task_factory_;
// The context used for making network requests.
URLRequestContext* url_request_context_;
// Buffer that URLRequest writes into.
enum { kBufSize = 4096 };
scoped_refptr<net::IOBuffer> buf_;
// The next ID to use for |cur_request_| (monotonically increasing).
int next_id_;
// The current (in progress) request, or NULL.
scoped_ptr<URLRequest> cur_request_;
// State for current request (only valid when |cur_request_| is not NULL):
// Unique ID for the current request.
int cur_request_id_;
// Callback to invoke on completion of the fetch.
CompletionCallback* callback_;
// Holds the error condition that was hit on the current request, or OK.
int result_code_;
// Holds the bytes read so far. Will not exceed |max_response_bytes|. This
// buffer is owned by the owner of |callback|.
std::string* result_bytes_;
};
ProxyScriptFetcherImpl::ProxyScriptFetcherImpl(
URLRequestContext* url_request_context)
: ALLOW_THIS_IN_INITIALIZER_LIST(task_factory_(this)),
url_request_context_(url_request_context),
buf_(new net::IOBuffer(kBufSize)),
next_id_(0),
cur_request_(NULL),
cur_request_id_(0),
callback_(NULL),
result_code_(OK),
result_bytes_(NULL) {
DCHECK(url_request_context);
}
ProxyScriptFetcherImpl::~ProxyScriptFetcherImpl() {
// The URLRequest's destructor will cancel the outstanding request, and
// ensure that the delegate (this) is not called again.
}
int ProxyScriptFetcherImpl::Fetch(const GURL& url,
std::string* bytes,
CompletionCallback* callback) {
// It is invalid to call Fetch() while a request is already in progress.
DCHECK(!cur_request_.get());
DCHECK(callback);
DCHECK(bytes);
cur_request_.reset(new URLRequest(url, this));
cur_request_->set_context(url_request_context_);
cur_request_->set_method("GET");
// Make sure that the PAC script is downloaded using a direct connection,
// to avoid circular dependencies (fetching is a part of proxy resolution).
  // Also disable the use of the disk cache, so that if the user switches
  // networks we don't potentially reuse a cached response from the old
  // network when we should in fact be re-fetching on the new network.
cur_request_->set_load_flags(LOAD_BYPASS_PROXY | LOAD_DISABLE_CACHE);
// Save the caller's info for notification on completion.
callback_ = callback;
result_bytes_ = bytes;
result_bytes_->clear();
// Post a task to timeout this request if it takes too long.
cur_request_id_ = ++next_id_;
MessageLoop::current()->PostDelayedTask(FROM_HERE,
task_factory_.NewRunnableMethod(&ProxyScriptFetcherImpl::OnTimeout,
cur_request_id_),
static_cast<int>(max_duration_ms));
// Start the request.
cur_request_->Start();
return ERR_IO_PENDING;
}
void ProxyScriptFetcherImpl::Cancel() {
// ResetCurRequestState will free the URLRequest, which will cause
// cancellation.
ResetCurRequestState();
}
URLRequestContext* ProxyScriptFetcherImpl::GetRequestContext() {
return url_request_context_;
}
void ProxyScriptFetcherImpl::OnAuthRequired(URLRequest* request,
AuthChallengeInfo* auth_info) {
DCHECK(request == cur_request_.get());
// TODO(eroman):
LOG(WARNING) << "Auth required to fetch PAC script, aborting.";
result_code_ = ERR_NOT_IMPLEMENTED;
request->CancelAuth();
}
void ProxyScriptFetcherImpl::OnSSLCertificateError(URLRequest* request,
int cert_error,
X509Certificate* cert) {
DCHECK(request == cur_request_.get());
LOG(WARNING) << "SSL certificate error when fetching PAC script, aborting.";
  // Certificate errors are in the same space as net errors.
result_code_ = cert_error;
request->Cancel();
}
void ProxyScriptFetcherImpl::OnResponseStarted(URLRequest* request) {
DCHECK(request == cur_request_.get());
if (!request->status().is_success()) {
OnResponseCompleted(request);
return;
}
// Require HTTP responses to have a success status code.
if (request->url().SchemeIs("http") || request->url().SchemeIs("https")) {
// NOTE about status codes: We are like Firefox 3 in this respect.
// {IE 7, Safari 3, Opera 9.5} do not care about the status code.
if (request->GetResponseCode() != 200) {
LOG(INFO) << "Fetched PAC script had (bad) status line: "
<< request->response_headers()->GetStatusLine();
result_code_ = ERR_PAC_STATUS_NOT_OK;
request->Cancel();
return;
}
// NOTE about mime types: We do not enforce mime types on PAC files.
// This is for compatibility with {IE 7, Firefox 3, Opera 9.5}. We will
// however log mismatches to help with debugging.
std::string mime_type;
cur_request_->GetMimeType(&mime_type);
if (!IsPacMimeType(mime_type)) {
LOG(INFO) << "Fetched PAC script does not have a proper mime type: "
<< mime_type;
}
}
ReadBody(request);
}
void ProxyScriptFetcherImpl::OnReadCompleted(URLRequest* request,
int num_bytes) {
DCHECK(request == cur_request_.get());
if (num_bytes > 0) {
// Enforce maximum size bound.
if (num_bytes + result_bytes_->size() >
static_cast<size_t>(max_response_bytes)) {
result_code_ = ERR_FILE_TOO_BIG;
request->Cancel();
return;
}
result_bytes_->append(buf_->data(), num_bytes);
ReadBody(request);
} else { // Error while reading, or EOF
OnResponseCompleted(request);
}
}
void ProxyScriptFetcherImpl::OnResponseCompleted(URLRequest* request) {
DCHECK(request == cur_request_.get());
// Use |result_code_| as the request's error if we have already set it to
// something specific.
if (result_code_ == OK && !request->status().is_success())
result_code_ = request->status().os_error();
FetchCompleted();
}
void ProxyScriptFetcherImpl::ReadBody(URLRequest* request) {
int num_bytes;
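  // URLRequest::Read() returns true when data was read synchronously (with
  // |num_bytes| == 0 signalling EOF). It returns false either because the
  // read is pending, in which case OnReadCompleted() will be called later by
  // the URLRequest, or because it failed outright, which is handled below.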
if (request->Read(buf_, kBufSize, &num_bytes)) {
OnReadCompleted(request, num_bytes);
} else if (!request->status().is_io_pending()) {
// Read failed synchronously.
OnResponseCompleted(request);
}
}
void ProxyScriptFetcherImpl::FetchCompleted() {
if (result_code_ == OK) {
// The caller expects the response to be encoded as UTF8.
std::string charset;
cur_request_->GetCharset(&charset);
ConvertResponseToUTF8(charset, result_bytes_);
} else {
// On error, the caller expects empty string for bytes.
result_bytes_->clear();
}
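  // Hand back local copies and clear the per-request state before running the
  // callback, so that the callback is free to start another Fetch() on this
  // object without tripping over stale state.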
int result_code = result_code_;
CompletionCallback* callback = callback_;
ResetCurRequestState();
callback->Run(result_code);
}
void ProxyScriptFetcherImpl::ResetCurRequestState() {
cur_request_.reset();
cur_request_id_ = 0;
callback_ = NULL;
result_code_ = OK;
result_bytes_ = NULL;
}
void ProxyScriptFetcherImpl::OnTimeout(int id) {
// Timeout tasks may outlive the URLRequest they reference. Make sure it
// is still applicable.
if (cur_request_id_ != id)
return;
DCHECK(cur_request_.get());
result_code_ = ERR_TIMED_OUT;
cur_request_->Cancel();
}
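// Example usage (a sketch only; |context| is assumed to be a valid
// URLRequestContext, |callback| a caller-provided CompletionCallback, and the
// URL is merely illustrative):
//
//   ProxyScriptFetcher* fetcher = ProxyScriptFetcher::Create(context);
//   std::string pac_bytes;
//   int rv = fetcher->Fetch(GURL("http://wpad/wpad.dat"), &pac_bytes,
//                           &callback);
//   // rv is ERR_IO_PENDING; |callback| is later run with the final error
//   // code, and on OK |pac_bytes| holds the PAC script as UTF-8 text.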
// static
ProxyScriptFetcher* ProxyScriptFetcher::Create(
URLRequestContext* url_request_context) {
return new ProxyScriptFetcherImpl(url_request_context);
}
// static
int ProxyScriptFetcher::SetTimeoutConstraintForUnittest(
int timeout_ms) {
int prev = max_duration_ms;
max_duration_ms = timeout_ms;
return prev;
}
// static
size_t ProxyScriptFetcher::SetSizeConstraintForUnittest(size_t size_bytes) {
size_t prev = max_response_bytes;
max_response_bytes = size_bytes;
return prev;
}
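// Example (a sketch for a hypothetical unittest): tighten both limits for the
// duration of a test, then restore the previous values so other tests are
// unaffected.
//
//   int old_ms = ProxyScriptFetcher::SetTimeoutConstraintForUnittest(500);
//   size_t old_size = ProxyScriptFetcher::SetSizeConstraintForUnittest(500);
//   ... exercise the fetcher ...
//   ProxyScriptFetcher::SetTimeoutConstraintForUnittest(old_ms);
//   ProxyScriptFetcher::SetSizeConstraintForUnittest(old_size);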
} // namespace net