From 8f3c963473091104513c05328fe2fe98989e8339 Mon Sep 17 00:00:00 2001
From: "eroman@chromium.org"
Date: Tue, 22 Sep 2009 03:06:54 +0000
Subject: Respect the charset specified in PAC file responses.

I have updated the documentation of ProxyResolver and ProxyScriptFetcher to
indicate that the response must always be given as UTF8. So
ProxyScriptFetcher is responsible for any charset conversions internally.

This CL also adds a unit-test to make sure that content-encodings are
respected (like gzip). This was not previously broken, but it is a related
area (and wasn't being tested.)

BUG=http://crbug.com/22310

Review URL: http://codereview.chromium.org/210028

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@26790 0039d316-1c4b-4281-b951-d872f2087c98
---
 net/proxy/proxy_script_fetcher.cc | 34 ++++++++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

(limited to 'net/proxy/proxy_script_fetcher.cc')

diff --git a/net/proxy/proxy_script_fetcher.cc b/net/proxy/proxy_script_fetcher.cc
index d93c5c2..83189ef 100644
--- a/net/proxy/proxy_script_fetcher.cc
+++ b/net/proxy/proxy_script_fetcher.cc
@@ -43,6 +43,30 @@ bool IsPacMimeType(const std::string& mime_type) {
   return false;
 }
 
+// Convert |bytes| (which is encoded by |charset|) in place to UTF8.
+// If |charset| is empty, then we don't know what it was and guess.
+void ConvertResponseToUTF8(const std::string& charset, std::string* bytes) {
+  const char* codepage;
+
+  if (charset.empty()) {
+    // Assume ISO-8859-1 if no charset was specified.
+    codepage = "ISO-8859-1";
+  } else {
+    // Otherwise trust the charset that was provided.
+    codepage = charset.c_str();
+  }
+
+  // We will be generous in the conversion -- if any characters lie
+  // outside of |charset| (i.e. invalid), then substitute them with
+  // U+FFFD rather than failing.
+  std::wstring tmp_wide;
+  CodepageToWide(*bytes, codepage,
+                 OnStringUtilConversionError::SUBSTITUTE,
+                 &tmp_wide);
+  // TODO(eroman): would be nice to have a CodepageToUTF8() function.
+  *bytes = WideToUTF8(tmp_wide);
+}
+
 }  // namespace
 
 class ProxyScriptFetcherImpl : public ProxyScriptFetcher,
@@ -273,9 +297,15 @@ void ProxyScriptFetcherImpl::ReadBody(URLRequest* request) {
 }
 
 void ProxyScriptFetcherImpl::FetchCompleted() {
-  // On error, the caller expects empty string for bytes.
-  if (result_code_ != OK)
+  if (result_code_ == OK) {
+    // The caller expects the response to be encoded as UTF8.
+    std::string charset;
+    cur_request_->GetCharset(&charset);
+    ConvertResponseToUTF8(charset, result_bytes_);
+  } else {
+    // On error, the caller expects empty string for bytes.
     result_bytes_->clear();
+  }
 
   int result_code = result_code_;
   CompletionCallback* callback = callback_;
--
cgit v1.1