Respect the charset specified in PAC file responses.

I have updated the documentation of ProxyResolver and ProxyScriptFetcher to indicate that the response must always be given as UTF-8, so ProxyScriptFetcher is responsible for performing any charset conversion internally. This CL also adds a unit test to make sure that content encodings (such as gzip) are respected. This was not previously broken, but it is a related area that wasn't being tested. BUG=http://crbug.com/22310 Review URL: http://codereview.chromium.org/210028 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@26790 0039d316-1c4b-4281-b951-d872f2087c98
author: eroman@chromium.org <eroman@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-09-22 03:06:54 +0000
committer: eroman@chromium.org <eroman@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-09-22 03:06:54 +0000
commit: 8f3c963473091104513c05328fe2fe98989e8339 (patch)
tree: 6dcf38d7305ee622a13b56c5f30143eb833148ef /net/proxy/proxy_script_fetcher.cc
parent: 0f3dfb420337c569091ac303081c30c9e060f842 (diff)
download: chromium_src-8f3c963473091104513c05328fe2fe98989e8339.zip
chromium_src-8f3c963473091104513c05328fe2fe98989e8339.tar.gz
chromium_src-8f3c963473091104513c05328fe2fe98989e8339.tar.bz2
1 files changed, 32 insertions, 2 deletions
diff --git a/net/proxy/proxy_script_fetcher.cc b/net/proxy/proxy_script_fetcher.cc
index d93c5c2..83189ef 100644
--- a/net/proxy/proxy_script_fetcher.cc
+++ b/net/proxy/proxy_script_fetcher.cc
@@ -43,6 +43,30 @@ bool IsPacMimeType(const std::string& mime_type) {
   return false;
 }
 
+// Converts |bytes|, which is encoded using |charset|, to UTF-8 in place.
+// If |charset| is empty the real encoding is unknown, so we guess one.
+void ConvertResponseToUTF8(const std::string& charset, std::string* bytes) {
+  const char* codepage;
+
+  if (charset.empty()) {
+    // No charset was specified: fall back to ISO-8859-1.
+    codepage = "ISO-8859-1";
+  } else {
+    // A charset was provided, so trust it (|codepage| aliases |charset|).
+    codepage = charset.c_str();
+  }
+
+  // Be lenient: bytes that are invalid in |codepage| are replaced with U+FFFD
+  // instead of failing. NOTE(review): the CodepageToWide() result is ignored;
+  // confirm what happens when |codepage| names an unrecognized charset.
+  std::wstring tmp_wide;
+  CodepageToWide(*bytes, codepage,
+                  OnStringUtilConversionError::SUBSTITUTE,
+                  &tmp_wide);
+  // TODO(eroman): would be nice to have a CodepageToUTF8() function.
+  *bytes = WideToUTF8(tmp_wide);
+}
+
 }  // namespace
 
 class ProxyScriptFetcherImpl : public ProxyScriptFetcher,
@@ -273,9 +297,15 @@ void ProxyScriptFetcherImpl::ReadBody(URLRequest* request) {
 }
 
 void ProxyScriptFetcherImpl::FetchCompleted() {
-  // On error, the caller expects empty string for bytes.
-  if (result_code_ != OK)
+  if (result_code_ == OK) {
+    // The caller expects the response to be encoded as UTF8.
+    std::string charset;
+    cur_request_->GetCharset(&charset);
+    ConvertResponseToUTF8(charset, result_bytes_);
+  } else {
+    // On error, the caller expects empty string for bytes.
     result_bytes_->clear();
+  }
 
   int result_code = result_code_;
   CompletionCallback* callback = callback_;
author	eroman@chromium.org <eroman@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2009-09-22 03:06:54 +0000
committer	eroman@chromium.org <eroman@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2009-09-22 03:06:54 +0000
commit	8f3c963473091104513c05328fe2fe98989e8339 (patch)
tree	6dcf38d7305ee622a13b56c5f30143eb833148ef /net/proxy/proxy_script_fetcher.cc
parent	0f3dfb420337c569091ac303081c30c9e060f842 (diff)
download	chromium_src-8f3c963473091104513c05328fe2fe98989e8339.zip chromium_src-8f3c963473091104513c05328fe2fe98989e8339.tar.gz chromium_src-8f3c963473091104513c05328fe2fe98989e8339.tar.bz2