summaryrefslogtreecommitdiffstats
path: root/net/proxy/proxy_script_fetcher.cc
diff options
context:
space:
mode:
authoreroman@chromium.org <eroman@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2009-09-22 03:06:54 +0000
committereroman@chromium.org <eroman@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2009-09-22 03:06:54 +0000
commit8f3c963473091104513c05328fe2fe98989e8339 (patch)
tree6dcf38d7305ee622a13b56c5f30143eb833148ef /net/proxy/proxy_script_fetcher.cc
parent0f3dfb420337c569091ac303081c30c9e060f842 (diff)
downloadchromium_src-8f3c963473091104513c05328fe2fe98989e8339.zip
chromium_src-8f3c963473091104513c05328fe2fe98989e8339.tar.gz
chromium_src-8f3c963473091104513c05328fe2fe98989e8339.tar.bz2
Respect the charset specified in PAC file responses.
I have updated the documentation of ProxyResolver and ProxyScriptFetcher to indicate that the response must always be given as UTF-8, so ProxyScriptFetcher is responsible for performing any charset conversions internally. This CL also adds a unit test to make sure that content-encodings (like gzip) are respected. This was not previously broken, but it is a related area that wasn't being tested. BUG=http://crbug.com/22310 Review URL: http://codereview.chromium.org/210028 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@26790 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net/proxy/proxy_script_fetcher.cc')
-rw-r--r--net/proxy/proxy_script_fetcher.cc34
1 files changed, 32 insertions, 2 deletions
diff --git a/net/proxy/proxy_script_fetcher.cc b/net/proxy/proxy_script_fetcher.cc
index d93c5c2..83189ef 100644
--- a/net/proxy/proxy_script_fetcher.cc
+++ b/net/proxy/proxy_script_fetcher.cc
@@ -43,6 +43,30 @@ bool IsPacMimeType(const std::string& mime_type) {
return false;
}
+// Converts |*bytes|, interpreted as text encoded in |charset|, to UTF-8 and stores the result back into |*bytes| (the original contents are overwritten).
+// If |charset| is empty the encoding is unknown, so ISO-8859-1 is assumed; otherwise |charset| is handed to CodepageToWide() as the codepage name as-is.
+void ConvertResponseToUTF8(const std::string& charset, std::string* bytes) {
+ const char* codepage;
+
+ if (charset.empty()) {
+ // No charset was specified: fall back to ISO-8859-1 as a guess.
+ codepage = "ISO-8859-1";
+ } else {
+ // Otherwise trust the charset that was provided. |codepage| points at |charset|'s own buffer; no copy is made.
+ codepage = charset.c_str();
+ }
+
+ // We will be generous in the conversion -- if any characters lie
+ // outside of |charset| (i.e. invalid), then substitute them with
+ // U+FFFD rather than failing. NOTE(review): any status reported by CodepageToWide() is not checked here -- confirm what |tmp_wide| holds when |codepage| is not a recognized name.
+ std::wstring tmp_wide;
+ CodepageToWide(*bytes, codepage,
+ OnStringUtilConversionError::SUBSTITUTE,
+ &tmp_wide);
+ // TODO(eroman): would be nice to have a CodepageToUTF8() function; until then the conversion goes through an intermediate wide string.
+ *bytes = WideToUTF8(tmp_wide);
+}
+
} // namespace
class ProxyScriptFetcherImpl : public ProxyScriptFetcher,
@@ -273,9 +297,15 @@ void ProxyScriptFetcherImpl::ReadBody(URLRequest* request) {
}
void ProxyScriptFetcherImpl::FetchCompleted() {
- // On error, the caller expects empty string for bytes.
- if (result_code_ != OK)
+ if (result_code_ == OK) {
+ // The caller expects the response to be encoded as UTF8.
+ std::string charset;
+ cur_request_->GetCharset(&charset);
+ ConvertResponseToUTF8(charset, result_bytes_);
+ } else {
+ // On error, the caller expects empty string for bytes.
result_bytes_->clear();
+ }
int result_code = result_code_;
CompletionCallback* callback = callback_;