diff options
author | pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-11-07 01:34:53 +0000 |
---|---|---|
committer | pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-11-07 01:34:53 +0000 |
commit | ce85f60cd9d399109dab39fe5a9613879ab9a8f7 (patch) | |
tree | 0e9e0072d2e5eadfeec08eef0f06a43c56dc1751 /net/base | |
parent | d90684d0cf0aa16389c9202153c97d373829b7f3 (diff) | |
download | chromium_src-ce85f60cd9d399109dab39fe5a9613879ab9a8f7.zip chromium_src-ce85f60cd9d399109dab39fe5a9613879ab9a8f7.tar.gz chromium_src-ce85f60cd9d399109dab39fe5a9613879ab9a8f7.tar.bz2 |
Fix various problems with inline autocomplete and URLs that change length during fixup:
* URLs with http auth info, which gets stripped
* URLs with IDN hosts
* URLs with escaped values that get unescaped
In cases like these, we'd inline autocomplete from the wrong locations, highlight the wrong portions of the URL as matches, and sometimes hit a DCHECK() failure in debug mode.
The fix is to track how fixup affects the offsets into the URL we care about. Plumbing this required an enormous number of additions :(
There is also a fix here to the URL Fixer Upper, which was obviously modified at some point in the past to use the Parsed components, but without updating the comments or some of the functionality to match. Since this isn't supposed to "fix up" things that aren't simple typos, I removed some code to "fix" bogus ports, which was causing bizarre effects when typing HTTP auth URLs ("http://foo:bar" would be fixed to "http://foo" and then matched for inline autocompletion, which was clearly wrong). This is tested incidentally by one of the new History URL Provider tests (which is how I discovered it).
BUG=4010
TEST=Covered by unittests
Review URL: http://codereview.chromium.org/372017
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@31352 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net/base')
-rw-r--r-- | net/base/escape.cc | 42 | ||||
-rw-r--r-- | net/base/escape.h | 24 | ||||
-rw-r--r-- | net/base/escape_unittest.cc | 125 | ||||
-rw-r--r-- | net/base/net_util.cc | 355 | ||||
-rw-r--r-- | net/base/net_util.h | 59 | ||||
-rw-r--r-- | net/base/net_util_unittest.cc | 183 |
6 files changed, 550 insertions, 238 deletions
diff --git a/net/base/escape.cc b/net/base/escape.cc index 3d2aca2..5196eb6 100644 --- a/net/base/escape.cc +++ b/net/base/escape.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Copyright (c) 2009 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -107,7 +107,14 @@ const char kUrlUnescape[128] = { }; std::string UnescapeURLImpl(const std::string& escaped_text, - UnescapeRule::Type rules) { + UnescapeRule::Type rules, + size_t* offset_for_adjustment) { + size_t offset_temp = std::wstring::npos; + if (!offset_for_adjustment) + offset_for_adjustment = &offset_temp; + else if (*offset_for_adjustment >= escaped_text.length()) + *offset_for_adjustment = std::wstring::npos; + // Do not unescape anything, return the |escaped_text| text. if (rules == UnescapeRule::NONE) return escaped_text; @@ -136,8 +143,17 @@ std::string UnescapeURLImpl(const std::string& escaped_text, // Additionally allow control characters if requested. (value < ' ' && (rules & UnescapeRule::CONTROL_CHARS)))) { // Use the unescaped version of the character. + size_t length_before_append = result.length(); result.push_back(value); i += 2; + + // Adjust offset to match length change. + if (*offset_for_adjustment != std::string::npos) { + if (*offset_for_adjustment > (length_before_append + 2)) + *offset_for_adjustment -= 2; + else if (*offset_for_adjustment > length_before_append) + *offset_for_adjustment = std::string::npos; + } } else { // Keep escaped. Append a percent and we'll get the following two // digits on the next loops through. 
@@ -231,19 +247,27 @@ bool EscapeQueryParamValue(const std::wstring& text, const char* codepage, return true; } -std::wstring UnescapeAndDecodeURLComponent(const std::string& text, - const char* codepage, - UnescapeRule::Type rules) { +std::wstring UnescapeAndDecodeUTF8URLComponent(const std::string& text, + UnescapeRule::Type rules, + size_t* offset_for_adjustment) { std::wstring result; - if (base::CodepageToWide(UnescapeURLImpl(text, rules), codepage, - base::OnStringConversionError::FAIL, &result)) + size_t original_offset = offset_for_adjustment ? *offset_for_adjustment : 0; + if (base::CodepageToWideAndAdjustOffset( + UnescapeURLImpl(text, rules, offset_for_adjustment), + "UTF-8", base::OnStringConversionError::FAIL, &result, + offset_for_adjustment)) return result; // Character set looks like it's valid. - return UTF8ToWide(text); // Return the escaped version when it's not. + + // Not valid. Return the escaped version. Undo our changes to + // |offset_for_adjustment| since we haven't changed the string after all. + if (offset_for_adjustment) + *offset_for_adjustment = original_offset; + return UTF8ToWideAndAdjustOffset(text, offset_for_adjustment); } std::string UnescapeURLComponent(const std::string& escaped_text, UnescapeRule::Type rules) { - return UnescapeURLImpl(escaped_text, rules); + return UnescapeURLImpl(escaped_text, rules, NULL); } template <class str> diff --git a/net/base/escape.h b/net/base/escape.h index 8761d4d..9ff17b6 100644 --- a/net/base/escape.h +++ b/net/base/escape.h @@ -1,4 +1,4 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Copyright (c) 2009 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. 
@@ -93,17 +93,17 @@ std::string UnescapeURLComponent(const std::string& escaped_text, UnescapeRule::Type rules); // Unescapes the given substring as a URL, and then tries to interpret the -// result as being encoded in the given code page. If the result is convertable -// into the code page, it will be returned as converted. If it is not, the -// original escaped string will be converted into a wide string and returned. -std::wstring UnescapeAndDecodeURLComponent(const std::string& text, - const char* codepage, - UnescapeRule::Type rules); -inline std::wstring UnescapeAndDecodeUTF8URLComponent( - const std::string& text, - UnescapeRule::Type rules) { - return UnescapeAndDecodeURLComponent(text, "UTF-8", rules); -} +// result as being encoded as UTF-8. If the result is convertable into UTF-8, it +// will be returned as converted. If it is not, the original escaped string will +// be converted into a wide string and returned. +// +// |offset_for_adjustment| may be NULL; if not, it is an offset into |text| that +// will be adjusted to point at the same logical place in the result string. If +// this isn't possible because it points into the middle of an escape sequence +// or past the end of the string, it will be set to std::wstring::npos. +std::wstring UnescapeAndDecodeUTF8URLComponent(const std::string& text, + UnescapeRule::Type rules, + size_t* offset_for_adjustment); // Deprecated ------------------------------------------------------------------ diff --git a/net/base/escape_unittest.cc b/net/base/escape_unittest.cc index 44bb9972..8e5e7dc 100644 --- a/net/base/escape_unittest.cc +++ b/net/base/escape_unittest.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Copyright (c) 2009 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. 
@@ -24,8 +24,7 @@ struct UnescapeURLCase { const char* output; }; -struct UnescapeAndDecodeURLCase { - const char* encoding; +struct UnescapeAndDecodeCase { const char* input; // The expected output when run through UnescapeURL. @@ -38,6 +37,12 @@ struct UnescapeAndDecodeURLCase { const wchar_t* decoded; }; +struct AdjustOffsetCase { + const char* input; + size_t input_offset; + size_t output_offset; +}; + struct EscapeForHTMLCase { const char* input; const char* expected_output; @@ -45,7 +50,7 @@ struct EscapeForHTMLCase { } // namespace -TEST(Escape, EscapeTextForFormSubmission) { +TEST(EscapeTest, EscapeTextForFormSubmission) { const EscapeCase escape_cases[] = { {L"foo", L"foo"}, {L"foo bar", L"foo+bar"}, @@ -93,7 +98,7 @@ TEST(Escape, EscapeTextForFormSubmission) { EXPECT_EQ(wide, EscapeQueryParamValueUTF8(test_str)); } -TEST(Escape, EscapePath) { +TEST(EscapeTest, EscapePath) { ASSERT_EQ( // Most of the character space we care about, un-escaped EscapePath( @@ -108,7 +113,7 @@ TEST(Escape, EscapePath) { "%7B%7C%7D~%7F%80%FF"); } -TEST(Escape, EscapeUrlEncodedData) { +TEST(EscapeTest, EscapeUrlEncodedData) { ASSERT_EQ( // Most of the character space we care about, un-escaped EscapeUrlEncodedData( @@ -123,7 +128,7 @@ TEST(Escape, EscapeUrlEncodedData) { "%7B%7C%7D~%7F%80%FF"); } -TEST(Escape, UnescapeURLComponent) { +TEST(EscapeTest, UnescapeURLComponent) { const UnescapeURLCase unescape_cases[] = { {"", UnescapeRule::NORMAL, ""}, {"%2", UnescapeRule::NORMAL, "%2"}, @@ -184,40 +189,48 @@ TEST(Escape, UnescapeURLComponent) { EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL)); } -TEST(Escape, UnescapeAndDecodeURLComponent) { - const UnescapeAndDecodeURLCase unescape_cases[] = { - {"UTF8", "%", "%", "%", L"%"}, - {"UTF8", "+", "+", " ", L"+"}, - {"UTF8", "%2+", "%2+", "%2 ", L"%2+"}, - {"UTF8", "+%%%+%%%", "+%%%+%%%", " %%% %%%", L"+%%%+%%%"}, - {"UTF8", "Don't escape anything", - "Don't escape anything", - "Don't escape anything", - L"Don't 
escape anything"}, - {"UTF8", "+Invalid %escape %2+", - "+Invalid %escape %2+", - " Invalid %escape %2 ", - L"+Invalid %escape %2+"}, - {"UTF8", "Some random text %25%3bOK", - "Some random text %25;OK", - "Some random text %25;OK", - L"Some random text %25;OK"}, - {"UTF8", "%01%02%03%04%05%06%07%08%09", - "%01%02%03%04%05%06%07%08%09", - "%01%02%03%04%05%06%07%08%09", - L"%01%02%03%04%05%06%07%08%09"}, - {"UTF8", "%E4%BD%A0+%E5%A5%BD", - "\xE4\xBD\xA0+\xE5\xA5\xBD", - "\xE4\xBD\xA0 \xE5\xA5\xBD", - L"\x4f60+\x597d"}, - {"BIG5", "%A7A%A6n", - "\xA7\x41\xA6n", - "\xA7\x41\xA6n", - L"\x4f60\x597d"}, - {"UTF8", "%ED%ED", // Invalid UTF-8. - "\xED\xED", - "\xED\xED", - L"%ED%ED"}, // Invalid UTF-8 -> kept unescaped. +TEST(EscapeTest, UnescapeAndDecodeUTF8URLComponent) { + const UnescapeAndDecodeCase unescape_cases[] = { + { "%", + "%", + "%", + L"%"}, + { "+", + "+", + " ", + L"+"}, + { "%2+", + "%2+", + "%2 ", + L"%2+"}, + { "+%%%+%%%", + "+%%%+%%%", + " %%% %%%", + L"+%%%+%%%"}, + { "Don't escape anything", + "Don't escape anything", + "Don't escape anything", + L"Don't escape anything"}, + { "+Invalid %escape %2+", + "+Invalid %escape %2+", + " Invalid %escape %2 ", + L"+Invalid %escape %2+"}, + { "Some random text %25%3BOK", + "Some random text %25;OK", + "Some random text %25;OK", + L"Some random text %25;OK"}, + { "%01%02%03%04%05%06%07%08%09", + "%01%02%03%04%05%06%07%08%09", + "%01%02%03%04%05%06%07%08%09", + L"%01%02%03%04%05%06%07%08%09"}, + { "%E4%BD%A0+%E5%A5%BD", + "\xE4\xBD\xA0+\xE5\xA5\xBD", + "\xE4\xBD\xA0 \xE5\xA5\xBD", + L"\x4f60+\x597d"}, + { "%ED%ED", // Invalid UTF-8. + "\xED\xED", + "\xED\xED", + L"%ED%ED"}, // Invalid UTF-8 -> kept unescaped. }; for (size_t i = 0; i < arraysize(unescape_cases); i++) { @@ -230,14 +243,36 @@ TEST(Escape, UnescapeAndDecodeURLComponent) { EXPECT_EQ(std::string(unescape_cases[i].query_unescaped), unescaped); // TODO: Need to test unescape_spaces and unescape_percent. 
- std::wstring decoded = UnescapeAndDecodeURLComponent( - unescape_cases[i].input, unescape_cases[i].encoding, - UnescapeRule::NORMAL); + std::wstring decoded = UnescapeAndDecodeUTF8URLComponent( + unescape_cases[i].input, UnescapeRule::NORMAL, NULL); EXPECT_EQ(std::wstring(unescape_cases[i].decoded), decoded); } } -TEST(Escape, EscapeForHTML) { +TEST(EscapeTest, AdjustOffset) { + const AdjustOffsetCase adjust_cases[] = { + {"", 0, std::wstring::npos}, + {"test", 0, 0}, + {"test", 2, 2}, + {"test", 4, std::wstring::npos}, + {"test", std::wstring::npos, std::wstring::npos}, + {"%3Btest", 6, 4}, + {"%3Btest", 2, std::wstring::npos}, + {"test%3B", 2, 2}, + {"%E4%BD%A0+%E5%A5%BD", 9, 1}, + {"%E4%BD%A0+%E5%A5%BD", 6, std::wstring::npos}, + {"%ED%B0%80+%E5%A5%BD", 6, 6}, + }; + + for (size_t i = 0; i < arraysize(adjust_cases); i++) { + size_t offset = adjust_cases[i].input_offset; + UnescapeAndDecodeUTF8URLComponent(adjust_cases[i].input, + UnescapeRule::NORMAL, &offset); + EXPECT_EQ(adjust_cases[i].output_offset, offset); + } +} + +TEST(EscapeTest, EscapeForHTML) { const EscapeForHTMLCase tests[] = { { "hello", "hello" }, { "<hello>", "<hello>" }, diff --git a/net/base/net_util.cc b/net/base/net_util.cc index 85151e9..9171e54 100644 --- a/net/base/net_util.cc +++ b/net/base/net_util.cc @@ -650,60 +650,51 @@ bool IsIDNComponentSafe(const char16* str, } // Converts one component of a host (between dots) to IDN if safe. The result -// will be APPENDED to the given output string and will be the same as the -// input if it is not IDN or the IDN is unsafe to display. -void IDNToUnicodeOneComponent(const char16* comp, - int comp_len, +// will be APPENDED to the given output string and will be the same as the input +// if it is not IDN or the IDN is unsafe to display. Returns whether any +// conversion was performed. 
+bool IDNToUnicodeOneComponent(const char16* comp, + size_t comp_len, const std::wstring& languages, string16* out) { - DCHECK(comp_len >= 0); + DCHECK(out); if (comp_len == 0) - return; + return false; - // Expand the output string to make room for a possibly longer string - // (we'll expand if it's still not big enough below). - int extra_space = 64; - size_t host_begin_in_output = out->size(); - - // Just copy the input if it can't be an IDN component. - if (comp_len < 4 || - comp[0] != 'x' || comp[1] != 'n' || comp[2] != '-' || comp[3] != '-') { - out->resize(host_begin_in_output + comp_len); - for (int i = 0; i < comp_len; i++) - (*out)[host_begin_in_output + i] = comp[i]; - return; - } + // Only transform if the input can be an IDN component. + static const char16 kIdnPrefix[] = {'x', 'n', '-', '-'}; + if ((comp_len > arraysize(kIdnPrefix)) && + !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(char16))) { + // Repeatedly expand the output string until it's big enough. It looks like + // ICU will return the required size of the buffer, but that's not + // documented, so we'll just grow by 2x. This should be rare and is not on a + // critical path. + size_t original_length = out->length(); + for (int extra_space = 64; ; extra_space *= 2) { + UErrorCode status = U_ZERO_ERROR; + out->resize(out->length() + extra_space); + int output_chars = uidna_IDNToUnicode(comp, + static_cast<int32_t>(comp_len), &(*out)[original_length], extra_space, + UIDNA_DEFAULT, NULL, &status); + if (status == U_ZERO_ERROR) { + // Converted successfully. 
+ out->resize(original_length + output_chars); + if (IsIDNComponentSafe(out->data() + original_length, output_chars, + languages)) + return true; + } - while (true) { - UErrorCode status = U_ZERO_ERROR; - out->resize(out->size() + extra_space); - int output_chars = - uidna_IDNToUnicode(comp, comp_len, &(*out)[host_begin_in_output], - extra_space, UIDNA_DEFAULT, NULL, &status); - if (status == U_ZERO_ERROR) { - // Converted successfully. - out->resize(host_begin_in_output + output_chars); - if (!IsIDNComponentSafe(&out->data()[host_begin_in_output], - output_chars, - languages)) - break; // The error handling below will undo the IDN. - return; + if (status != U_BUFFER_OVERFLOW_ERROR) + break; } - if (status != U_BUFFER_OVERFLOW_ERROR) - break; - - // Need to loop again with a bigger buffer. It looks like ICU will - // return the required size of the buffer, but that's not documented, - // so we'll just grow by 2x. This should be rare and is not on a - // critical path. - extra_space *= 2; + // Failed, revert back to original string. + out->resize(original_length); } - // We get here on error, in which case we replace anything that was added - // with the literal input. - out->resize(host_begin_in_output + comp_len); - for (int i = 0; i < comp_len; i++) - (*out)[host_begin_in_output + i] = comp[i]; + // We get here with no IDN or on error, in which case we just append the + // literal input. + out->append(comp, comp_len); + return false; } // Helper for FormatUrl(). 
@@ -712,19 +703,23 @@ std::wstring FormatViewSourceUrl(const GURL& url, bool omit_username_password, UnescapeRule::Type unescape_rules, url_parse::Parsed* new_parsed, - size_t* prefix_end) { + size_t* prefix_end, + size_t* offset_for_adjustment) { DCHECK(new_parsed); const wchar_t* const kWideViewSource = L"view-source:"; const size_t kViewSourceLengthPlus1 = 12; GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLengthPlus1)); + size_t temp_offset = (*offset_for_adjustment == std::wstring::npos) ? + std::wstring::npos : (*offset_for_adjustment - kViewSourceLengthPlus1); + size_t* temp_offset_ptr = (*offset_for_adjustment < kViewSourceLengthPlus1) ? + NULL : &temp_offset; std::wstring result = net::FormatUrl(real_url, languages, - omit_username_password, unescape_rules, new_parsed, prefix_end); + omit_username_password, unescape_rules, new_parsed, prefix_end, + temp_offset_ptr); result.insert(0, kWideViewSource); // Adjust position values. - if (prefix_end) - *prefix_end += kViewSourceLengthPlus1; if (new_parsed->scheme.is_nonempty()) { // Assume "view-source:real-scheme" as a scheme. new_parsed->scheme.len += kViewSourceLengthPlus1; @@ -746,6 +741,12 @@ std::wstring FormatViewSourceUrl(const GURL& url, new_parsed->query.begin += kViewSourceLengthPlus1; if (new_parsed->ref.is_nonempty()) new_parsed->ref.begin += kViewSourceLengthPlus1; + if (prefix_end) + *prefix_end += kViewSourceLengthPlus1; + if (temp_offset_ptr) { + *offset_for_adjustment = (temp_offset == std::wstring::npos) ? + std::wstring::npos : (temp_offset + kViewSourceLengthPlus1); + } return result; } @@ -769,12 +770,20 @@ std::set<int> explicitly_allowed_ports; // Appends the substring |in_component| inside of the URL |spec| to |output|, // and the resulting range will be filled into |out_component|. |unescape_rules| -// defines how to clean the URL for human readability. +// defines how to clean the URL for human readability. 
|offset_for_adjustment| +// is an offset into |output| which will be adjusted based on how it maps to the +// component being converted; if it is less than output->length(), it will be +// untouched, and if it is greater than output->length() + in_component.len it +// will be shortened by the difference in lengths between the input and output +// components. Otherwise it points into the component being converted, and is +// adjusted to point to the same logical place in |output|. +// |offset_for_adjustment| may not be NULL. static void AppendFormattedComponent(const std::string& spec, const url_parse::Component& in_component, UnescapeRule::Type unescape_rules, std::wstring* output, - url_parse::Component* out_component); + url_parse::Component* out_component, + size_t* offset_for_adjustment); GURL FilePathToFileURL(const FilePath& path) { // Produce a URL like "file:///C:/foo" for a regular file, or @@ -849,58 +858,56 @@ std::string GetHeaderParamValue(const std::string& field, // // We may want to skip this step in the case of file URLs to allow unicode // UNC hostnames regardless of encodings. -void IDNToUnicode(const char* host, - int host_len, - const std::wstring& languages, - std::wstring* out) { +std::wstring IDNToUnicode(const char* host, + size_t host_len, + const std::wstring& languages, + size_t* offset_for_adjustment) { // Convert the ASCII input to a wide string for ICU. string16 input16; input16.reserve(host_len); - for (int i = 0; i < host_len; i++) - input16.push_back(host[i]); + std::copy(host, host + host_len, std::back_inserter(input16)); string16 out16; - // The output string is appended to, so convert what's already there if - // needed. -#if defined(WCHAR_T_IS_UTF32) - WideToUTF16(out->data(), out->length(), &out16); - out->clear(); // for equivalence with the swap below -#elif defined(WCHAR_T_IS_UTF16) - out->swap(out16); -#endif + size_t output_offset = offset_for_adjustment ? 
+ *offset_for_adjustment : std::wstring::npos; // Do each component of the host separately, since we enforce script matching // on a per-component basis. - size_t cur_begin = 0; // Beginning of the current component (inclusive). - while (cur_begin < input16.size()) { - // Find the next dot or the end of the string. - size_t next_dot = input16.find_first_of('.', cur_begin); - if (next_dot == std::wstring::npos) - next_dot = input16.size(); // For getting the last component. - - if (next_dot > cur_begin) { + for (size_t component_start = 0, component_end; + component_start < input16.length(); + component_start = component_end + 1) { + // Find the end of the component. + component_end = input16.find('.', component_start); + if (component_end == string16::npos) + component_end = input16.length(); // For getting the last component. + size_t component_length = component_end - component_start; + + size_t output_component_start = out16.length(); + bool converted_idn = false; + if (component_end > component_start) { // Add the substring that we just found. - IDNToUnicodeOneComponent(&input16[cur_begin], - static_cast<int>(next_dot - cur_begin), - languages, - &out16); + converted_idn = IDNToUnicodeOneComponent(input16.data() + component_start, + component_length, languages, &out16); + } + size_t output_component_length = out16.length() - output_component_start; + + if ((output_offset != std::wstring::npos) && + (*offset_for_adjustment > component_start)) { + if ((*offset_for_adjustment < component_end) && converted_idn) + output_offset = std::wstring::npos; + else + output_offset += output_component_length - component_length; } - // Need to add the dot we just found (if we found one). This needs to be - // done before we break out below in case the URL ends in a dot. - if (next_dot < input16.size()) + // Need to add the dot we just found (if we found one). + if (component_end < input16.length()) out16.push_back('.'); - else - break; // No more components left. 
- - cur_begin = next_dot + 1; } -#if defined(WCHAR_T_IS_UTF32) - UTF16ToWide(out16.data(), out16.length(), out); -#elif defined(WCHAR_T_IS_UTF16) - out->swap(out16); -#endif + if (offset_for_adjustment) + *offset_for_adjustment = output_offset; + + return UTF16ToWideAndAdjustOffset(out16, offset_for_adjustment); } std::string CanonicalizeHost(const std::string& host, @@ -1262,31 +1269,48 @@ void GetIdentityFromURL(const GURL& url, std::wstring* username, std::wstring* password) { UnescapeRule::Type flags = UnescapeRule::SPACES; - *username = UnescapeAndDecodeUTF8URLComponent(url.username(), flags); - *password = UnescapeAndDecodeUTF8URLComponent(url.password(), flags); + *username = UnescapeAndDecodeUTF8URLComponent(url.username(), flags, NULL); + *password = UnescapeAndDecodeUTF8URLComponent(url.password(), flags, NULL); } void AppendFormattedHost(const GURL& url, const std::wstring& languages, std::wstring* output, - url_parse::Parsed* new_parsed) { + url_parse::Parsed* new_parsed, + size_t* offset_for_adjustment) { + DCHECK(output); const url_parse::Component& host = url.parsed_for_possibly_invalid_spec().host; if (host.is_nonempty()) { // Handle possible IDN in the host name. + int new_host_begin = static_cast<int>(output->length()); if (new_parsed) - new_parsed->host.begin = static_cast<int>(output->length()); + new_parsed->host.begin = new_host_begin; + size_t offset_past_current_output = + (!offset_for_adjustment || + (*offset_for_adjustment == std::wstring::npos) || + (*offset_for_adjustment < output->length())) ? + std::wstring::npos : (*offset_for_adjustment - output->length()); + size_t* offset_into_host = + (offset_past_current_output >= static_cast<size_t>(host.len)) ? 
+ NULL : &offset_past_current_output; const std::string& spec = url.possibly_invalid_spec(); DCHECK(host.begin >= 0 && ((spec.length() == 0 && host.begin == 0) || host.begin < static_cast<int>(spec.length()))); - net::IDNToUnicode(&spec[host.begin], host.len, languages, output); + output->append(net::IDNToUnicode(&spec[host.begin], + static_cast<size_t>(host.len), languages, offset_into_host)); - if (new_parsed) { - new_parsed->host.len = - static_cast<int>(output->length()) - new_parsed->host.begin; + int new_host_len = static_cast<int>(output->length()) - new_host_begin; + if (new_parsed) + new_parsed->host.len = new_host_len; + if (offset_into_host) { + *offset_for_adjustment = (*offset_into_host == std::wstring::npos) ? + std::wstring::npos : (new_host_begin + *offset_into_host); + } else if (offset_past_current_output != std::wstring::npos) { + *offset_for_adjustment += new_host_len - host.len; } } else if (new_parsed) { new_parsed->host.reset(); @@ -1298,19 +1322,36 @@ void AppendFormattedComponent(const std::string& spec, const url_parse::Component& in_component, UnescapeRule::Type unescape_rules, std::wstring* output, - url_parse::Component* out_component) { + url_parse::Component* out_component, + size_t* offset_for_adjustment) { + DCHECK(output); + DCHECK(offset_for_adjustment); if (in_component.is_nonempty()) { out_component->begin = static_cast<int>(output->length()); + size_t offset_past_current_output = + ((*offset_for_adjustment == std::wstring::npos) || + (*offset_for_adjustment < output->length())) ? + std::wstring::npos : (*offset_for_adjustment - output->length()); + size_t* offset_into_component = + (offset_past_current_output >= static_cast<size_t>(in_component.len)) ? 
+ NULL : &offset_past_current_output; if (unescape_rules == UnescapeRule::NONE) { - output->append(UTF8ToWide(spec.substr( - in_component.begin, in_component.len))); + output->append(UTF8ToWideAndAdjustOffset( + spec.substr(in_component.begin, in_component.len), + offset_into_component)); } else { output->append(UnescapeAndDecodeUTF8URLComponent( - spec.substr(in_component.begin, in_component.len), - unescape_rules)); + spec.substr(in_component.begin, in_component.len), unescape_rules, + offset_into_component)); } out_component->len = static_cast<int>(output->length()) - out_component->begin; + if (offset_into_component) { + *offset_for_adjustment = (*offset_into_component == std::wstring::npos) ? + std::wstring::npos : (out_component->begin + *offset_into_component); + } else if (offset_past_current_output != std::wstring::npos) { + *offset_for_adjustment += out_component->len - in_component.len; + } } else { out_component->reset(); } @@ -1321,10 +1362,14 @@ std::wstring FormatUrl(const GURL& url, bool omit_username_password, UnescapeRule::Type unescape_rules, url_parse::Parsed* new_parsed, - size_t* prefix_end) { + size_t* prefix_end, + size_t* offset_for_adjustment) { url_parse::Parsed parsed_temp; if (!new_parsed) new_parsed = &parsed_temp; + size_t offset_temp = std::wstring::npos; + if (!offset_for_adjustment) + offset_for_adjustment = &offset_temp; std::wstring url_string; @@ -1332,6 +1377,7 @@ std::wstring FormatUrl(const GURL& url, if (url.is_empty()) { if (prefix_end) *prefix_end = 0; + *offset_for_adjustment = std::wstring::npos; return url_string; } @@ -1343,19 +1389,22 @@ std::wstring FormatUrl(const GURL& url, if (url.SchemeIs(kViewSource) && !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) { return FormatViewSourceUrl(url, languages, omit_username_password, - unescape_rules, new_parsed, prefix_end); + unescape_rules, new_parsed, prefix_end, offset_for_adjustment); } // We handle both valid and invalid URLs (this will give us 
the spec // regardless of validity). const std::string& spec = url.possibly_invalid_spec(); const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); + if (*offset_for_adjustment >= spec.length()) + *offset_for_adjustment = std::wstring::npos; // Copy everything before the username (the scheme and the separators.) // These are ASCII. - int pre_end = parsed.CountCharactersBefore(url_parse::Parsed::USERNAME, true); - for (int i = 0; i < pre_end; ++i) - url_string.push_back(spec[i]); + std::copy(spec.begin(), + spec.begin() + parsed.CountCharactersBefore(url_parse::Parsed::USERNAME, + true), + std::back_inserter(url_string)); new_parsed->scheme = parsed.scheme; if (omit_username_password) { @@ -1364,16 +1413,41 @@ std::wstring FormatUrl(const GURL& url, // e.g. "http://google.com:search@evil.ru/" new_parsed->username.reset(); new_parsed->password.reset(); + if ((*offset_for_adjustment != std::wstring::npos) && + (parsed.username.is_nonempty() || parsed.password.is_nonempty())) { + if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) { + // The seeming off-by-one and off-by-two in these first two lines are to + // account for the ':' after the username and '@' after the password. + if (*offset_for_adjustment > + static_cast<size_t>(parsed.password.end())) { + *offset_for_adjustment -= + (parsed.username.len + parsed.password.len + 2); + } else if (*offset_for_adjustment > + static_cast<size_t>(parsed.username.begin)) { + *offset_for_adjustment = std::wstring::npos; + } + } else { + const url_parse::Component* nonempty_component = + parsed.username.is_nonempty() ? &parsed.username : &parsed.password; + // The seeming off-by-one in these first two lines is to account for the + // '@' after the username/password. 
+ if (*offset_for_adjustment > + static_cast<size_t>(nonempty_component->end())) { + *offset_for_adjustment -= (nonempty_component->len + 1); + } else if (*offset_for_adjustment > + static_cast<size_t>(nonempty_component->begin)) { + *offset_for_adjustment = std::wstring::npos; + } + } + } } else { - AppendFormattedComponent( - spec, parsed.username, unescape_rules, - &url_string, &new_parsed->username); + AppendFormattedComponent(spec, parsed.username, unescape_rules, &url_string, + &new_parsed->username, offset_for_adjustment); if (parsed.password.is_valid()) { url_string.push_back(':'); } - AppendFormattedComponent( - spec, parsed.password, unescape_rules, - &url_string, &new_parsed->password); + AppendFormattedComponent(spec, parsed.password, unescape_rules, &url_string, + &new_parsed->password, offset_for_adjustment); if (parsed.username.is_valid() || parsed.password.is_valid()) { url_string.push_back('@'); } @@ -1381,39 +1455,56 @@ std::wstring FormatUrl(const GURL& url, if (prefix_end) *prefix_end = static_cast<size_t>(url_string.length()); - AppendFormattedHost(url, languages, &url_string, new_parsed); + AppendFormattedHost(url, languages, &url_string, new_parsed, + offset_for_adjustment); // Port. if (parsed.port.is_nonempty()) { url_string.push_back(':'); - int begin = url_string.length(); - for (int i = parsed.port.begin; i < parsed.port.end(); ++i) - url_string.push_back(spec[i]); - new_parsed->port.begin = begin; - new_parsed->port.len = url_string.length() - begin; + new_parsed->port.begin = url_string.length(); + std::copy(spec.begin() + parsed.port.begin, + spec.begin() + parsed.port.end(), std::back_inserter(url_string)); + new_parsed->port.len = url_string.length() - new_parsed->port.begin; } else { new_parsed->port.reset(); } // Path and query both get the same general unescape & convert treatment. 
- AppendFormattedComponent( - spec, parsed.path, unescape_rules, &url_string, - &new_parsed->path); + AppendFormattedComponent(spec, parsed.path, unescape_rules, &url_string, + &new_parsed->path, offset_for_adjustment); if (parsed.query.is_valid()) url_string.push_back('?'); - AppendFormattedComponent( - spec, parsed.query, unescape_rules, &url_string, - &new_parsed->query); + AppendFormattedComponent(spec, parsed.query, unescape_rules, &url_string, + &new_parsed->query, offset_for_adjustment); // Reference is stored in valid, unescaped UTF-8, so we can just convert. if (parsed.ref.is_valid()) { url_string.push_back('#'); - int begin = url_string.length(); - if (parsed.ref.len > 0) - url_string.append(UTF8ToWide(std::string(&spec[parsed.ref.begin], - parsed.ref.len))); - new_parsed->ref.begin = begin; - new_parsed->ref.len = url_string.length() - begin; + new_parsed->ref.begin = url_string.length(); + size_t offset_past_current_output = + ((*offset_for_adjustment == std::wstring::npos) || + (*offset_for_adjustment < url_string.length())) ? + std::wstring::npos : (*offset_for_adjustment - url_string.length()); + size_t* offset_into_ref = + (offset_past_current_output >= static_cast<size_t>(parsed.ref.len)) ? + NULL : &offset_past_current_output; + if (parsed.ref.len > 0) { + url_string.append(UTF8ToWideAndAdjustOffset(spec.substr(parsed.ref.begin, + parsed.ref.len), + offset_into_ref)); + } + new_parsed->ref.len = url_string.length() - new_parsed->ref.begin; + if (offset_into_ref) { + *offset_for_adjustment = (*offset_into_ref == std::wstring::npos) ? + std::wstring::npos : (new_parsed->ref.begin + *offset_into_ref); + } else if (offset_past_current_output != std::wstring::npos) { + // We clamped the offset near the beginning of this function to ensure it + // was within the input URL. If we reach here, the input was something + // invalid and non-parseable such that the offset was past any component + // we could figure out. 
In this case it won't be represented in the + // output string, so reset it. + *offset_for_adjustment = std::wstring::npos; + } } return url_string; diff --git a/net/base/net_util.h b/net/base/net_util.h index 1f1516f..d9affe6 100644 --- a/net/base/net_util.h +++ b/net/base/net_util.h @@ -129,10 +129,9 @@ std::string GetHeaderParamValue(const std::string& field, std::string GetFileNameFromCD(const std::string& header, const std::string& referrer_charset); -// Converts the given host name to unicode characters, APPENDING them to the -// the given output string. This can be called for any host name, if the -// input is not IDN or is invalid in some way, we'll just append the ASCII -// source to the output so it is still usable. +// Converts the given host name to unicode characters. This can be called for +// any host name, if the input is not IDN or is invalid in some way, we'll just +// return the ASCII source so it is still usable. // // The input should be the canonicalized ASCII host name from GURL. This // function does NOT accept UTF-8! Its length must also be given (this is @@ -146,10 +145,16 @@ std::string GetFileNameFromCD(const std::string& header, // Latin letters in the ASCII range can be mixed with a limited set of // script-language pairs (currently Han, Kana and Hangul for zh,ja and ko). // When |languages| is empty, even that mixing is not allowed. -void IDNToUnicode(const char* host, - int host_len, - const std::wstring& languages, - std::wstring* out); +// +// |offset_for_adjustment| is an offset into |host|, which will be adjusted to +// point at the same logical place in the output string. If this isn't possible +// because it points past the end of |host| or into the middle of a punycode +// sequence, it will be set to std::wstring::npos. |offset_for_adjustment| may +// be NULL. 
+std::wstring IDNToUnicode(const char* host, + size_t host_len, + const std::wstring& languages, + size_t* offset_for_adjustment); // Canonicalizes |host| and returns it. Also fills |host_info| with // IP address information. |host_info| must not be NULL. @@ -228,31 +233,47 @@ int SetNonBlocking(int fd); // the user. The given parsed structure will be updated. The host name formatter // also takes the same accept languages component as ElideURL. |new_parsed| may // be null. -void AppendFormattedHost(const GURL& url, const std::wstring& languages, - std::wstring* output, url_parse::Parsed* new_parsed); - -// Creates a string representation of |url|. The IDN host name may -// be in Unicode if |languages| accepts the Unicode representation. -// If |omit_username_password| is true, the username and the password are -// omitted. |unescape_rules| defines how to clean the URL for human readability. +void AppendFormattedHost(const GURL& url, + const std::wstring& languages, + std::wstring* output, + url_parse::Parsed* new_parsed, + size_t* offset_for_adjustment); + +// Creates a string representation of |url|. The IDN host name may be in Unicode +// if |languages| accepts the Unicode representation. If +// |omit_username_password| is true, any username and password are removed. +// |unescape_rules| defines how to clean the URL for human readability. // You will generally want |UnescapeRule::SPACES| for display to the user if you // can handle spaces, or |UnescapeRule::NORMAL| if not. If the path part and the // query part seem to be encoded in %-encoded UTF-8, decodes %-encoding and -// UTF-8. |new_parsed| will have parsing parameters of the resultant URL. +// UTF-8. +// +// The last three parameters may be NULL. +// |new_parsed| will be set to the parsing parameters of the resultant URL. // |prefix_end| will be the length before the hostname of the resultant URL. -// |new_parsed| and |prefix_end| may be NULL. 
+// |offset_for_adjustment| is an offset into the original |url|'s spec(), which +// will be modified to reflect changes this function makes to the output string; +// for example, if |url| is "http://a:b@c.com/", |omit_username_password| is +// true, and |offset_for_adjustment| is 12 (the offset of '.'), then on return +// the output string will be "http://c.com/" and |offset_for_adjustment| will be +// 8. If the offset cannot be successfully adjusted (e.g. because it points +// into the middle of a component that was entirely removed, past the end of the +// string, or into the middle of an encoding sequence), it will be set to +// std::wstring::npos. std::wstring FormatUrl(const GURL& url, const std::wstring& languages, bool omit_username_password, UnescapeRule::Type unescape_rules, url_parse::Parsed* new_parsed, - size_t* prefix_end); + size_t* prefix_end, + size_t* offset_for_adjustment); // Creates a string representation of |url| for display to the user. // This is a shorthand of the above function with omit_username_password=true, // unescape=SPACES, new_parsed=NULL, and prefix_end=NULL. inline std::wstring FormatUrl(const GURL& url, const std::wstring& languages) { - return FormatUrl(url, languages, true, UnescapeRule::SPACES, NULL, NULL); + return FormatUrl(url, languages, true, UnescapeRule::SPACES, NULL, NULL, + NULL); } // Strip the portions of |url| that aren't core to the network request. diff --git a/net/base/net_util_unittest.cc b/net/base/net_util_unittest.cc index 07ec17c..308ef80 100644 --- a/net/base/net_util_unittest.cc +++ b/net/base/net_util_unittest.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Copyright (c) 2009 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. 
@@ -345,6 +345,11 @@ const IDNTestCase idn_cases[] = { #endif }; +struct AdjustOffsetCase { + size_t input_offset; + size_t output_offset; +}; + struct CompliantHostCase { const char* host; bool expected_output; @@ -782,14 +787,10 @@ TEST(NetUtilTest, IDNToUnicodeFast) { // ja || zh-TW,en || ko,ja -> IDNToUnicodeSlow if (j == 3 || j == 17 || j == 18) continue; - std::wstring output; - net::IDNToUnicode(idn_cases[i].input, - static_cast<int>(strlen(idn_cases[i].input)), - kLanguages[j], - &output); + std::wstring output(net::IDNToUnicode(idn_cases[i].input, + strlen(idn_cases[i].input), kLanguages[j], NULL)); std::wstring expected(idn_cases[i].unicode_allowed[j] ? - idn_cases[i].unicode_output : - ASCIIToWide(idn_cases[i].input)); + idn_cases[i].unicode_output : ASCIIToWide(idn_cases[i].input)); AppendLanguagesToOutputs(kLanguages[j], &expected, &output); EXPECT_EQ(expected, output); } @@ -802,20 +803,43 @@ TEST(NetUtilTest, IDNToUnicodeSlow) { // !(ja || zh-TW,en || ko,ja) -> IDNToUnicodeFast if (!(j == 3 || j == 17 || j == 18)) continue; - std::wstring output; - net::IDNToUnicode(idn_cases[i].input, - static_cast<int>(strlen(idn_cases[i].input)), - kLanguages[j], - &output); + std::wstring output(net::IDNToUnicode(idn_cases[i].input, + strlen(idn_cases[i].input), kLanguages[j], NULL)); std::wstring expected(idn_cases[i].unicode_allowed[j] ? 
- idn_cases[i].unicode_output : - ASCIIToWide(idn_cases[i].input)); + idn_cases[i].unicode_output : ASCIIToWide(idn_cases[i].input)); AppendLanguagesToOutputs(kLanguages[j], &expected, &output); EXPECT_EQ(expected, output); } } } +TEST(NetUtilTest, IDNToUnicodeAdjustOffset) { + const AdjustOffsetCase adjust_cases[] = { + {0, 0}, + {2, 2}, + {4, 4}, + {5, 5}, + {6, std::wstring::npos}, + {16, std::wstring::npos}, + {17, 7}, + {18, 8}, + {19, std::wstring::npos}, + {25, std::wstring::npos}, + {34, 12}, + {35, 13}, + {38, 16}, + {39, std::wstring::npos}, + {std::wstring::npos, std::wstring::npos}, + }; + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(adjust_cases); ++i) { + size_t offset = adjust_cases[i].input_offset; + // "test.\x89c6\x9891.\x5317\x4eac\x5927\x5b78.test" + net::IDNToUnicode("test.xn--cy2a840a.xn--1lq90ic7f1rc.test", 39, L"zh-CN", + &offset); + EXPECT_EQ(adjust_cases[i].output_offset, offset); + } +} + TEST(NetUtilTest, CompliantHost) { const CompliantHostCase compliant_host_cases[] = { {"", false}, @@ -1328,7 +1352,7 @@ TEST(NetUtilTest, FormatUrl) { size_t prefix_len; std::wstring formatted = net::FormatUrl( GURL(tests[i].input), tests[i].languages, tests[i].omit, - tests[i].escape_rules, NULL, &prefix_len); + tests[i].escape_rules, NULL, &prefix_len, NULL); EXPECT_EQ(tests[i].output, formatted) << tests[i].description; EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description; } @@ -1340,7 +1364,7 @@ TEST(NetUtilTest, FormatUrlParsed) { std::wstring formatted = net::FormatUrl( GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), - L"ja", false, UnescapeRule::NONE, &parsed, NULL); + L"ja", false, UnescapeRule::NONE, &parsed, NULL, NULL); EXPECT_EQ(L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080" L"/%E3%82%B0/?q=%E3%82%B0#\x30B0", formatted); EXPECT_EQ(L"%E3%82%B0", @@ -1360,7 +1384,7 @@ TEST(NetUtilTest, FormatUrlParsed) { formatted = net::FormatUrl( 
GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), - L"ja", false, UnescapeRule::NORMAL, &parsed, NULL); + L"ja", false, UnescapeRule::NORMAL, &parsed, NULL, NULL); EXPECT_EQ(L"http://\x30B0:\x30FC@\x30B0\x30FC\x30B0\x30EB.jp:8080" L"/\x30B0/?q=\x30B0#\x30B0", formatted); EXPECT_EQ(L"\x30B0", @@ -1379,7 +1403,7 @@ TEST(NetUtilTest, FormatUrlParsed) { formatted = net::FormatUrl( GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), - L"ja", true, UnescapeRule::NORMAL, &parsed, NULL); + L"ja", true, UnescapeRule::NORMAL, &parsed, NULL, NULL); EXPECT_EQ(L"http://\x30B0\x30FC\x30B0\x30EB.jp:8080" L"/\x30B0/?q=\x30B0#\x30B0", formatted); EXPECT_FALSE(parsed.username.is_valid()); @@ -1395,7 +1419,7 @@ TEST(NetUtilTest, FormatUrlParsed) { // View-source case. formatted = net::FormatUrl( GURL("view-source:http://user:passwd@host:81/path?query#ref"), - L"", true, UnescapeRule::NORMAL, &parsed, NULL); + L"", true, UnescapeRule::NORMAL, &parsed, NULL, NULL); EXPECT_EQ(L"view-source:http://host:81/path?query#ref", formatted); EXPECT_EQ(L"view-source:http", formatted.substr(parsed.scheme.begin, parsed.scheme.len)); @@ -1408,6 +1432,124 @@ TEST(NetUtilTest, FormatUrlParsed) { EXPECT_EQ(L"ref", formatted.substr(parsed.ref.begin, parsed.ref.len)); } +TEST(NetUtilTest, FormatUrlAdjustOffset) { + const AdjustOffsetCase basic_cases[] = { + {0, 0}, + {3, 3}, + {5, 5}, + {6, 6}, + {13, 13}, + {21, 21}, + {22, 22}, + {23, 23}, + {25, 25}, + {26, std::wstring::npos}, + {500000, std::wstring::npos}, + {std::wstring::npos, std::wstring::npos}, + }; + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(basic_cases); ++i) { + size_t offset = basic_cases[i].input_offset; + net::FormatUrl(GURL("http://www.google.com/foo/"), L"en", true, + UnescapeRule::NORMAL, NULL, NULL, &offset); + EXPECT_EQ(basic_cases[i].output_offset, offset); + } + + const struct { + const char* input_url; + size_t input_offset; + 
size_t output_offset; + } omit_auth_cases[] = { + {"http://foo:bar@www.google.com/", 6, 6}, + {"http://foo:bar@www.google.com/", 7, 7}, + {"http://foo:bar@www.google.com/", 8, std::wstring::npos}, + {"http://foo:bar@www.google.com/", 10, std::wstring::npos}, + {"http://foo:bar@www.google.com/", 11, std::wstring::npos}, + {"http://foo:bar@www.google.com/", 14, std::wstring::npos}, + {"http://foo:bar@www.google.com/", 15, 7}, + {"http://foo:bar@www.google.com/", 25, 17}, + {"http://foo@www.google.com/", 9, std::wstring::npos}, + {"http://foo@www.google.com/", 11, 7}, + }; + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(omit_auth_cases); ++i) { + size_t offset = omit_auth_cases[i].input_offset; + net::FormatUrl(GURL(omit_auth_cases[i].input_url), L"en", true, + UnescapeRule::NORMAL, NULL, NULL, &offset); + EXPECT_EQ(omit_auth_cases[i].output_offset, offset); + } + + const AdjustOffsetCase view_source_cases[] = { + {0, 0}, + {3, 3}, + {11, 11}, + {12, 12}, + {13, 13}, + {19, 19}, + {20, std::wstring::npos}, + {23, 19}, + {26, 22}, + {std::wstring::npos, std::wstring::npos}, + }; + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(view_source_cases); ++i) { + size_t offset = view_source_cases[i].input_offset; + net::FormatUrl(GURL("view-source:http://foo@www.google.com/"), L"en", true, + UnescapeRule::NORMAL, NULL, NULL, &offset); + EXPECT_EQ(view_source_cases[i].output_offset, offset); + } + + const AdjustOffsetCase idn_hostname_cases[] = { + {8, std::wstring::npos}, + {16, std::wstring::npos}, + {24, std::wstring::npos}, + {25, 12}, + {30, 17}, + }; + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(idn_hostname_cases); ++i) { + size_t offset = idn_hostname_cases[i].input_offset; + // "http://\x671d\x65e5\x3042\x3055\x3072.jp/foo/" + net::FormatUrl(GURL("http://xn--l8jvb1ey91xtjb.jp/foo/"), L"ja", true, + UnescapeRule::NORMAL, NULL, NULL, &offset); + EXPECT_EQ(idn_hostname_cases[i].output_offset, offset); + } + + const AdjustOffsetCase unescape_cases[] = { + {25, 25}, + {26, 
std::wstring::npos}, + {27, std::wstring::npos}, + {28, 26}, + {35, std::wstring::npos}, + {41, 31}, + {59, 33}, + {60, std::wstring::npos}, + {67, std::wstring::npos}, + {68, std::wstring::npos}, + }; + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(unescape_cases); ++i) { + size_t offset = unescape_cases[i].input_offset; + // "http://www.google.com/foo bar/\x30B0\x30FC\x30B0\x30EB" + net::FormatUrl(GURL( + "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"), + L"en", true, UnescapeRule::SPACES, NULL, NULL, &offset); + EXPECT_EQ(unescape_cases[i].output_offset, offset); + } + + const AdjustOffsetCase ref_cases[] = { + {30, 30}, + {31, 31}, + {32, std::wstring::npos}, + {34, 32}, + {37, 33}, + {38, std::wstring::npos}, + }; + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(ref_cases); ++i) { + size_t offset = ref_cases[i].input_offset; + // "http://www.google.com/foo.html#\x30B0\x30B0z" + net::FormatUrl(GURL( + "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z"), L"en", + true, UnescapeRule::NORMAL, NULL, NULL, &offset); + EXPECT_EQ(ref_cases[i].output_offset, offset); + } +} + TEST(NetUtilTest, SimplifyUrlForRequest) { struct { const char* input_url; @@ -1466,4 +1608,3 @@ TEST(NetUtilTest, SetExplicitlyAllowedPortsTest) { EXPECT_EQ(i, net::explicitly_allowed_ports.size()); } } - |