diff options
Diffstat (limited to 'net')
-rw-r--r-- | net/base/escape.cc | 42 | ||||
-rw-r--r-- | net/base/escape.h | 24 | ||||
-rw-r--r-- | net/base/escape_unittest.cc | 125 | ||||
-rw-r--r-- | net/base/net_util.cc | 355 | ||||
-rw-r--r-- | net/base/net_util.h | 59 | ||||
-rw-r--r-- | net/base/net_util_unittest.cc | 183 |
6 files changed, 550 insertions, 238 deletions
diff --git a/net/base/escape.cc b/net/base/escape.cc index 3d2aca2..5196eb6 100644 --- a/net/base/escape.cc +++ b/net/base/escape.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Copyright (c) 2009 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -107,7 +107,14 @@ const char kUrlUnescape[128] = { }; std::string UnescapeURLImpl(const std::string& escaped_text, - UnescapeRule::Type rules) { + UnescapeRule::Type rules, + size_t* offset_for_adjustment) { + size_t offset_temp = std::wstring::npos; + if (!offset_for_adjustment) + offset_for_adjustment = &offset_temp; + else if (*offset_for_adjustment >= escaped_text.length()) + *offset_for_adjustment = std::wstring::npos; + // Do not unescape anything, return the |escaped_text| text. if (rules == UnescapeRule::NONE) return escaped_text; @@ -136,8 +143,17 @@ std::string UnescapeURLImpl(const std::string& escaped_text, // Additionally allow control characters if requested. (value < ' ' && (rules & UnescapeRule::CONTROL_CHARS)))) { // Use the unescaped version of the character. + size_t length_before_append = result.length(); result.push_back(value); i += 2; + + // Adjust offset to match length change. + if (*offset_for_adjustment != std::string::npos) { + if (*offset_for_adjustment > (length_before_append + 2)) + *offset_for_adjustment -= 2; + else if (*offset_for_adjustment > length_before_append) + *offset_for_adjustment = std::string::npos; + } } else { // Keep escaped. Append a percent and we'll get the following two // digits on the next loops through. 
@@ -231,19 +247,27 @@ bool EscapeQueryParamValue(const std::wstring& text, const char* codepage, return true; } -std::wstring UnescapeAndDecodeURLComponent(const std::string& text, - const char* codepage, - UnescapeRule::Type rules) { +std::wstring UnescapeAndDecodeUTF8URLComponent(const std::string& text, + UnescapeRule::Type rules, + size_t* offset_for_adjustment) { std::wstring result; - if (base::CodepageToWide(UnescapeURLImpl(text, rules), codepage, - base::OnStringConversionError::FAIL, &result)) + size_t original_offset = offset_for_adjustment ? *offset_for_adjustment : 0; + if (base::CodepageToWideAndAdjustOffset( + UnescapeURLImpl(text, rules, offset_for_adjustment), + "UTF-8", base::OnStringConversionError::FAIL, &result, + offset_for_adjustment)) return result; // Character set looks like it's valid. - return UTF8ToWide(text); // Return the escaped version when it's not. + + // Not valid. Return the escaped version. Undo our changes to + // |offset_for_adjustment| since we haven't changed the string after all. + if (offset_for_adjustment) + *offset_for_adjustment = original_offset; + return UTF8ToWideAndAdjustOffset(text, offset_for_adjustment); } std::string UnescapeURLComponent(const std::string& escaped_text, UnescapeRule::Type rules) { - return UnescapeURLImpl(escaped_text, rules); + return UnescapeURLImpl(escaped_text, rules, NULL); } template <class str> diff --git a/net/base/escape.h b/net/base/escape.h index 8761d4d..9ff17b6 100644 --- a/net/base/escape.h +++ b/net/base/escape.h @@ -1,4 +1,4 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Copyright (c) 2009 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. 
@@ -93,17 +93,17 @@ std::string UnescapeURLComponent(const std::string& escaped_text, UnescapeRule::Type rules); // Unescapes the given substring as a URL, and then tries to interpret the -// result as being encoded in the given code page. If the result is convertable -// into the code page, it will be returned as converted. If it is not, the -// original escaped string will be converted into a wide string and returned. -std::wstring UnescapeAndDecodeURLComponent(const std::string& text, - const char* codepage, - UnescapeRule::Type rules); -inline std::wstring UnescapeAndDecodeUTF8URLComponent( - const std::string& text, - UnescapeRule::Type rules) { - return UnescapeAndDecodeURLComponent(text, "UTF-8", rules); -} +// result as being encoded as UTF-8. If the result is convertable into UTF-8, it +// will be returned as converted. If it is not, the original escaped string will +// be converted into a wide string and returned. +// +// |offset_for_adjustment| may be NULL; if not, it is an offset into |text| that +// will be adjusted to point at the same logical place in the result string. If +// this isn't possible because it points into the middle of an escape sequence +// or past the end of the string, it will be set to std::wstring::npos. +std::wstring UnescapeAndDecodeUTF8URLComponent(const std::string& text, + UnescapeRule::Type rules, + size_t* offset_for_adjustment); // Deprecated ------------------------------------------------------------------ diff --git a/net/base/escape_unittest.cc b/net/base/escape_unittest.cc index 44bb9972..8e5e7dc 100644 --- a/net/base/escape_unittest.cc +++ b/net/base/escape_unittest.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Copyright (c) 2009 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. 
@@ -24,8 +24,7 @@ struct UnescapeURLCase { const char* output; }; -struct UnescapeAndDecodeURLCase { - const char* encoding; +struct UnescapeAndDecodeCase { const char* input; // The expected output when run through UnescapeURL. @@ -38,6 +37,12 @@ struct UnescapeAndDecodeURLCase { const wchar_t* decoded; }; +struct AdjustOffsetCase { + const char* input; + size_t input_offset; + size_t output_offset; +}; + struct EscapeForHTMLCase { const char* input; const char* expected_output; @@ -45,7 +50,7 @@ struct EscapeForHTMLCase { } // namespace -TEST(Escape, EscapeTextForFormSubmission) { +TEST(EscapeTest, EscapeTextForFormSubmission) { const EscapeCase escape_cases[] = { {L"foo", L"foo"}, {L"foo bar", L"foo+bar"}, @@ -93,7 +98,7 @@ TEST(Escape, EscapeTextForFormSubmission) { EXPECT_EQ(wide, EscapeQueryParamValueUTF8(test_str)); } -TEST(Escape, EscapePath) { +TEST(EscapeTest, EscapePath) { ASSERT_EQ( // Most of the character space we care about, un-escaped EscapePath( @@ -108,7 +113,7 @@ TEST(Escape, EscapePath) { "%7B%7C%7D~%7F%80%FF"); } -TEST(Escape, EscapeUrlEncodedData) { +TEST(EscapeTest, EscapeUrlEncodedData) { ASSERT_EQ( // Most of the character space we care about, un-escaped EscapeUrlEncodedData( @@ -123,7 +128,7 @@ TEST(Escape, EscapeUrlEncodedData) { "%7B%7C%7D~%7F%80%FF"); } -TEST(Escape, UnescapeURLComponent) { +TEST(EscapeTest, UnescapeURLComponent) { const UnescapeURLCase unescape_cases[] = { {"", UnescapeRule::NORMAL, ""}, {"%2", UnescapeRule::NORMAL, "%2"}, @@ -184,40 +189,48 @@ TEST(Escape, UnescapeURLComponent) { EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL)); } -TEST(Escape, UnescapeAndDecodeURLComponent) { - const UnescapeAndDecodeURLCase unescape_cases[] = { - {"UTF8", "%", "%", "%", L"%"}, - {"UTF8", "+", "+", " ", L"+"}, - {"UTF8", "%2+", "%2+", "%2 ", L"%2+"}, - {"UTF8", "+%%%+%%%", "+%%%+%%%", " %%% %%%", L"+%%%+%%%"}, - {"UTF8", "Don't escape anything", - "Don't escape anything", - "Don't escape anything", - L"Don't 
escape anything"}, - {"UTF8", "+Invalid %escape %2+", - "+Invalid %escape %2+", - " Invalid %escape %2 ", - L"+Invalid %escape %2+"}, - {"UTF8", "Some random text %25%3bOK", - "Some random text %25;OK", - "Some random text %25;OK", - L"Some random text %25;OK"}, - {"UTF8", "%01%02%03%04%05%06%07%08%09", - "%01%02%03%04%05%06%07%08%09", - "%01%02%03%04%05%06%07%08%09", - L"%01%02%03%04%05%06%07%08%09"}, - {"UTF8", "%E4%BD%A0+%E5%A5%BD", - "\xE4\xBD\xA0+\xE5\xA5\xBD", - "\xE4\xBD\xA0 \xE5\xA5\xBD", - L"\x4f60+\x597d"}, - {"BIG5", "%A7A%A6n", - "\xA7\x41\xA6n", - "\xA7\x41\xA6n", - L"\x4f60\x597d"}, - {"UTF8", "%ED%ED", // Invalid UTF-8. - "\xED\xED", - "\xED\xED", - L"%ED%ED"}, // Invalid UTF-8 -> kept unescaped. +TEST(EscapeTest, UnescapeAndDecodeUTF8URLComponent) { + const UnescapeAndDecodeCase unescape_cases[] = { + { "%", + "%", + "%", + L"%"}, + { "+", + "+", + " ", + L"+"}, + { "%2+", + "%2+", + "%2 ", + L"%2+"}, + { "+%%%+%%%", + "+%%%+%%%", + " %%% %%%", + L"+%%%+%%%"}, + { "Don't escape anything", + "Don't escape anything", + "Don't escape anything", + L"Don't escape anything"}, + { "+Invalid %escape %2+", + "+Invalid %escape %2+", + " Invalid %escape %2 ", + L"+Invalid %escape %2+"}, + { "Some random text %25%3BOK", + "Some random text %25;OK", + "Some random text %25;OK", + L"Some random text %25;OK"}, + { "%01%02%03%04%05%06%07%08%09", + "%01%02%03%04%05%06%07%08%09", + "%01%02%03%04%05%06%07%08%09", + L"%01%02%03%04%05%06%07%08%09"}, + { "%E4%BD%A0+%E5%A5%BD", + "\xE4\xBD\xA0+\xE5\xA5\xBD", + "\xE4\xBD\xA0 \xE5\xA5\xBD", + L"\x4f60+\x597d"}, + { "%ED%ED", // Invalid UTF-8. + "\xED\xED", + "\xED\xED", + L"%ED%ED"}, // Invalid UTF-8 -> kept unescaped. }; for (size_t i = 0; i < arraysize(unescape_cases); i++) { @@ -230,14 +243,36 @@ TEST(Escape, UnescapeAndDecodeURLComponent) { EXPECT_EQ(std::string(unescape_cases[i].query_unescaped), unescaped); // TODO: Need to test unescape_spaces and unescape_percent. 
- std::wstring decoded = UnescapeAndDecodeURLComponent( - unescape_cases[i].input, unescape_cases[i].encoding, - UnescapeRule::NORMAL); + std::wstring decoded = UnescapeAndDecodeUTF8URLComponent( + unescape_cases[i].input, UnescapeRule::NORMAL, NULL); EXPECT_EQ(std::wstring(unescape_cases[i].decoded), decoded); } } -TEST(Escape, EscapeForHTML) { +TEST(EscapeTest, AdjustOffset) { + const AdjustOffsetCase adjust_cases[] = { + {"", 0, std::wstring::npos}, + {"test", 0, 0}, + {"test", 2, 2}, + {"test", 4, std::wstring::npos}, + {"test", std::wstring::npos, std::wstring::npos}, + {"%3Btest", 6, 4}, + {"%3Btest", 2, std::wstring::npos}, + {"test%3B", 2, 2}, + {"%E4%BD%A0+%E5%A5%BD", 9, 1}, + {"%E4%BD%A0+%E5%A5%BD", 6, std::wstring::npos}, + {"%ED%B0%80+%E5%A5%BD", 6, 6}, + }; + + for (size_t i = 0; i < arraysize(adjust_cases); i++) { + size_t offset = adjust_cases[i].input_offset; + UnescapeAndDecodeUTF8URLComponent(adjust_cases[i].input, + UnescapeRule::NORMAL, &offset); + EXPECT_EQ(adjust_cases[i].output_offset, offset); + } +} + +TEST(EscapeTest, EscapeForHTML) { const EscapeForHTMLCase tests[] = { { "hello", "hello" }, { "<hello>", "&lt;hello&gt;" }, diff --git a/net/base/net_util.cc b/net/base/net_util.cc index 85151e9..9171e54 100644 --- a/net/base/net_util.cc +++ b/net/base/net_util.cc @@ -650,60 +650,51 @@ bool IsIDNComponentSafe(const char16* str, } // Converts one component of a host (between dots) to IDN if safe. The result -// will be APPENDED to the given output string and will be the same as the -// input if it is not IDN or the IDN is unsafe to display. -void IDNToUnicodeOneComponent(const char16* comp, - int comp_len, +// will be APPENDED to the given output string and will be the same as the input +// if it is not IDN or the IDN is unsafe to display. Returns whether any +// conversion was performed. 
+bool IDNToUnicodeOneComponent(const char16* comp, + size_t comp_len, const std::wstring& languages, string16* out) { - DCHECK(comp_len >= 0); + DCHECK(out); if (comp_len == 0) - return; + return false; - // Expand the output string to make room for a possibly longer string - // (we'll expand if it's still not big enough below). - int extra_space = 64; - size_t host_begin_in_output = out->size(); - - // Just copy the input if it can't be an IDN component. - if (comp_len < 4 || - comp[0] != 'x' || comp[1] != 'n' || comp[2] != '-' || comp[3] != '-') { - out->resize(host_begin_in_output + comp_len); - for (int i = 0; i < comp_len; i++) - (*out)[host_begin_in_output + i] = comp[i]; - return; - } + // Only transform if the input can be an IDN component. + static const char16 kIdnPrefix[] = {'x', 'n', '-', '-'}; + if ((comp_len > arraysize(kIdnPrefix)) && + !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(char16))) { + // Repeatedly expand the output string until it's big enough. It looks like + // ICU will return the required size of the buffer, but that's not + // documented, so we'll just grow by 2x. This should be rare and is not on a + // critical path. + size_t original_length = out->length(); + for (int extra_space = 64; ; extra_space *= 2) { + UErrorCode status = U_ZERO_ERROR; + out->resize(out->length() + extra_space); + int output_chars = uidna_IDNToUnicode(comp, + static_cast<int32_t>(comp_len), &(*out)[original_length], extra_space, + UIDNA_DEFAULT, NULL, &status); + if (status == U_ZERO_ERROR) { + // Converted successfully. 
+ out->resize(original_length + output_chars); + if (IsIDNComponentSafe(out->data() + original_length, output_chars, + languages)) + return true; + } - while (true) { - UErrorCode status = U_ZERO_ERROR; - out->resize(out->size() + extra_space); - int output_chars = - uidna_IDNToUnicode(comp, comp_len, &(*out)[host_begin_in_output], - extra_space, UIDNA_DEFAULT, NULL, &status); - if (status == U_ZERO_ERROR) { - // Converted successfully. - out->resize(host_begin_in_output + output_chars); - if (!IsIDNComponentSafe(&out->data()[host_begin_in_output], - output_chars, - languages)) - break; // The error handling below will undo the IDN. - return; + if (status != U_BUFFER_OVERFLOW_ERROR) + break; } - if (status != U_BUFFER_OVERFLOW_ERROR) - break; - - // Need to loop again with a bigger buffer. It looks like ICU will - // return the required size of the buffer, but that's not documented, - // so we'll just grow by 2x. This should be rare and is not on a - // critical path. - extra_space *= 2; + // Failed, revert back to original string. + out->resize(original_length); } - // We get here on error, in which case we replace anything that was added - // with the literal input. - out->resize(host_begin_in_output + comp_len); - for (int i = 0; i < comp_len; i++) - (*out)[host_begin_in_output + i] = comp[i]; + // We get here with no IDN or on error, in which case we just append the + // literal input. + out->append(comp, comp_len); + return false; } // Helper for FormatUrl(). 
@@ -712,19 +703,23 @@ std::wstring FormatViewSourceUrl(const GURL& url, bool omit_username_password, UnescapeRule::Type unescape_rules, url_parse::Parsed* new_parsed, - size_t* prefix_end) { + size_t* prefix_end, + size_t* offset_for_adjustment) { DCHECK(new_parsed); const wchar_t* const kWideViewSource = L"view-source:"; const size_t kViewSourceLengthPlus1 = 12; GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLengthPlus1)); + size_t temp_offset = (*offset_for_adjustment == std::wstring::npos) ? + std::wstring::npos : (*offset_for_adjustment - kViewSourceLengthPlus1); + size_t* temp_offset_ptr = (*offset_for_adjustment < kViewSourceLengthPlus1) ? + NULL : &temp_offset; std::wstring result = net::FormatUrl(real_url, languages, - omit_username_password, unescape_rules, new_parsed, prefix_end); + omit_username_password, unescape_rules, new_parsed, prefix_end, + temp_offset_ptr); result.insert(0, kWideViewSource); // Adjust position values. - if (prefix_end) - *prefix_end += kViewSourceLengthPlus1; if (new_parsed->scheme.is_nonempty()) { // Assume "view-source:real-scheme" as a scheme. new_parsed->scheme.len += kViewSourceLengthPlus1; @@ -746,6 +741,12 @@ std::wstring FormatViewSourceUrl(const GURL& url, new_parsed->query.begin += kViewSourceLengthPlus1; if (new_parsed->ref.is_nonempty()) new_parsed->ref.begin += kViewSourceLengthPlus1; + if (prefix_end) + *prefix_end += kViewSourceLengthPlus1; + if (temp_offset_ptr) { + *offset_for_adjustment = (temp_offset == std::wstring::npos) ? + std::wstring::npos : (temp_offset + kViewSourceLengthPlus1); + } return result; } @@ -769,12 +770,20 @@ std::set<int> explicitly_allowed_ports; // Appends the substring |in_component| inside of the URL |spec| to |output|, // and the resulting range will be filled into |out_component|. |unescape_rules| -// defines how to clean the URL for human readability. +// defines how to clean the URL for human readability. 
|offset_for_adjustment| +// is an offset into |output| which will be adjusted based on how it maps to the +// component being converted; if it is less than output->length(), it will be +// untouched, and if it is greater than output->length() + in_component.len it +// will be shortened by the difference in lengths between the input and output +// components. Otherwise it points into the component being converted, and is +// adjusted to point to the same logical place in |output|. +// |offset_for_adjustment| may not be NULL. static void AppendFormattedComponent(const std::string& spec, const url_parse::Component& in_component, UnescapeRule::Type unescape_rules, std::wstring* output, - url_parse::Component* out_component); + url_parse::Component* out_component, + size_t* offset_for_adjustment); GURL FilePathToFileURL(const FilePath& path) { // Produce a URL like "file:///C:/foo" for a regular file, or @@ -849,58 +858,56 @@ std::string GetHeaderParamValue(const std::string& field, // // We may want to skip this step in the case of file URLs to allow unicode // UNC hostnames regardless of encodings. -void IDNToUnicode(const char* host, - int host_len, - const std::wstring& languages, - std::wstring* out) { +std::wstring IDNToUnicode(const char* host, + size_t host_len, + const std::wstring& languages, + size_t* offset_for_adjustment) { // Convert the ASCII input to a wide string for ICU. string16 input16; input16.reserve(host_len); - for (int i = 0; i < host_len; i++) - input16.push_back(host[i]); + std::copy(host, host + host_len, std::back_inserter(input16)); string16 out16; - // The output string is appended to, so convert what's already there if - // needed. -#if defined(WCHAR_T_IS_UTF32) - WideToUTF16(out->data(), out->length(), &out16); - out->clear(); // for equivalence with the swap below -#elif defined(WCHAR_T_IS_UTF16) - out->swap(out16); -#endif + size_t output_offset = offset_for_adjustment ? 
+ *offset_for_adjustment : std::wstring::npos; // Do each component of the host separately, since we enforce script matching // on a per-component basis. - size_t cur_begin = 0; // Beginning of the current component (inclusive). - while (cur_begin < input16.size()) { - // Find the next dot or the end of the string. - size_t next_dot = input16.find_first_of('.', cur_begin); - if (next_dot == std::wstring::npos) - next_dot = input16.size(); // For getting the last component. - - if (next_dot > cur_begin) { + for (size_t component_start = 0, component_end; + component_start < input16.length(); + component_start = component_end + 1) { + // Find the end of the component. + component_end = input16.find('.', component_start); + if (component_end == string16::npos) + component_end = input16.length(); // For getting the last component. + size_t component_length = component_end - component_start; + + size_t output_component_start = out16.length(); + bool converted_idn = false; + if (component_end > component_start) { // Add the substring that we just found. - IDNToUnicodeOneComponent(&input16[cur_begin], - static_cast<int>(next_dot - cur_begin), - languages, - &out16); + converted_idn = IDNToUnicodeOneComponent(input16.data() + component_start, + component_length, languages, &out16); + } + size_t output_component_length = out16.length() - output_component_start; + + if ((output_offset != std::wstring::npos) && + (*offset_for_adjustment > component_start)) { + if ((*offset_for_adjustment < component_end) && converted_idn) + output_offset = std::wstring::npos; + else + output_offset += output_component_length - component_length; } - // Need to add the dot we just found (if we found one). This needs to be - // done before we break out below in case the URL ends in a dot. - if (next_dot < input16.size()) + // Need to add the dot we just found (if we found one). + if (component_end < input16.length()) out16.push_back('.'); - else - break; // No more components left. 
- - cur_begin = next_dot + 1; } -#if defined(WCHAR_T_IS_UTF32) - UTF16ToWide(out16.data(), out16.length(), out); -#elif defined(WCHAR_T_IS_UTF16) - out->swap(out16); -#endif + if (offset_for_adjustment) + *offset_for_adjustment = output_offset; + + return UTF16ToWideAndAdjustOffset(out16, offset_for_adjustment); } std::string CanonicalizeHost(const std::string& host, @@ -1262,31 +1269,48 @@ void GetIdentityFromURL(const GURL& url, std::wstring* username, std::wstring* password) { UnescapeRule::Type flags = UnescapeRule::SPACES; - *username = UnescapeAndDecodeUTF8URLComponent(url.username(), flags); - *password = UnescapeAndDecodeUTF8URLComponent(url.password(), flags); + *username = UnescapeAndDecodeUTF8URLComponent(url.username(), flags, NULL); + *password = UnescapeAndDecodeUTF8URLComponent(url.password(), flags, NULL); } void AppendFormattedHost(const GURL& url, const std::wstring& languages, std::wstring* output, - url_parse::Parsed* new_parsed) { + url_parse::Parsed* new_parsed, + size_t* offset_for_adjustment) { + DCHECK(output); const url_parse::Component& host = url.parsed_for_possibly_invalid_spec().host; if (host.is_nonempty()) { // Handle possible IDN in the host name. + int new_host_begin = static_cast<int>(output->length()); if (new_parsed) - new_parsed->host.begin = static_cast<int>(output->length()); + new_parsed->host.begin = new_host_begin; + size_t offset_past_current_output = + (!offset_for_adjustment || + (*offset_for_adjustment == std::wstring::npos) || + (*offset_for_adjustment < output->length())) ? + std::wstring::npos : (*offset_for_adjustment - output->length()); + size_t* offset_into_host = + (offset_past_current_output >= static_cast<size_t>(host.len)) ? 
+ NULL : &offset_past_current_output; const std::string& spec = url.possibly_invalid_spec(); DCHECK(host.begin >= 0 && ((spec.length() == 0 && host.begin == 0) || host.begin < static_cast<int>(spec.length()))); - net::IDNToUnicode(&spec[host.begin], host.len, languages, output); + output->append(net::IDNToUnicode(&spec[host.begin], + static_cast<size_t>(host.len), languages, offset_into_host)); - if (new_parsed) { - new_parsed->host.len = - static_cast<int>(output->length()) - new_parsed->host.begin; + int new_host_len = static_cast<int>(output->length()) - new_host_begin; + if (new_parsed) + new_parsed->host.len = new_host_len; + if (offset_into_host) { + *offset_for_adjustment = (*offset_into_host == std::wstring::npos) ? + std::wstring::npos : (new_host_begin + *offset_into_host); + } else if (offset_past_current_output != std::wstring::npos) { + *offset_for_adjustment += new_host_len - host.len; } } else if (new_parsed) { new_parsed->host.reset(); @@ -1298,19 +1322,36 @@ void AppendFormattedComponent(const std::string& spec, const url_parse::Component& in_component, UnescapeRule::Type unescape_rules, std::wstring* output, - url_parse::Component* out_component) { + url_parse::Component* out_component, + size_t* offset_for_adjustment) { + DCHECK(output); + DCHECK(offset_for_adjustment); if (in_component.is_nonempty()) { out_component->begin = static_cast<int>(output->length()); + size_t offset_past_current_output = + ((*offset_for_adjustment == std::wstring::npos) || + (*offset_for_adjustment < output->length())) ? + std::wstring::npos : (*offset_for_adjustment - output->length()); + size_t* offset_into_component = + (offset_past_current_output >= static_cast<size_t>(in_component.len)) ? 
+ NULL : &offset_past_current_output; if (unescape_rules == UnescapeRule::NONE) { - output->append(UTF8ToWide(spec.substr( - in_component.begin, in_component.len))); + output->append(UTF8ToWideAndAdjustOffset( + spec.substr(in_component.begin, in_component.len), + offset_into_component)); } else { output->append(UnescapeAndDecodeUTF8URLComponent( - spec.substr(in_component.begin, in_component.len), - unescape_rules)); + spec.substr(in_component.begin, in_component.len), unescape_rules, + offset_into_component)); } out_component->len = static_cast<int>(output->length()) - out_component->begin; + if (offset_into_component) { + *offset_for_adjustment = (*offset_into_component == std::wstring::npos) ? + std::wstring::npos : (out_component->begin + *offset_into_component); + } else if (offset_past_current_output != std::wstring::npos) { + *offset_for_adjustment += out_component->len - in_component.len; + } } else { out_component->reset(); } @@ -1321,10 +1362,14 @@ std::wstring FormatUrl(const GURL& url, bool omit_username_password, UnescapeRule::Type unescape_rules, url_parse::Parsed* new_parsed, - size_t* prefix_end) { + size_t* prefix_end, + size_t* offset_for_adjustment) { url_parse::Parsed parsed_temp; if (!new_parsed) new_parsed = &parsed_temp; + size_t offset_temp = std::wstring::npos; + if (!offset_for_adjustment) + offset_for_adjustment = &offset_temp; std::wstring url_string; @@ -1332,6 +1377,7 @@ std::wstring FormatUrl(const GURL& url, if (url.is_empty()) { if (prefix_end) *prefix_end = 0; + *offset_for_adjustment = std::wstring::npos; return url_string; } @@ -1343,19 +1389,22 @@ std::wstring FormatUrl(const GURL& url, if (url.SchemeIs(kViewSource) && !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) { return FormatViewSourceUrl(url, languages, omit_username_password, - unescape_rules, new_parsed, prefix_end); + unescape_rules, new_parsed, prefix_end, offset_for_adjustment); } // We handle both valid and invalid URLs (this will give us 
the spec // regardless of validity). const std::string& spec = url.possibly_invalid_spec(); const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); + if (*offset_for_adjustment >= spec.length()) + *offset_for_adjustment = std::wstring::npos; // Copy everything before the username (the scheme and the separators.) // These are ASCII. - int pre_end = parsed.CountCharactersBefore(url_parse::Parsed::USERNAME, true); - for (int i = 0; i < pre_end; ++i) - url_string.push_back(spec[i]); + std::copy(spec.begin(), + spec.begin() + parsed.CountCharactersBefore(url_parse::Parsed::USERNAME, + true), + std::back_inserter(url_string)); new_parsed->scheme = parsed.scheme; if (omit_username_password) { @@ -1364,16 +1413,41 @@ std::wstring FormatUrl(const GURL& url, // e.g. "http://google.com:search@evil.ru/" new_parsed->username.reset(); new_parsed->password.reset(); + if ((*offset_for_adjustment != std::wstring::npos) && + (parsed.username.is_nonempty() || parsed.password.is_nonempty())) { + if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) { + // The seeming off-by-one and off-by-two in these first two lines are to + // account for the ':' after the username and '@' after the password. + if (*offset_for_adjustment > + static_cast<size_t>(parsed.password.end())) { + *offset_for_adjustment -= + (parsed.username.len + parsed.password.len + 2); + } else if (*offset_for_adjustment > + static_cast<size_t>(parsed.username.begin)) { + *offset_for_adjustment = std::wstring::npos; + } + } else { + const url_parse::Component* nonempty_component = + parsed.username.is_nonempty() ? &parsed.username : &parsed.password; + // The seeming off-by-one in these first two lines is to account for the + // '@' after the username/password. 
+ if (*offset_for_adjustment > + static_cast<size_t>(nonempty_component->end())) { + *offset_for_adjustment -= (nonempty_component->len + 1); + } else if (*offset_for_adjustment > + static_cast<size_t>(nonempty_component->begin)) { + *offset_for_adjustment = std::wstring::npos; + } + } + } } else { - AppendFormattedComponent( - spec, parsed.username, unescape_rules, - &url_string, &new_parsed->username); + AppendFormattedComponent(spec, parsed.username, unescape_rules, &url_string, + &new_parsed->username, offset_for_adjustment); if (parsed.password.is_valid()) { url_string.push_back(':'); } - AppendFormattedComponent( - spec, parsed.password, unescape_rules, - &url_string, &new_parsed->password); + AppendFormattedComponent(spec, parsed.password, unescape_rules, &url_string, + &new_parsed->password, offset_for_adjustment); if (parsed.username.is_valid() || parsed.password.is_valid()) { url_string.push_back('@'); } @@ -1381,39 +1455,56 @@ std::wstring FormatUrl(const GURL& url, if (prefix_end) *prefix_end = static_cast<size_t>(url_string.length()); - AppendFormattedHost(url, languages, &url_string, new_parsed); + AppendFormattedHost(url, languages, &url_string, new_parsed, + offset_for_adjustment); // Port. if (parsed.port.is_nonempty()) { url_string.push_back(':'); - int begin = url_string.length(); - for (int i = parsed.port.begin; i < parsed.port.end(); ++i) - url_string.push_back(spec[i]); - new_parsed->port.begin = begin; - new_parsed->port.len = url_string.length() - begin; + new_parsed->port.begin = url_string.length(); + std::copy(spec.begin() + parsed.port.begin, + spec.begin() + parsed.port.end(), std::back_inserter(url_string)); + new_parsed->port.len = url_string.length() - new_parsed->port.begin; } else { new_parsed->port.reset(); } // Path and query both get the same general unescape & convert treatment. 
- AppendFormattedComponent( - spec, parsed.path, unescape_rules, &url_string, - &new_parsed->path); + AppendFormattedComponent(spec, parsed.path, unescape_rules, &url_string, + &new_parsed->path, offset_for_adjustment); if (parsed.query.is_valid()) url_string.push_back('?'); - AppendFormattedComponent( - spec, parsed.query, unescape_rules, &url_string, - &new_parsed->query); + AppendFormattedComponent(spec, parsed.query, unescape_rules, &url_string, + &new_parsed->query, offset_for_adjustment); // Reference is stored in valid, unescaped UTF-8, so we can just convert. if (parsed.ref.is_valid()) { url_string.push_back('#'); - int begin = url_string.length(); - if (parsed.ref.len > 0) - url_string.append(UTF8ToWide(std::string(&spec[parsed.ref.begin], - parsed.ref.len))); - new_parsed->ref.begin = begin; - new_parsed->ref.len = url_string.length() - begin; + new_parsed->ref.begin = url_string.length(); + size_t offset_past_current_output = + ((*offset_for_adjustment == std::wstring::npos) || + (*offset_for_adjustment < url_string.length())) ? + std::wstring::npos : (*offset_for_adjustment - url_string.length()); + size_t* offset_into_ref = + (offset_past_current_output >= static_cast<size_t>(parsed.ref.len)) ? + NULL : &offset_past_current_output; + if (parsed.ref.len > 0) { + url_string.append(UTF8ToWideAndAdjustOffset(spec.substr(parsed.ref.begin, + parsed.ref.len), + offset_into_ref)); + } + new_parsed->ref.len = url_string.length() - new_parsed->ref.begin; + if (offset_into_ref) { + *offset_for_adjustment = (*offset_into_ref == std::wstring::npos) ? + std::wstring::npos : (new_parsed->ref.begin + *offset_into_ref); + } else if (offset_past_current_output != std::wstring::npos) { + // We clamped the offset near the beginning of this function to ensure it + // was within the input URL. If we reach here, the input was something + // invalid and non-parseable such that the offset was past any component + // we could figure out. 
In this case it won't be represented in the + // output string, so reset it. + *offset_for_adjustment = std::wstring::npos; + } } return url_string; diff --git a/net/base/net_util.h b/net/base/net_util.h index 1f1516f..d9affe6 100644 --- a/net/base/net_util.h +++ b/net/base/net_util.h @@ -129,10 +129,9 @@ std::string GetHeaderParamValue(const std::string& field, std::string GetFileNameFromCD(const std::string& header, const std::string& referrer_charset); -// Converts the given host name to unicode characters, APPENDING them to the -// the given output string. This can be called for any host name, if the -// input is not IDN or is invalid in some way, we'll just append the ASCII -// source to the output so it is still usable. +// Converts the given host name to unicode characters. This can be called for +// any host name, if the input is not IDN or is invalid in some way, we'll just +// return the ASCII source so it is still usable. // // The input should be the canonicalized ASCII host name from GURL. This // function does NOT accept UTF-8! Its length must also be given (this is @@ -146,10 +145,16 @@ std::string GetFileNameFromCD(const std::string& header, // Latin letters in the ASCII range can be mixed with a limited set of // script-language pairs (currently Han, Kana and Hangul for zh,ja and ko). // When |languages| is empty, even that mixing is not allowed. -void IDNToUnicode(const char* host, - int host_len, - const std::wstring& languages, - std::wstring* out); +// +// |offset_for_adjustment| is an offset into |host|, which will be adjusted to +// point at the same logical place in the output string. If this isn't possible +// because it points past the end of |host| or into the middle of a punycode +// sequence, it will be set to std::wstring::npos. |offset_for_adjustment| may +// be NULL. 
+std::wstring IDNToUnicode(const char* host, + size_t host_len, + const std::wstring& languages, + size_t* offset_for_adjustment); // Canonicalizes |host| and returns it. Also fills |host_info| with // IP address information. |host_info| must not be NULL. @@ -228,31 +233,47 @@ int SetNonBlocking(int fd); // the user. The given parsed structure will be updated. The host name formatter // also takes the same accept languages component as ElideURL. |new_parsed| may // be null. -void AppendFormattedHost(const GURL& url, const std::wstring& languages, - std::wstring* output, url_parse::Parsed* new_parsed); - -// Creates a string representation of |url|. The IDN host name may -// be in Unicode if |languages| accepts the Unicode representation. -// If |omit_username_password| is true, the username and the password are -// omitted. |unescape_rules| defines how to clean the URL for human readability. +void AppendFormattedHost(const GURL& url, + const std::wstring& languages, + std::wstring* output, + url_parse::Parsed* new_parsed, + size_t* offset_for_adjustment); + +// Creates a string representation of |url|. The IDN host name may be in Unicode +// if |languages| accepts the Unicode representation. If +// |omit_username_password| is true, any username and password are removed. +// |unescape_rules| defines how to clean the URL for human readability. // You will generally want |UnescapeRule::SPACES| for display to the user if you // can handle spaces, or |UnescapeRule::NORMAL| if not. If the path part and the // query part seem to be encoded in %-encoded UTF-8, decodes %-encoding and -// UTF-8. |new_parsed| will have parsing parameters of the resultant URL. +// UTF-8. +// +// The last three parameters may be NULL. +// |new_parsed| will be set to the parsing parameters of the resultant URL. // |prefix_end| will be the length before the hostname of the resultant URL. -// |new_parsed| and |prefix_end| may be NULL. 
+// |offset_for_adjustment| is an offset into the original |url|'s spec(), which +// will be modified to reflect changes this function makes to the output string; +// for example, if |url| is "http://a:b@c.com/", |omit_username_password| is +// true, and |offset_for_adjustment| is 12 (the offset of '.'), then on return +// the output string will be "http://c.com/" and |offset_for_adjustment| will be +// 8. If the offset cannot be successfully adjusted (e.g. because it points +// into the middle of a component that was entirely removed, past the end of the +// string, or into the middle of an encoding sequence), it will be set to +// std::wstring::npos. std::wstring FormatUrl(const GURL& url, const std::wstring& languages, bool omit_username_password, UnescapeRule::Type unescape_rules, url_parse::Parsed* new_parsed, - size_t* prefix_end); + size_t* prefix_end, + size_t* offset_for_adjustment); // Creates a string representation of |url| for display to the user. // This is a shorthand of the above function with omit_username_password=true, // unescape=SPACES, new_parsed=NULL, and prefix_end=NULL. inline std::wstring FormatUrl(const GURL& url, const std::wstring& languages) { - return FormatUrl(url, languages, true, UnescapeRule::SPACES, NULL, NULL); + return FormatUrl(url, languages, true, UnescapeRule::SPACES, NULL, NULL, + NULL); } // Strip the portions of |url| that aren't core to the network request. diff --git a/net/base/net_util_unittest.cc b/net/base/net_util_unittest.cc index 07ec17c..308ef80 100644 --- a/net/base/net_util_unittest.cc +++ b/net/base/net_util_unittest.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Copyright (c) 2009 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. 
@@ -345,6 +345,11 @@ const IDNTestCase idn_cases[] = { #endif }; +struct AdjustOffsetCase { + size_t input_offset; + size_t output_offset; +}; + struct CompliantHostCase { const char* host; bool expected_output; @@ -782,14 +787,10 @@ TEST(NetUtilTest, IDNToUnicodeFast) { // ja || zh-TW,en || ko,ja -> IDNToUnicodeSlow if (j == 3 || j == 17 || j == 18) continue; - std::wstring output; - net::IDNToUnicode(idn_cases[i].input, - static_cast<int>(strlen(idn_cases[i].input)), - kLanguages[j], - &output); + std::wstring output(net::IDNToUnicode(idn_cases[i].input, + strlen(idn_cases[i].input), kLanguages[j], NULL)); std::wstring expected(idn_cases[i].unicode_allowed[j] ? - idn_cases[i].unicode_output : - ASCIIToWide(idn_cases[i].input)); + idn_cases[i].unicode_output : ASCIIToWide(idn_cases[i].input)); AppendLanguagesToOutputs(kLanguages[j], &expected, &output); EXPECT_EQ(expected, output); } @@ -802,20 +803,43 @@ TEST(NetUtilTest, IDNToUnicodeSlow) { // !(ja || zh-TW,en || ko,ja) -> IDNToUnicodeFast if (!(j == 3 || j == 17 || j == 18)) continue; - std::wstring output; - net::IDNToUnicode(idn_cases[i].input, - static_cast<int>(strlen(idn_cases[i].input)), - kLanguages[j], - &output); + std::wstring output(net::IDNToUnicode(idn_cases[i].input, + strlen(idn_cases[i].input), kLanguages[j], NULL)); std::wstring expected(idn_cases[i].unicode_allowed[j] ? 
- idn_cases[i].unicode_output : - ASCIIToWide(idn_cases[i].input)); + idn_cases[i].unicode_output : ASCIIToWide(idn_cases[i].input)); AppendLanguagesToOutputs(kLanguages[j], &expected, &output); EXPECT_EQ(expected, output); } } } +TEST(NetUtilTest, IDNToUnicodeAdjustOffset) { + const AdjustOffsetCase adjust_cases[] = { + {0, 0}, + {2, 2}, + {4, 4}, + {5, 5}, + {6, std::wstring::npos}, + {16, std::wstring::npos}, + {17, 7}, + {18, 8}, + {19, std::wstring::npos}, + {25, std::wstring::npos}, + {34, 12}, + {35, 13}, + {38, 16}, + {39, std::wstring::npos}, + {std::wstring::npos, std::wstring::npos}, + }; + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(adjust_cases); ++i) { + size_t offset = adjust_cases[i].input_offset; + // "test.\x89c6\x9891.\x5317\x4eac\x5927\x5b78.test" + net::IDNToUnicode("test.xn--cy2a840a.xn--1lq90ic7f1rc.test", 39, L"zh-CN", + &offset); + EXPECT_EQ(adjust_cases[i].output_offset, offset); + } +} + TEST(NetUtilTest, CompliantHost) { const CompliantHostCase compliant_host_cases[] = { {"", false}, @@ -1328,7 +1352,7 @@ TEST(NetUtilTest, FormatUrl) { size_t prefix_len; std::wstring formatted = net::FormatUrl( GURL(tests[i].input), tests[i].languages, tests[i].omit, - tests[i].escape_rules, NULL, &prefix_len); + tests[i].escape_rules, NULL, &prefix_len, NULL); EXPECT_EQ(tests[i].output, formatted) << tests[i].description; EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description; } @@ -1340,7 +1364,7 @@ TEST(NetUtilTest, FormatUrlParsed) { std::wstring formatted = net::FormatUrl( GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), - L"ja", false, UnescapeRule::NONE, &parsed, NULL); + L"ja", false, UnescapeRule::NONE, &parsed, NULL, NULL); EXPECT_EQ(L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080" L"/%E3%82%B0/?q=%E3%82%B0#\x30B0", formatted); EXPECT_EQ(L"%E3%82%B0", @@ -1360,7 +1384,7 @@ TEST(NetUtilTest, FormatUrlParsed) { formatted = net::FormatUrl( 
GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), - L"ja", false, UnescapeRule::NORMAL, &parsed, NULL); + L"ja", false, UnescapeRule::NORMAL, &parsed, NULL, NULL); EXPECT_EQ(L"http://\x30B0:\x30FC@\x30B0\x30FC\x30B0\x30EB.jp:8080" L"/\x30B0/?q=\x30B0#\x30B0", formatted); EXPECT_EQ(L"\x30B0", @@ -1379,7 +1403,7 @@ TEST(NetUtilTest, FormatUrlParsed) { formatted = net::FormatUrl( GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), - L"ja", true, UnescapeRule::NORMAL, &parsed, NULL); + L"ja", true, UnescapeRule::NORMAL, &parsed, NULL, NULL); EXPECT_EQ(L"http://\x30B0\x30FC\x30B0\x30EB.jp:8080" L"/\x30B0/?q=\x30B0#\x30B0", formatted); EXPECT_FALSE(parsed.username.is_valid()); @@ -1395,7 +1419,7 @@ TEST(NetUtilTest, FormatUrlParsed) { // View-source case. formatted = net::FormatUrl( GURL("view-source:http://user:passwd@host:81/path?query#ref"), - L"", true, UnescapeRule::NORMAL, &parsed, NULL); + L"", true, UnescapeRule::NORMAL, &parsed, NULL, NULL); EXPECT_EQ(L"view-source:http://host:81/path?query#ref", formatted); EXPECT_EQ(L"view-source:http", formatted.substr(parsed.scheme.begin, parsed.scheme.len)); @@ -1408,6 +1432,124 @@ TEST(NetUtilTest, FormatUrlParsed) { EXPECT_EQ(L"ref", formatted.substr(parsed.ref.begin, parsed.ref.len)); } +TEST(NetUtilTest, FormatUrlAdjustOffset) { + const AdjustOffsetCase basic_cases[] = { + {0, 0}, + {3, 3}, + {5, 5}, + {6, 6}, + {13, 13}, + {21, 21}, + {22, 22}, + {23, 23}, + {25, 25}, + {26, std::wstring::npos}, + {500000, std::wstring::npos}, + {std::wstring::npos, std::wstring::npos}, + }; + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(basic_cases); ++i) { + size_t offset = basic_cases[i].input_offset; + net::FormatUrl(GURL("http://www.google.com/foo/"), L"en", true, + UnescapeRule::NORMAL, NULL, NULL, &offset); + EXPECT_EQ(basic_cases[i].output_offset, offset); + } + + const struct { + const char* input_url; + size_t input_offset; + 
size_t output_offset; + } omit_auth_cases[] = { + {"http://foo:bar@www.google.com/", 6, 6}, + {"http://foo:bar@www.google.com/", 7, 7}, + {"http://foo:bar@www.google.com/", 8, std::wstring::npos}, + {"http://foo:bar@www.google.com/", 10, std::wstring::npos}, + {"http://foo:bar@www.google.com/", 11, std::wstring::npos}, + {"http://foo:bar@www.google.com/", 14, std::wstring::npos}, + {"http://foo:bar@www.google.com/", 15, 7}, + {"http://foo:bar@www.google.com/", 25, 17}, + {"http://foo@www.google.com/", 9, std::wstring::npos}, + {"http://foo@www.google.com/", 11, 7}, + }; + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(omit_auth_cases); ++i) { + size_t offset = omit_auth_cases[i].input_offset; + net::FormatUrl(GURL(omit_auth_cases[i].input_url), L"en", true, + UnescapeRule::NORMAL, NULL, NULL, &offset); + EXPECT_EQ(omit_auth_cases[i].output_offset, offset); + } + + const AdjustOffsetCase view_source_cases[] = { + {0, 0}, + {3, 3}, + {11, 11}, + {12, 12}, + {13, 13}, + {19, 19}, + {20, std::wstring::npos}, + {23, 19}, + {26, 22}, + {std::wstring::npos, std::wstring::npos}, + }; + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(view_source_cases); ++i) { + size_t offset = view_source_cases[i].input_offset; + net::FormatUrl(GURL("view-source:http://foo@www.google.com/"), L"en", true, + UnescapeRule::NORMAL, NULL, NULL, &offset); + EXPECT_EQ(view_source_cases[i].output_offset, offset); + } + + const AdjustOffsetCase idn_hostname_cases[] = { + {8, std::wstring::npos}, + {16, std::wstring::npos}, + {24, std::wstring::npos}, + {25, 12}, + {30, 17}, + }; + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(idn_hostname_cases); ++i) { + size_t offset = idn_hostname_cases[i].input_offset; + // "http://\x671d\x65e5\x3042\x3055\x3072.jp/foo/" + net::FormatUrl(GURL("http://xn--l8jvb1ey91xtjb.jp/foo/"), L"ja", true, + UnescapeRule::NORMAL, NULL, NULL, &offset); + EXPECT_EQ(idn_hostname_cases[i].output_offset, offset); + } + + const AdjustOffsetCase unescape_cases[] = { + {25, 25}, + {26, 
std::wstring::npos}, + {27, std::wstring::npos}, + {28, 26}, + {35, std::wstring::npos}, + {41, 31}, + {59, 33}, + {60, std::wstring::npos}, + {67, std::wstring::npos}, + {68, std::wstring::npos}, + }; + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(unescape_cases); ++i) { + size_t offset = unescape_cases[i].input_offset; + // "http://www.google.com/foo bar/\x30B0\x30FC\x30B0\x30EB" + net::FormatUrl(GURL( + "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"), + L"en", true, UnescapeRule::SPACES, NULL, NULL, &offset); + EXPECT_EQ(unescape_cases[i].output_offset, offset); + } + + const AdjustOffsetCase ref_cases[] = { + {30, 30}, + {31, 31}, + {32, std::wstring::npos}, + {34, 32}, + {37, 33}, + {38, std::wstring::npos}, + }; + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(ref_cases); ++i) { + size_t offset = ref_cases[i].input_offset; + // "http://www.google.com/foo.html#\x30B0\x30B0z" + net::FormatUrl(GURL( + "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z"), L"en", + true, UnescapeRule::NORMAL, NULL, NULL, &offset); + EXPECT_EQ(ref_cases[i].output_offset, offset); + } +} + TEST(NetUtilTest, SimplifyUrlForRequest) { struct { const char* input_url; @@ -1466,4 +1608,3 @@ TEST(NetUtilTest, SetExplicitlyAllowedPortsTest) { EXPECT_EQ(i, net::explicitly_allowed_ports.size()); } } - |