diff options
author | mrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-04-13 00:45:39 +0000 |
---|---|---|
committer | mrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-04-13 00:45:39 +0000 |
commit | a47f8eadd67f75d3b663fdcc898caabb335bad0b (patch) | |
tree | fdf872770d4cd58ee753f219475850490a008f6d /net | |
parent | 2e0e8253a232fa499d22e47753c5bbadaebd69e7 (diff) | |
download | chromium_src-a47f8eadd67f75d3b663fdcc898caabb335bad0b.zip chromium_src-a47f8eadd67f75d3b663fdcc898caabb335bad0b.tar.gz chromium_src-a47f8eadd67f75d3b663fdcc898caabb335bad0b.tar.bz2 |
Add multiple-offset versions of the various URL reformatting functions. Fix a couple of erroneous unit tests of offsets into username/password.
Note: This does not complete the work required for bug 78153; it covers only the first two-thirds of it.
BUG=78153
TEST=Many unit tests updated and added.
Review URL: http://codereview.chromium.org/6822038
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@81343 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net')
-rw-r--r-- | net/base/escape.cc | 117 | ||||
-rw-r--r-- | net/base/escape.h | 31 | ||||
-rw-r--r-- | net/base/escape_unittest.cc | 39 | ||||
-rw-r--r-- | net/base/net_util.cc | 426 | ||||
-rw-r--r-- | net/base/net_util.h | 59 | ||||
-rw-r--r-- | net/base/net_util_unittest.cc | 166 |
6 files changed, 636 insertions, 202 deletions
diff --git a/net/base/escape.cc b/net/base/escape.cc index 64bd107..61c3e81 100644 --- a/net/base/escape.cc +++ b/net/base/escape.cc @@ -2,11 +2,12 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#include <algorithm> - #include "net/base/escape.h" +#include <algorithm> + #include "base/logging.h" +#include "base/scoped_ptr.h" #include "base/string_piece.h" #include "base/string_util.h" #include "base/utf_string_conversions.h" @@ -98,15 +99,14 @@ const char kUrlUnescape[128] = { }; template<typename STR> -STR UnescapeURLImpl(const STR& escaped_text, - UnescapeRule::Type rules, - size_t* offset_for_adjustment) { - size_t offset_temp = string16::npos; - if (!offset_for_adjustment) - offset_for_adjustment = &offset_temp; - else if (*offset_for_adjustment >= escaped_text.length()) - *offset_for_adjustment = string16::npos; - +STR UnescapeURLWithOffsetsImpl(const STR& escaped_text, + UnescapeRule::Type rules, + std::vector<size_t>* offsets_for_adjustment) { + if (offsets_for_adjustment) { + std::for_each(offsets_for_adjustment->begin(), + offsets_for_adjustment->end(), + LimitOffset<std::wstring>(escaped_text.length())); + } // Do not unescape anything, return the |escaped_text| text. if (rules == UnescapeRule::NONE) return escaped_text; @@ -117,6 +117,7 @@ STR UnescapeURLImpl(const STR& escaped_text, STR result; result.reserve(escaped_text.length()); + AdjustEncodingOffset::Adjustments adjustments; // Locations of adjusted text. for (size_t i = 0, max = escaped_text.size(); i < max; ++i) { if (static_cast<unsigned char>(escaped_text[i]) >= 128) { // Non ASCII character, append as is. @@ -144,17 +145,9 @@ STR UnescapeURLImpl(const STR& escaped_text, // Additionally allow control characters if requested. (value < ' ' && (rules & UnescapeRule::CONTROL_CHARS)))) { // Use the unescaped version of the character. 
- size_t length_before_append = result.length(); + adjustments.push_back(i); result.push_back(value); i += 2; - - // Adjust offset to match length change. - if (*offset_for_adjustment != std::string::npos) { - if (*offset_for_adjustment > (length_before_append + 2)) - *offset_for_adjustment -= 2; - else if (*offset_for_adjustment > length_before_append) - *offset_for_adjustment = std::string::npos; - } } else { // Keep escaped. Append a percent and we'll get the following two // digits on the next loops through. @@ -174,6 +167,26 @@ STR UnescapeURLImpl(const STR& escaped_text, } } + // Make offset adjustment. + if (offsets_for_adjustment && !adjustments.empty()) { + std::for_each(offsets_for_adjustment->begin(), + offsets_for_adjustment->end(), + AdjustEncodingOffset(adjustments)); + } + + return result; +} + +template<typename STR> +STR UnescapeURLImpl(const STR& escaped_text, + UnescapeRule::Type rules, + size_t* offset_for_adjustment) { + std::vector<size_t> offsets; + if (offset_for_adjustment) + offsets.push_back(*offset_for_adjustment); + STR result = UnescapeURLWithOffsetsImpl(escaped_text, rules, &offsets); + if (offset_for_adjustment) + *offset_for_adjustment = offsets[0]; return result; } @@ -234,33 +247,49 @@ std::string EscapeExternalHandlerValue(const std::string& text) { return Escape(text, kExternalHandlerCharmap, false); } -string16 UnescapeAndDecodeUTF8URLComponent(const std::string& text, - UnescapeRule::Type rules, - size_t* offset_for_adjustment) { +string16 UnescapeAndDecodeUTF8URLComponentWithOffsets( + const std::string& text, + UnescapeRule::Type rules, + std::vector<size_t>* offsets_for_adjustment) { std::wstring result; - size_t original_offset = offset_for_adjustment ? 
*offset_for_adjustment : 0; + std::vector<size_t> original_offsets; + if (offsets_for_adjustment) + original_offsets = *offsets_for_adjustment; std::string unescaped_url( - UnescapeURLImpl(text, rules, offset_for_adjustment)); - if (UTF8ToWideAndAdjustOffset(unescaped_url.data(), unescaped_url.length(), - &result, offset_for_adjustment)) + UnescapeURLWithOffsetsImpl(text, rules, offsets_for_adjustment)); + if (UTF8ToWideAndAdjustOffsets(unescaped_url.data(), unescaped_url.length(), + &result, offsets_for_adjustment)) return WideToUTF16Hack(result); // Character set looks like it's valid. // Not valid. Return the escaped version. Undo our changes to // |offset_for_adjustment| since we haven't changed the string after all. + if (offsets_for_adjustment) + *offsets_for_adjustment = original_offsets; + return WideToUTF16Hack(UTF8ToWideAndAdjustOffsets( + text, offsets_for_adjustment)); +} + +string16 UnescapeAndDecodeUTF8URLComponent(const std::string& text, + UnescapeRule::Type rules, + size_t* offset_for_adjustment) { + std::vector<size_t> offsets; + if (offset_for_adjustment) + offsets.push_back(*offset_for_adjustment); + string16 result = + UnescapeAndDecodeUTF8URLComponentWithOffsets(text, rules, &offsets); if (offset_for_adjustment) - *offset_for_adjustment = original_offset; - return WideToUTF16Hack(UTF8ToWideAndAdjustOffset(text, - offset_for_adjustment)); + *offset_for_adjustment = offsets[0]; + return result; } std::string UnescapeURLComponent(const std::string& escaped_text, UnescapeRule::Type rules) { - return UnescapeURLImpl(escaped_text, rules, NULL); + return UnescapeURLWithOffsetsImpl<std::string>(escaped_text, rules, NULL); } string16 UnescapeURLComponent(const string16& escaped_text, UnescapeRule::Type rules) { - return UnescapeURLImpl(escaped_text, rules, NULL); + return UnescapeURLWithOffsetsImpl<string16>(escaped_text, rules, NULL); } @@ -350,3 +379,27 @@ string16 UnescapeForHTML(const string16& input) { } return text; } + 
+AdjustEncodingOffset::AdjustEncodingOffset(const Adjustments& adjustments) + : adjustments(adjustments) {} + +void AdjustEncodingOffset::operator()(size_t& offset) { + // For each encoded character occurring before an offset subtract 2. + if (offset == string16::npos) + return; + size_t adjusted_offset = offset; + for (Adjustments::const_iterator i = adjustments.begin(); + i != adjustments.end(); ++i) { + size_t location = *i; + if (offset <= location) { + offset = adjusted_offset; + return; + } + if (offset <= (location + 2)) { + offset = string16::npos; + return; + } + adjusted_offset -= 2; + } + offset = adjusted_offset; +} diff --git a/net/base/escape.h b/net/base/escape.h index faa7bd3..f4c99a3 100644 --- a/net/base/escape.h +++ b/net/base/escape.h @@ -7,6 +7,7 @@ #pragma once #include <string> +#include <vector> #include "base/basictypes.h" #include "base/string16.h" @@ -99,15 +100,20 @@ string16 UnescapeURLComponent(const string16& escaped_text, // Unescapes the given substring as a URL, and then tries to interpret the // result as being encoded as UTF-8. If the result is convertable into UTF-8, it // will be returned as converted. If it is not, the original escaped string will -// be converted into a string16 and returned. -// -// |offset_for_adjustment| may be NULL; if not, it is an offset into |text| that -// will be adjusted to point at the same logical place in the result string. If -// this isn't possible because it points into the middle of an escape sequence -// or past the end of the string, it will be set to string16::npos. +// be converted into a string16 and returned. (|offset[s]_for_adjustment|) +// specifies one or more offsets into the source strings; each offset will be +// adjusted to point at the same logical place in the result strings during +// decoding. 
If this isn't possible because an offset points past the end of +// the source strings or into the middle of a multibyte sequence, the offending +// offset will be set to std::wstring::npos. |offset[s]_for_adjustment| may be +// NULL. string16 UnescapeAndDecodeUTF8URLComponent(const std::string& text, UnescapeRule::Type rules, size_t* offset_for_adjustment); +string16 UnescapeAndDecodeUTF8URLComponentWithOffsets( + const std::string& text, + UnescapeRule::Type rules, + std::vector<size_t>* offsets_for_adjustment); // Unescape the following ampersand character codes from |text|: // < > & " ' @@ -129,4 +135,17 @@ bool EscapeQueryParamValue(const string16& text, const char* codepage, // assumes the codepage is UTF8. This is provided as a convenience. string16 EscapeQueryParamValueUTF8(const string16& text, bool use_plus); +// Private Functions (Exposed for Unit Testing) -------------------------------- + +// A function called by std::for_each that will adjust any offset which occurs +// after one or more encoded characters. +struct AdjustEncodingOffset { + typedef std::vector<size_t> Adjustments; + + explicit AdjustEncodingOffset(const Adjustments& adjustments); + void operator()(size_t& offset); + + const Adjustments& adjustments; +}; + #endif // NET_BASE_ESCAPE_H_ diff --git a/net/base/escape_unittest.cc b/net/base/escape_unittest.cc index 60d4ae3..3a8d895 100644 --- a/net/base/escape_unittest.cc +++ b/net/base/escape_unittest.cc @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. 
+#include <algorithm> #include <string> #include "net/base/escape.h" @@ -15,6 +16,8 @@ namespace { +static const size_t kNpos = string16::npos; + struct EscapeCase { const wchar_t* input; const wchar_t* output; @@ -396,3 +399,39 @@ TEST(EscapeTest, UnescapeForHTML) { EXPECT_EQ(ASCIIToUTF16(tests[i].expected_output), result); } } + +TEST(EscapeTest, AdjustEncodingOffset) { + // Imagine we have strings as shown in the following cases where the + // %XX's represent encoded characters + + // 1: abc%ECdef ==> abcXdef + std::vector<size_t> offsets; + for (size_t t = 0; t < 9; ++t) + offsets.push_back(t); + AdjustEncodingOffset::Adjustments adjustments; + adjustments.push_back(3); + std::for_each(offsets.begin(), offsets.end(), + AdjustEncodingOffset(adjustments)); + size_t expected_1[] = {0, 1, 2, 3, kNpos, kNpos, 4, 5, 6}; + EXPECT_EQ(offsets.size(), arraysize(expected_1)); + for (size_t i = 0; i < arraysize(expected_1); ++i) + EXPECT_EQ(expected_1[i], offsets[i]); + + + // 2: %ECabc%EC%ECdef%EC ==> XabcXXdefX + offsets.clear(); + for (size_t t = 0; t < 18; ++t) + offsets.push_back(t); + adjustments.clear(); + adjustments.push_back(0); + adjustments.push_back(6); + adjustments.push_back(9); + adjustments.push_back(15); + std::for_each(offsets.begin(), offsets.end(), + AdjustEncodingOffset(adjustments)); + size_t expected_2[] = {0, kNpos, kNpos, 1, 2, 3, 4, kNpos, kNpos, 5, kNpos, + kNpos, 6, 7, 8, 9, kNpos, kNpos}; + EXPECT_EQ(offsets.size(), arraysize(expected_2)); + for (size_t i = 0; i < arraysize(expected_2); ++i) + EXPECT_EQ(expected_2[i], offsets[i]); +} diff --git a/net/base/net_util.cc b/net/base/net_util.cc index 1aaa98b..378ac7b 100644 --- a/net/base/net_util.cc +++ b/net/base/net_util.cc @@ -67,7 +67,6 @@ #endif #include "unicode/datefmt.h" - using base::Time; namespace net { @@ -734,6 +733,80 @@ bool IDNToUnicodeOneComponent(const char16* comp, return false; } +struct SubtractFromOffset { + explicit SubtractFromOffset(size_t amount) + : amount(amount) {} + 
void operator()(size_t& offset) { + if (offset != std::wstring::npos) + if (offset >= amount) + offset -= amount; + else + offset = std::wstring::npos; + } + + size_t amount; +}; + +struct AddToOffset { + explicit AddToOffset(size_t amount) + : amount(amount) {} + void operator()(size_t& offset) { + if (offset != std::wstring::npos) + offset += amount; + } + + size_t amount; +}; + +std::vector<size_t> OffsetsIntoSection( + std::vector<size_t>* offsets_for_adjustment, + size_t section_begin) { + std::vector<size_t> offsets_into_section; + if (offsets_for_adjustment) { + std::transform(offsets_for_adjustment->begin(), + offsets_for_adjustment->end(), + std::back_inserter(offsets_into_section), + ClampComponentOffset(section_begin)); + std::for_each(offsets_into_section.begin(), offsets_into_section.end(), + SubtractFromOffset(section_begin)); + } + return offsets_into_section; +} + +void ApplySectionAdjustments(const std::vector<size_t>& offsets_into_section, + std::vector<size_t>* offsets_for_adjustment, + size_t old_section_len, + size_t new_section_len, + size_t section_begin) { + if (offsets_for_adjustment) { + DCHECK_EQ(offsets_for_adjustment->size(), offsets_into_section.size()); + std::vector<size_t>::const_iterator host_offsets_iter = + offsets_into_section.begin(); + for (std::vector<size_t>::iterator offsets_iter = + offsets_for_adjustment->begin(); + offsets_iter != offsets_for_adjustment->end(); + ++offsets_iter, ++host_offsets_iter) { + size_t offset = *offsets_iter; + if (offset == std::wstring::npos || offset < section_begin) { + // The offset is before the host section so leave it as is. + continue; + } + if (offset >= section_begin + old_section_len) { + // The offset is after the host section so adjust by host length delta. + offset += new_section_len - old_section_len; + } else if (*host_offsets_iter != std::wstring::npos) { + // The offset is within the host and valid so adjust by the host + // reformatting offsets results. 
+ offset = section_begin + *host_offsets_iter; + } else { + // The offset is invalid. + offset = std::wstring::npos; + } + *offsets_iter = offset; + } + } +} + // If |component| is valid, its begin is incremented by |delta|. void AdjustComponent(int delta, url_parse::Component* component) { if (!component->is_valid()) @@ -760,7 +833,7 @@ std::wstring FormatUrlInternal(const GURL& url, UnescapeRule::Type unescape_rules, url_parse::Parsed* new_parsed, size_t* prefix_end, - size_t* offset_for_adjustment); + std::vector<size_t>* offsets_for_adjustment); // Helper for FormatUrl()/FormatUrlInternal(). std::wstring FormatViewSourceUrl(const GURL& url, @@ -769,18 +842,20 @@ std::wstring FormatViewSourceUrl(const GURL& url, UnescapeRule::Type unescape_rules, url_parse::Parsed* new_parsed, size_t* prefix_end, - size_t* offset_for_adjustment) { + std::vector<size_t>* offsets_for_adjustment) { DCHECK(new_parsed); + DCHECK(offsets_for_adjustment); const wchar_t* const kWideViewSource = L"view-source:"; const size_t kViewSourceLengthPlus1 = 12; + std::vector<size_t> saved_offsets(*offsets_for_adjustment); GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLengthPlus1)); - size_t temp_offset = (*offset_for_adjustment == std::wstring::npos) ? - std::wstring::npos : (*offset_for_adjustment - kViewSourceLengthPlus1); - size_t* temp_offset_ptr = (*offset_for_adjustment < kViewSourceLengthPlus1) ? - NULL : &temp_offset; + // Clamp the offsets to the source area. + std::for_each(offsets_for_adjustment->begin(), + offsets_for_adjustment->end(), + SubtractFromOffset(kViewSourceLengthPlus1)); std::wstring result = FormatUrlInternal(real_url, languages, format_types, - unescape_rules, new_parsed, prefix_end, temp_offset_ptr); + unescape_rules, new_parsed, prefix_end, offsets_for_adjustment); result.insert(0, kWideViewSource); // Adjust position values. 
@@ -794,57 +869,61 @@ std::wstring FormatViewSourceUrl(const GURL& url, AdjustComponents(kViewSourceLengthPlus1, new_parsed); if (prefix_end) *prefix_end += kViewSourceLengthPlus1; - if (temp_offset_ptr) { - *offset_for_adjustment = (temp_offset == std::wstring::npos) ? - std::wstring::npos : (temp_offset + kViewSourceLengthPlus1); + std::for_each(offsets_for_adjustment->begin(), + offsets_for_adjustment->end(), + AddToOffset(kViewSourceLengthPlus1)); + // Restore all offsets which were not affected by FormatUrlInternal. + DCHECK_EQ(saved_offsets.size(), offsets_for_adjustment->size()); + for (size_t i = 0; i < saved_offsets.size(); ++i) { + if (saved_offsets[i] < kViewSourceLengthPlus1) + (*offsets_for_adjustment)[i] = saved_offsets[i]; } return result; } // Appends the substring |in_component| inside of the URL |spec| to |output|, // and the resulting range will be filled into |out_component|. |unescape_rules| -// defines how to clean the URL for human readability. |offset_for_adjustment| -// is an offset into |output| which will be adjusted based on how it maps to the -// component being converted; if it is less than output->length(), it will be -// untouched, and if it is greater than output->length() + in_component.len it -// will be shortened by the difference in lengths between the input and output -// components. Otherwise it points into the component being converted, and is -// adjusted to point to the same logical place in |output|. -// |offset_for_adjustment| may not be NULL. +// defines how to clean the URL for human readability. |offsets_for_adjustment| +// is an array of offsets into |output| each of which will be adjusted based on +// how it maps to the component being converted; if it is less than +// output->length(), it will be untouched, and if it is greater than +// output->length() + in_component.len it will be adjusted by the difference in +// lengths between the input and output components. 
Otherwise it points into +// the component being converted, and is adjusted to point to the same logical +// place in |output|. |offsets_for_adjustment| may not be NULL. void AppendFormattedComponent(const std::string& spec, const url_parse::Component& in_component, UnescapeRule::Type unescape_rules, std::wstring* output, url_parse::Component* out_component, - size_t* offset_for_adjustment) { + std::vector<size_t>* offsets_for_adjustment) { DCHECK(output); - DCHECK(offset_for_adjustment); + DCHECK(offsets_for_adjustment); if (in_component.is_nonempty()) { - out_component->begin = static_cast<int>(output->length()); - size_t offset_past_current_output = - ((*offset_for_adjustment == std::wstring::npos) || - (*offset_for_adjustment < output->length())) ? - std::wstring::npos : (*offset_for_adjustment - output->length()); - size_t* offset_into_component = - (offset_past_current_output >= static_cast<size_t>(in_component.len)) ? - NULL : &offset_past_current_output; + size_t component_begin = output->length(); + out_component->begin = static_cast<int>(component_begin); + + // Compose a list of offsets within the component area. + std::vector<size_t> offsets_into_component = + OffsetsIntoSection(offsets_for_adjustment, component_begin); + if (unescape_rules == UnescapeRule::NONE) { - output->append(UTF8ToWideAndAdjustOffset( + output->append(UTF8ToWideAndAdjustOffsets( spec.substr(in_component.begin, in_component.len), - offset_into_component)); + &offsets_into_component)); } else { - output->append(UTF16ToWideHack(UnescapeAndDecodeUTF8URLComponent( - spec.substr(in_component.begin, in_component.len), unescape_rules, - offset_into_component))); - } - out_component->len = - static_cast<int>(output->length()) - out_component->begin; - if (offset_into_component) { - *offset_for_adjustment = (*offset_into_component == std::wstring::npos) ? 
- std::wstring::npos : (out_component->begin + *offset_into_component); - } else if (offset_past_current_output != std::wstring::npos) { - *offset_for_adjustment += out_component->len - in_component.len; + output->append(UTF16ToWideHack( + UnescapeAndDecodeUTF8URLComponentWithOffsets( + spec.substr(in_component.begin, in_component.len), unescape_rules, + &offsets_into_component))); } + size_t new_component_len = output->length() - component_begin; + out_component->len = static_cast<int>(new_component_len); + + // Apply offset adjustments. + size_t old_component_len = static_cast<size_t>(in_component.len); + ApplySectionAdjustments(offsets_into_component, offsets_for_adjustment, + old_component_len, new_component_len, component_begin); } else { out_component->reset(); } @@ -858,15 +937,16 @@ std::wstring FormatUrlInternal(const GURL& url, UnescapeRule::Type unescape_rules, url_parse::Parsed* new_parsed, size_t* prefix_end, - size_t* offset_for_adjustment) { + std::vector<size_t>* offsets_for_adjustment) { url_parse::Parsed parsed_temp; if (!new_parsed) new_parsed = &parsed_temp; else *new_parsed = url_parse::Parsed(); - size_t offset_temp = std::wstring::npos; - if (!offset_for_adjustment) - offset_for_adjustment = &offset_temp; + + std::vector<size_t> offsets_temp; + if (!offsets_for_adjustment) + offsets_for_adjustment = &offsets_temp; std::wstring url_string; @@ -874,7 +954,9 @@ std::wstring FormatUrlInternal(const GURL& url, if (url.is_empty()) { if (prefix_end) *prefix_end = 0; - *offset_for_adjustment = std::wstring::npos; + std::for_each(offsets_for_adjustment->begin(), + offsets_for_adjustment->end(), + LimitOffset<std::wstring>(0)); return url_string; } @@ -886,15 +968,17 @@ std::wstring FormatUrlInternal(const GURL& url, if (url.SchemeIs(kViewSource) && !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) { return FormatViewSourceUrl(url, languages, format_types, - unescape_rules, new_parsed, prefix_end, offset_for_adjustment); + 
unescape_rules, new_parsed, prefix_end, offsets_for_adjustment); } // We handle both valid and invalid URLs (this will give us the spec // regardless of validity). const std::string& spec = url.possibly_invalid_spec(); const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); - if (*offset_for_adjustment >= spec.length()) - *offset_for_adjustment = std::wstring::npos; + size_t spec_length = spec.length(); + std::for_each(offsets_for_adjustment->begin(), + offsets_for_adjustment->end(), + LimitOffset<std::wstring>(spec_length)); // Copy everything before the username (the scheme and the separators.) // These are ASCII. @@ -922,48 +1006,47 @@ std::wstring FormatUrlInternal(const GURL& url, // e.g. "http://google.com:search@evil.ru/" new_parsed->username.reset(); new_parsed->password.reset(); - if ((*offset_for_adjustment != std::wstring::npos) && + // Update the offsets based on removed username and/or password. + if (!offsets_for_adjustment->empty() && (parsed.username.is_nonempty() || parsed.password.is_nonempty())) { + AdjustOffset::Adjustments adjustments; if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) { // The seeming off-by-one and off-by-two in these first two lines are to // account for the ':' after the username and '@' after the password. - if (*offset_for_adjustment > - static_cast<size_t>(parsed.password.end())) { - *offset_for_adjustment -= - (parsed.username.len + parsed.password.len + 2); - } else if (*offset_for_adjustment > - static_cast<size_t>(parsed.username.begin)) { - *offset_for_adjustment = std::wstring::npos; - } + adjustments.push_back(AdjustOffset::Adjustment( + static_cast<size_t>(parsed.username.begin), + static_cast<size_t>(parsed.username.len + parsed.password.len + + 2), 0)); } else { const url_parse::Component* nonempty_component = parsed.username.is_nonempty() ? 
&parsed.username : &parsed.password; - // The seeming off-by-one in these first two lines is to account for the - // '@' after the username/password. - if (*offset_for_adjustment > - static_cast<size_t>(nonempty_component->end())) { - *offset_for_adjustment -= (nonempty_component->len + 1); - } else if (*offset_for_adjustment > - static_cast<size_t>(nonempty_component->begin)) { - *offset_for_adjustment = std::wstring::npos; - } + // The seeming off-by-one in below is to account for the '@' after the + // username/password. + adjustments.push_back(AdjustOffset::Adjustment( + static_cast<size_t>(nonempty_component->begin), + static_cast<size_t>(nonempty_component->len + 1), 0)); } + + // Make offset adjustment. + std::for_each(offsets_for_adjustment->begin(), + offsets_for_adjustment->end(), + AdjustOffset(adjustments)); } } else { AppendFormattedComponent(spec, parsed.username, unescape_rules, &url_string, - &new_parsed->username, offset_for_adjustment); + &new_parsed->username, offsets_for_adjustment); if (parsed.password.is_valid()) url_string.push_back(':'); AppendFormattedComponent(spec, parsed.password, unescape_rules, &url_string, - &new_parsed->password, offset_for_adjustment); + &new_parsed->password, offsets_for_adjustment); if (parsed.username.is_valid() || parsed.password.is_valid()) url_string.push_back('@'); } if (prefix_end) *prefix_end = static_cast<size_t>(url_string.length()); - AppendFormattedHost(url, languages, &url_string, new_parsed, - offset_for_adjustment); + AppendFormattedHostWithOffsets(url, languages, &url_string, new_parsed, + offsets_for_adjustment); // Port. 
if (parsed.port.is_nonempty()) { @@ -981,41 +1064,35 @@ std::wstring FormatUrlInternal(const GURL& url, if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) || !CanStripTrailingSlash(url)) { AppendFormattedComponent(spec, parsed.path, unescape_rules, &url_string, - &new_parsed->path, offset_for_adjustment); + &new_parsed->path, offsets_for_adjustment); } if (parsed.query.is_valid()) url_string.push_back('?'); AppendFormattedComponent(spec, parsed.query, unescape_rules, &url_string, - &new_parsed->query, offset_for_adjustment); + &new_parsed->query, offsets_for_adjustment); // Reference is stored in valid, unescaped UTF-8, so we can just convert. if (parsed.ref.is_valid()) { url_string.push_back('#'); - new_parsed->ref.begin = url_string.length(); - size_t offset_past_current_output = - ((*offset_for_adjustment == std::wstring::npos) || - (*offset_for_adjustment < url_string.length())) ? - std::wstring::npos : (*offset_for_adjustment - url_string.length()); - size_t* offset_into_ref = - (offset_past_current_output >= static_cast<size_t>(parsed.ref.len)) ? - NULL : &offset_past_current_output; + size_t ref_begin = url_string.length(); + new_parsed->ref.begin = static_cast<int>(ref_begin); + + // Compose a list of offsets within the section. + std::vector<size_t> offsets_into_ref = + OffsetsIntoSection(offsets_for_adjustment, ref_begin); + if (parsed.ref.len > 0) { - url_string.append(UTF8ToWideAndAdjustOffset(spec.substr(parsed.ref.begin, - parsed.ref.len), - offset_into_ref)); - } - new_parsed->ref.len = url_string.length() - new_parsed->ref.begin; - if (offset_into_ref) { - *offset_for_adjustment = (*offset_into_ref == std::wstring::npos) ? - std::wstring::npos : (new_parsed->ref.begin + *offset_into_ref); - } else if (offset_past_current_output != std::wstring::npos) { - // We clamped the offset near the beginning of this function to ensure it - // was within the input URL. 
If we reach here, the input was something - // invalid and non-parseable such that the offset was past any component - // we could figure out. In this case it won't be represented in the - // output string, so reset it. - *offset_for_adjustment = std::wstring::npos; + url_string.append(UTF8ToWideAndAdjustOffsets(spec.substr(parsed.ref.begin, + parsed.ref.len), + &offsets_into_ref)); } + size_t old_ref_len = static_cast<size_t>(parsed.ref.len); + size_t new_ref_len = url_string.length() - new_parsed->ref.begin; + new_parsed->ref.len = static_cast<int>(new_ref_len); + + // Apply offset adjustments. + ApplySectionAdjustments(offsets_into_ref, offsets_for_adjustment, + old_ref_len, new_ref_len, ref_begin); } // If we need to strip out http do it after the fact. This way we don't need @@ -1023,12 +1100,11 @@ std::wstring FormatUrlInternal(const GURL& url, const size_t kHTTPSize = arraysize(kHTTP) - 1; if (omit_http && !url_string.compare(0, kHTTPSize, kHTTP)) { url_string = url_string.substr(kHTTPSize); - if (*offset_for_adjustment != std::wstring::npos) { - if (*offset_for_adjustment < kHTTPSize) - *offset_for_adjustment = std::wstring::npos; - else - *offset_for_adjustment -= kHTTPSize; - } + AdjustOffset::Adjustments adjustments; + adjustments.push_back(AdjustOffset::Adjustment(0, kHTTPSize, 0)); + std::for_each(offsets_for_adjustment->begin(), + offsets_for_adjustment->end(), + AdjustOffset(adjustments)); if (prefix_end) *prefix_end -= kHTTPSize; @@ -1186,21 +1262,20 @@ std::string GetHeaderParamValue(const std::string& field, // // We may want to skip this step in the case of file URLs to allow unicode // UNC hostnames regardless of encodings. 
-std::wstring IDNToUnicode(const char* host, - size_t host_len, - const std::wstring& languages, - size_t* offset_for_adjustment) { +std::wstring IDNToUnicodeWithOffsets( + const char* host, + size_t host_len, + const std::wstring& languages, + std::vector<size_t>* offsets_for_adjustment) { // Convert the ASCII input to a wide string for ICU. string16 input16; input16.reserve(host_len); input16.insert(input16.end(), host, host + host_len); - string16 out16; - size_t output_offset = offset_for_adjustment ? - *offset_for_adjustment : std::wstring::npos; - // Do each component of the host separately, since we enforce script matching // on a per-component basis. + AdjustOffset::Adjustments adjustments; + string16 out16; for (size_t component_start = 0, component_end; component_start < input16.length(); component_start = component_end + 1) { @@ -1209,22 +1284,18 @@ std::wstring IDNToUnicode(const char* host, if (component_end == string16::npos) component_end = input16.length(); // For getting the last component. size_t component_length = component_end - component_start; - - size_t output_component_start = out16.length(); + size_t new_component_start = out16.length(); bool converted_idn = false; if (component_end > component_start) { // Add the substring that we just found. 
converted_idn = IDNToUnicodeOneComponent(input16.data() + component_start, component_length, languages, &out16); } - size_t output_component_length = out16.length() - output_component_start; + size_t new_component_length = out16.length() - new_component_start; - if ((output_offset != std::wstring::npos) && - (*offset_for_adjustment > component_start)) { - if ((*offset_for_adjustment < component_end) && converted_idn) - output_offset = std::wstring::npos; - else - output_offset += output_component_length - component_length; + if (converted_idn && offsets_for_adjustment) { + adjustments.push_back(AdjustOffset::Adjustment( + component_start, component_length, new_component_length)); } // Need to add the dot we just found (if we found one). @@ -1232,10 +1303,28 @@ std::wstring IDNToUnicode(const char* host, out16.push_back('.'); } - if (offset_for_adjustment) - *offset_for_adjustment = output_offset; + // Make offset adjustment. + if (offsets_for_adjustment && !adjustments.empty()) { + std::for_each(offsets_for_adjustment->begin(), + offsets_for_adjustment->end(), + AdjustOffset(adjustments)); + } - return UTF16ToWideAndAdjustOffset(out16, offset_for_adjustment); + return UTF16ToWideAndAdjustOffsets(out16, offsets_for_adjustment); +} + +std::wstring IDNToUnicode(const char* host, + size_t host_len, + const std::wstring& languages, + size_t* offset_for_adjustment) { + std::vector<size_t> offsets; + if (offset_for_adjustment) + offsets.push_back(*offset_for_adjustment); + std::wstring result = + IDNToUnicodeWithOffsets(host, host_len, languages, &offsets); + if (offset_for_adjustment) + *offset_for_adjustment = offsets[0]; + return result; } std::string CanonicalizeHost(const std::string& host, @@ -1648,51 +1737,73 @@ std::string GetHostOrSpecFromURL(const GURL& url) { return url.has_host() ? 
TrimEndingDot(url.host()) : url.spec(); } -void AppendFormattedHost(const GURL& url, - const std::wstring& languages, - std::wstring* output, - url_parse::Parsed* new_parsed, - size_t* offset_for_adjustment) { +void AppendFormattedHostWithOffsets( + const GURL& url, + const std::wstring& languages, + std::wstring* output, + url_parse::Parsed* new_parsed, + std::vector<size_t>* offsets_for_adjustment) { DCHECK(output); const url_parse::Component& host = url.parsed_for_possibly_invalid_spec().host; if (host.is_nonempty()) { // Handle possible IDN in the host name. - int new_host_begin = static_cast<int>(output->length()); + size_t host_begin = output->length(); if (new_parsed) - new_parsed->host.begin = new_host_begin; - size_t offset_past_current_output = - (!offset_for_adjustment || - (*offset_for_adjustment == std::wstring::npos) || - (*offset_for_adjustment < output->length())) ? - std::wstring::npos : (*offset_for_adjustment - output->length()); - size_t* offset_into_host = - (offset_past_current_output >= static_cast<size_t>(host.len)) ? - NULL : &offset_past_current_output; + new_parsed->host.begin = static_cast<int>(host_begin); + size_t old_host_len = static_cast<size_t>(host.len); + + // Compose a list of offsets within the host area. 
+ std::vector<size_t> offsets_into_host = + OffsetsIntoSection(offsets_for_adjustment, host_begin); const std::string& spec = url.possibly_invalid_spec(); DCHECK(host.begin >= 0 && ((spec.length() == 0 && host.begin == 0) || host.begin < static_cast<int>(spec.length()))); - output->append(IDNToUnicode(&spec[host.begin], - static_cast<size_t>(host.len), languages, offset_into_host)); + output->append(IDNToUnicodeWithOffsets(&spec[host.begin], old_host_len, + languages, &offsets_into_host)); - int new_host_len = static_cast<int>(output->length()) - new_host_begin; + size_t new_host_len = output->length() - host_begin; if (new_parsed) - new_parsed->host.len = new_host_len; - if (offset_into_host) { - *offset_for_adjustment = (*offset_into_host == std::wstring::npos) ? - std::wstring::npos : (new_host_begin + *offset_into_host); - } else if (offset_past_current_output != std::wstring::npos) { - *offset_for_adjustment += new_host_len - host.len; - } + new_parsed->host.len = static_cast<int>(new_host_len); + + // Apply offset adjustments. + ApplySectionAdjustments(offsets_into_host, offsets_for_adjustment, + old_host_len, new_host_len, host_begin); } else if (new_parsed) { new_parsed->host.reset(); } } +void AppendFormattedHost(const GURL& url, + const std::wstring& languages, + std::wstring* output, + url_parse::Parsed* new_parsed, + size_t* offset_for_adjustment) { + std::vector<size_t> offsets; + if (offset_for_adjustment) + offsets.push_back(*offset_for_adjustment); + AppendFormattedHostWithOffsets(url, languages, output, new_parsed, &offsets); + if (offset_for_adjustment) + *offset_for_adjustment = offsets[0]; +} + // TODO(viettrungluu): convert the wstring |FormatUrlInternal()|. 
+string16 FormatUrlWithOffsets(const GURL& url, + const std::string& languages, + FormatUrlTypes format_types, + UnescapeRule::Type unescape_rules, + url_parse::Parsed* new_parsed, + size_t* prefix_end, + std::vector<size_t>* offsets_for_adjustment) { + return WideToUTF16Hack( + FormatUrlInternal(url, ASCIIToWide(languages), format_types, + unescape_rules, new_parsed, prefix_end, + offsets_for_adjustment)); +} + string16 FormatUrl(const GURL& url, const std::string& languages, FormatUrlTypes format_types, @@ -1700,10 +1811,15 @@ string16 FormatUrl(const GURL& url, url_parse::Parsed* new_parsed, size_t* prefix_end, size_t* offset_for_adjustment) { - return WideToUTF16Hack( + std::vector<size_t> offsets; + if (offset_for_adjustment) + offsets.push_back(*offset_for_adjustment); + string16 result = WideToUTF16Hack( FormatUrlInternal(url, ASCIIToWide(languages), format_types, - unescape_rules, new_parsed, prefix_end, - offset_for_adjustment)); + unescape_rules, new_parsed, prefix_end, &offsets)); + if (offset_for_adjustment) + *offset_for_adjustment = offsets[0]; + return result; } bool CanStripTrailingSlash(const GURL& url) { @@ -2156,4 +2272,12 @@ NetworkInterface::NetworkInterface(const std::string& name, NetworkInterface::~NetworkInterface() { } +ClampComponentOffset::ClampComponentOffset(size_t component_start) + : component_start(component_start) {} + +size_t ClampComponentOffset::operator()(size_t offset) { + return (offset >= component_start) ? + offset : std::wstring::npos; +} + } // namespace net diff --git a/net/base/net_util.h b/net/base/net_util.h index bae27c3..0ff3369 100644 --- a/net/base/net_util.h +++ b/net/base/net_util.h @@ -198,15 +198,21 @@ std::string GetFileNameFromCD(const std::string& header, // script-language pairs (currently Han, Kana and Hangul for zh,ja and ko). // When |languages| is empty, even that mixing is not allowed. 
// -// |offset_for_adjustment| is an offset into |host|, which will be adjusted to -// point at the same logical place in the output string. If this isn't possible -// because it points past the end of |host| or into the middle of a punycode -// sequence, it will be set to std::wstring::npos. |offset_for_adjustment| may -// be NULL. +// (|offset[s]_for_adjustment|) specifies one or more offsets into the original +// |host|; each offset will be adjusted to point at the same logical +// place in the result string during decoding. If this isn't possible because +// an offset points past the end of |host| or into the middle of a punycode +// sequence, the offending offset will be set to std::wstring::npos. +// |offset[s]_for_adjustment| may be NULL. std::wstring IDNToUnicode(const char* host, size_t host_len, const std::wstring& languages, size_t* offset_for_adjustment); +std::wstring IDNToUnicodeWithOffsets( + const char* host, + size_t host_len, + const std::wstring& languages, + std::vector<size_t>* offsets_for_adjustment); // Canonicalizes |host| and returns it. Also fills |host_info| with // IP address information. |host_info| must not be NULL. @@ -292,11 +298,24 @@ int SetNonBlocking(int fd); // the user. The given parsed structure will be updated. The host name formatter // also takes the same accept languages component as ElideURL. |new_parsed| may // be null. +// +// (|offset[s]_for_adjustment|) specifies one or more offsets into the original +// |url|'s spec(); each offset will be adjusted to point at the same logical +// place in the result strings after reformatting of the host. If this isn't +// possible because an offset points past the end of the host or into the middle +// of a multi-character sequence, the offending offset will be set to +// std::wstring::npos. |offset[s]_for_adjustment| may be NULL. 
void AppendFormattedHost(const GURL& url, const std::wstring& languages, std::wstring* output, url_parse::Parsed* new_parsed, size_t* offset_for_adjustment); +void AppendFormattedHostWithOffsets( + const GURL& url, + const std::wstring& languages, + std::wstring* output, + url_parse::Parsed* new_parsed, + std::vector<size_t>* offsets_for_adjustment); // Creates a string representation of |url|. The IDN host name may be in Unicode // if |languages| accepts the Unicode representation. |format_type| is a bitmask @@ -309,12 +328,13 @@ void AppendFormattedHost(const GURL& url, // The last three parameters may be NULL. // |new_parsed| will be set to the parsing parameters of the resultant URL. // |prefix_end| will be the length before the hostname of the resultant URL. -// |offset_for_adjustment| is an offset into the original |url|'s spec(), which -// will be modified to reflect changes this function makes to the output string; -// for example, if |url| is "http://a:b@c.com/", |omit_username_password| is -// true, and |offset_for_adjustment| is 12 (the offset of '.'), then on return -// the output string will be "http://c.com/" and |offset_for_adjustment| will be -// 8. If the offset cannot be successfully adjusted (e.g. because it points +// +// (|offset[s]_for_adjustment|) specifies one or more offsets into the original +// |url|'s spec(); each offset will be modified to reflect changes this function +// makes to the output string. For example, if |url| is "http://a:b@c.com/", +// |omit_username_password| is true, and an offset is 12 (the offset of '.'), +// then on return the output string will be "http://c.com/" and the offset will +// be 8. If an offset cannot be successfully adjusted (e.g. because it points // into the middle of a component that was entirely removed, past the end of the // string, or into the middle of an encoding sequence), it will be set to // string16::npos. 
@@ -325,6 +345,13 @@ string16 FormatUrl(const GURL& url, url_parse::Parsed* new_parsed, size_t* prefix_end, size_t* offset_for_adjustment); +string16 FormatUrlWithOffsets(const GURL& url, + const std::string& languages, + FormatUrlTypes format_types, + UnescapeRule::Type unescape_rules, + url_parse::Parsed* new_parsed, + size_t* prefix_end, + std::vector<size_t>* offsets_for_adjustment); // This is a convenience function for FormatUrl() with // format_types = kFormatUrlOmitAll and unescape = SPACES. This is the typical @@ -454,6 +481,16 @@ typedef std::list<NetworkInterface> NetworkInterfaceList; // Can be called only on a thread that allows IO. bool GetNetworkList(NetworkInterfaceList* networks); +// Private adjustment function called by std::transform which sets the offset +// to npos if the offset occurs before |component_start|, otherwise doesn't +// alter the offset. Exposed here for unit testing. +struct ClampComponentOffset { + explicit ClampComponentOffset(size_t component_start); + size_t operator()(size_t offset); + + const size_t component_start; +}; + } // namespace net #endif // NET_BASE_NET_UTIL_H_ diff --git a/net/base/net_util_unittest.cc b/net/base/net_util_unittest.cc index b547f83..4265866 100644 --- a/net/base/net_util_unittest.cc +++ b/net/base/net_util_unittest.cc @@ -4,6 +4,8 @@ #include "net/base/net_util.h" +#include <algorithm> + #include "base/file_path.h" #include "base/format_macros.h" #include "base/string_number_conversions.h" @@ -21,6 +23,8 @@ namespace net { namespace { +static const size_t kNpos = string16::npos; + struct FileCase { const wchar_t* file; const char* url; @@ -988,6 +992,20 @@ TEST(NetUtilTest, IDNToUnicodeAdjustOffset) { &offset); EXPECT_EQ(adjust_cases[i].output_offset, offset); } + + std::vector<size_t> offsets; + for (size_t i = 0; i < 40; ++i) + offsets.push_back(i); + IDNToUnicodeWithOffsets("test.xn--cy2a840a.xn--1lq90ic7f1rc.test", 39, + L"zh-CN", &offsets); + size_t expected[] = {0, 1, 2, 3, 4, 5, kNpos, 
kNpos, kNpos, kNpos, kNpos, + kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 7, 8, kNpos, + kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, + kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 12, 13, 14, 15, + 16, kNpos}; + ASSERT_EQ(40U, arraysize(expected)); + for (size_t i = 0; i < 40; ++i) + EXPECT_EQ(expected[i], offsets[i]); } TEST(NetUtilTest, CompliantHost) { @@ -1799,13 +1817,24 @@ TEST(NetUtilTest, FormatUrlAdjustOffset) { EXPECT_EQ(basic_cases[i].output_offset, offset); } + size_t url_size = 26; + std::vector<size_t> offsets; + for (size_t i = 0; i < url_size + 1; ++i) + offsets.push_back(i); + FormatUrlWithOffsets(GURL("http://www.google.com/foo/"), "en", + kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, + NULL, NULL, &offsets); + for (size_t i = 0; i < url_size; ++i) + EXPECT_EQ(i, offsets[i]); + EXPECT_EQ(kNpos, offsets[url_size]); + const struct { const char* input_url; size_t input_offset; size_t output_offset; } omit_auth_cases[] = { {"http://foo:bar@www.google.com/", 6, 6}, - {"http://foo:bar@www.google.com/", 7, 7}, + {"http://foo:bar@www.google.com/", 7, string16::npos}, {"http://foo:bar@www.google.com/", 8, string16::npos}, {"http://foo:bar@www.google.com/", 10, string16::npos}, {"http://foo:bar@www.google.com/", 11, string16::npos}, @@ -1823,13 +1852,28 @@ TEST(NetUtilTest, FormatUrlAdjustOffset) { EXPECT_EQ(omit_auth_cases[i].output_offset, offset); } + url_size = 30; + offsets.clear(); + for (size_t i = 0; i < url_size; ++i) + offsets.push_back(i); + FormatUrlWithOffsets(GURL("http://foo:bar@www.google.com/"), "en", + kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, + NULL, NULL, &offsets); + for (size_t i = 0; i < 7; ++i) + EXPECT_EQ(i, offsets[i]); + for (size_t i = 7; i < 15; ++i) + EXPECT_EQ(kNpos, offsets[i]); + for (size_t i = 16; i < url_size; ++i) + EXPECT_EQ(i - 8 , offsets[i]); + const AdjustOffsetCase view_source_cases[] = { {0, 0}, {3, 3}, {11, 11}, {12, 12}, {13, 13}, - {19, 19}, + {18, 18}, + {19, string16::npos}, {20, 
string16::npos}, {23, 19}, {26, 22}, @@ -1843,6 +1887,20 @@ TEST(NetUtilTest, FormatUrlAdjustOffset) { EXPECT_EQ(view_source_cases[i].output_offset, offset); } + url_size = 38; + offsets.clear(); + for (size_t i = 0; i < url_size; ++i) + offsets.push_back(i); + FormatUrlWithOffsets(GURL("view-source:http://foo@www.google.com/"), "en", + kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, + NULL, NULL, &offsets); + size_t expected[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, kNpos, kNpos, kNpos, kNpos, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32, 33}; + ASSERT_EQ(url_size, arraysize(expected)); + for (size_t i = 0; i < url_size; ++i) + EXPECT_EQ(expected[i], offsets[i]); + const AdjustOffsetCase idn_hostname_cases[] = { {8, string16::npos}, {16, string16::npos}, @@ -1859,6 +1917,21 @@ TEST(NetUtilTest, FormatUrlAdjustOffset) { EXPECT_EQ(idn_hostname_cases[i].output_offset, offset); } + url_size = 33; + offsets.clear(); + for (size_t i = 0; i < url_size; ++i) + offsets.push_back(i); + FormatUrlWithOffsets(GURL("http://xn--l8jvb1ey91xtjb.jp/foo/"), "ja", + kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, + NULL, NULL, &offsets); + size_t expected_1[] = {0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, + kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, + kNpos, kNpos, kNpos, kNpos, kNpos, 12, 13, 14, 15, 16, + 17, 18, 19}; + ASSERT_EQ(url_size, arraysize(expected_1)); + for (size_t i = 0; i < url_size; ++i) + EXPECT_EQ(expected_1[i], offsets[i]); + const AdjustOffsetCase unescape_cases[] = { {25, 25}, {26, string16::npos}, @@ -1881,11 +1954,31 @@ TEST(NetUtilTest, FormatUrlAdjustOffset) { EXPECT_EQ(unescape_cases[i].output_offset, offset); } + url_size = 68; + offsets.clear(); + for (size_t i = 0; i < url_size; ++i) + offsets.push_back(i); + FormatUrlWithOffsets(GURL( + "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"), + "en", kFormatUrlOmitUsernamePassword, UnescapeRule::SPACES, NULL, 
NULL, + &offsets); + size_t expected_2[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, kNpos, kNpos, + 26, 27, 28, 29, 30, kNpos, kNpos, kNpos, kNpos, kNpos, + kNpos, kNpos, kNpos, 31, kNpos, kNpos, kNpos, kNpos, + kNpos, kNpos, kNpos, kNpos, 32, kNpos, kNpos, kNpos, + kNpos, kNpos, kNpos, kNpos, kNpos, 33, kNpos, kNpos, + kNpos, kNpos, kNpos, kNpos, kNpos, kNpos}; + ASSERT_EQ(url_size, arraysize(expected_2)); + for (size_t i = 0; i < url_size; ++i) + EXPECT_EQ(expected_2[i], offsets[i]); + const AdjustOffsetCase ref_cases[] = { {30, 30}, {31, 31}, {32, string16::npos}, {34, 32}, + {35, string16::npos}, {37, 33}, {38, string16::npos}, }; @@ -1899,6 +1992,22 @@ TEST(NetUtilTest, FormatUrlAdjustOffset) { EXPECT_EQ(ref_cases[i].output_offset, offset); } + url_size = 38; + offsets.clear(); + for (size_t i = 0; i < url_size; ++i) + offsets.push_back(i); + // "http://www.google.com/foo.html#\x30B0\x30B0z" + FormatUrlWithOffsets(GURL( + "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z"), "en", + kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, NULL, NULL, + &offsets); + size_t expected_3[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, kNpos, kNpos, 32, kNpos, kNpos, 33}; + ASSERT_EQ(url_size, arraysize(expected_3)); + for (size_t i = 0; i < url_size; ++i) + EXPECT_EQ(expected_3[i], offsets[i]); + const AdjustOffsetCase omit_http_cases[] = { {0, string16::npos}, {3, string16::npos}, @@ -1912,6 +2021,18 @@ TEST(NetUtilTest, FormatUrlAdjustOffset) { EXPECT_EQ(omit_http_cases[i].output_offset, offset); } + url_size = 23; + offsets.clear(); + for (size_t i = 0; i < url_size; ++i) + offsets.push_back(i); + FormatUrlWithOffsets(GURL("http://www.google.com"), "en", + kFormatUrlOmitHTTP, UnescapeRule::NORMAL, NULL, NULL, &offsets); + size_t expected_4[] = {kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, 1, + 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11, 12, 13, 14, kNpos}; + ASSERT_EQ(url_size, arraysize(expected_4)); + for (size_t i = 0; i < url_size; ++i) + EXPECT_EQ(expected_4[i], offsets[i]); + const AdjustOffsetCase omit_http_start_with_ftp[] = { {0, 0}, {3, 3}, @@ -1924,6 +2045,18 @@ TEST(NetUtilTest, FormatUrlAdjustOffset) { EXPECT_EQ(omit_http_start_with_ftp[i].output_offset, offset); } + url_size = 23; + offsets.clear(); + for (size_t i = 0; i < url_size; ++i) + offsets.push_back(i); + FormatUrlWithOffsets(GURL("http://ftp.google.com"), "en", + kFormatUrlOmitHTTP, UnescapeRule::NORMAL, NULL, NULL, &offsets); + size_t expected_5[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, kNpos}; + ASSERT_EQ(url_size, arraysize(expected_5)); + for (size_t i = 0; i < url_size; ++i) + EXPECT_EQ(expected_5[i], offsets[i]); + const AdjustOffsetCase omit_all_cases[] = { {12, 0}, {13, 1}, @@ -1936,6 +2069,19 @@ TEST(NetUtilTest, FormatUrlAdjustOffset) { UnescapeRule::NORMAL, NULL, NULL, &offset); EXPECT_EQ(omit_all_cases[i].output_offset, offset); } + + url_size = 21; + offsets.clear(); + for (size_t i = 0; i < url_size; ++i) + offsets.push_back(i); + FormatUrlWithOffsets(GURL("http://user@foo.com/"), "en", kFormatUrlOmitAll, + UnescapeRule::NORMAL, NULL, NULL, &offsets); + size_t expected_6[] = {kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, + kNpos, kNpos, kNpos, kNpos, 0, 1, 2, 3, 4, 5, 6, 7, + kNpos}; + ASSERT_EQ(url_size, arraysize(expected_6)); + for (size_t i = 0; i < url_size; ++i) + EXPECT_EQ(expected_6[i], offsets[i]); } TEST(NetUtilTest, SimplifyUrlForRequest) { @@ -2221,4 +2367,20 @@ TEST(NetUtilTest, GetNetworkList) { } } +TEST(NetUtilTest, AdjustComponentOffset) { + std::vector<size_t> old_offsets; + for (size_t i = 0; i < 10; ++i) + old_offsets.push_back(i); + std::vector<size_t> new_offsets; + std::transform(old_offsets.begin(), + old_offsets.end(), + std::back_inserter(new_offsets), + ClampComponentOffset(5)); + size_t expected_1[] = {kNpos, kNpos, kNpos, 
kNpos, kNpos, 5, 6, 7, 8, 9}; + EXPECT_EQ(new_offsets.size(), arraysize(expected_1)); + EXPECT_EQ(new_offsets.size(), old_offsets.size()); + for (size_t i = 0; i < arraysize(expected_1); ++i) + EXPECT_EQ(expected_1[i], new_offsets[i]); +} + } // namespace net |