summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authormrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-04-13 00:45:39 +0000
committermrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-04-13 00:45:39 +0000
commita47f8eadd67f75d3b663fdcc898caabb335bad0b (patch)
treefdf872770d4cd58ee753f219475850490a008f6d /net
parent2e0e8253a232fa499d22e47753c5bbadaebd69e7 (diff)
downloadchromium_src-a47f8eadd67f75d3b663fdcc898caabb335bad0b.zip
chromium_src-a47f8eadd67f75d3b663fdcc898caabb335bad0b.tar.gz
chromium_src-a47f8eadd67f75d3b663fdcc898caabb335bad0b.tar.bz2
Add multiple-offset versions of the various URL reformatting functions. Fixed a couple of erroneous unit tests of offsets into username/password.
Note: This does not complete the work required for 78153 -- tis but the first 2/3rds. BUG=78153 TEST=Many unit tests updated and added. Review URL: http://codereview.chromium.org/6822038 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@81343 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net')
-rw-r--r--net/base/escape.cc117
-rw-r--r--net/base/escape.h31
-rw-r--r--net/base/escape_unittest.cc39
-rw-r--r--net/base/net_util.cc426
-rw-r--r--net/base/net_util.h59
-rw-r--r--net/base/net_util_unittest.cc166
6 files changed, 636 insertions, 202 deletions
diff --git a/net/base/escape.cc b/net/base/escape.cc
index 64bd107..61c3e81 100644
--- a/net/base/escape.cc
+++ b/net/base/escape.cc
@@ -2,11 +2,12 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include <algorithm>
-
#include "net/base/escape.h"
+#include <algorithm>
+
#include "base/logging.h"
+#include "base/scoped_ptr.h"
#include "base/string_piece.h"
#include "base/string_util.h"
#include "base/utf_string_conversions.h"
@@ -98,15 +99,14 @@ const char kUrlUnescape[128] = {
};
template<typename STR>
-STR UnescapeURLImpl(const STR& escaped_text,
- UnescapeRule::Type rules,
- size_t* offset_for_adjustment) {
- size_t offset_temp = string16::npos;
- if (!offset_for_adjustment)
- offset_for_adjustment = &offset_temp;
- else if (*offset_for_adjustment >= escaped_text.length())
- *offset_for_adjustment = string16::npos;
-
+STR UnescapeURLWithOffsetsImpl(const STR& escaped_text,
+ UnescapeRule::Type rules,
+ std::vector<size_t>* offsets_for_adjustment) {
+ if (offsets_for_adjustment) {
+ std::for_each(offsets_for_adjustment->begin(),
+ offsets_for_adjustment->end(),
+ LimitOffset<std::wstring>(escaped_text.length()));
+ }
// Do not unescape anything, return the |escaped_text| text.
if (rules == UnescapeRule::NONE)
return escaped_text;
@@ -117,6 +117,7 @@ STR UnescapeURLImpl(const STR& escaped_text,
STR result;
result.reserve(escaped_text.length());
+ AdjustEncodingOffset::Adjustments adjustments; // Locations of adjusted text.
for (size_t i = 0, max = escaped_text.size(); i < max; ++i) {
if (static_cast<unsigned char>(escaped_text[i]) >= 128) {
// Non ASCII character, append as is.
@@ -144,17 +145,9 @@ STR UnescapeURLImpl(const STR& escaped_text,
// Additionally allow control characters if requested.
(value < ' ' && (rules & UnescapeRule::CONTROL_CHARS)))) {
// Use the unescaped version of the character.
- size_t length_before_append = result.length();
+ adjustments.push_back(i);
result.push_back(value);
i += 2;
-
- // Adjust offset to match length change.
- if (*offset_for_adjustment != std::string::npos) {
- if (*offset_for_adjustment > (length_before_append + 2))
- *offset_for_adjustment -= 2;
- else if (*offset_for_adjustment > length_before_append)
- *offset_for_adjustment = std::string::npos;
- }
} else {
// Keep escaped. Append a percent and we'll get the following two
// digits on the next loops through.
@@ -174,6 +167,26 @@ STR UnescapeURLImpl(const STR& escaped_text,
}
}
+ // Make offset adjustment.
+ if (offsets_for_adjustment && !adjustments.empty()) {
+ std::for_each(offsets_for_adjustment->begin(),
+ offsets_for_adjustment->end(),
+ AdjustEncodingOffset(adjustments));
+ }
+
+ return result;
+}
+
+template<typename STR>
+STR UnescapeURLImpl(const STR& escaped_text,
+ UnescapeRule::Type rules,
+ size_t* offset_for_adjustment) {
+ std::vector<size_t> offsets;
+ if (offset_for_adjustment)
+ offsets.push_back(*offset_for_adjustment);
+ STR result = UnescapeURLWithOffsetsImpl(escaped_text, rules, &offsets);
+ if (offset_for_adjustment)
+ *offset_for_adjustment = offsets[0];
return result;
}
@@ -234,33 +247,49 @@ std::string EscapeExternalHandlerValue(const std::string& text) {
return Escape(text, kExternalHandlerCharmap, false);
}
-string16 UnescapeAndDecodeUTF8URLComponent(const std::string& text,
- UnescapeRule::Type rules,
- size_t* offset_for_adjustment) {
+string16 UnescapeAndDecodeUTF8URLComponentWithOffsets(
+ const std::string& text,
+ UnescapeRule::Type rules,
+ std::vector<size_t>* offsets_for_adjustment) {
std::wstring result;
- size_t original_offset = offset_for_adjustment ? *offset_for_adjustment : 0;
+ std::vector<size_t> original_offsets;
+ if (offsets_for_adjustment)
+ original_offsets = *offsets_for_adjustment;
std::string unescaped_url(
- UnescapeURLImpl(text, rules, offset_for_adjustment));
- if (UTF8ToWideAndAdjustOffset(unescaped_url.data(), unescaped_url.length(),
- &result, offset_for_adjustment))
+ UnescapeURLWithOffsetsImpl(text, rules, offsets_for_adjustment));
+ if (UTF8ToWideAndAdjustOffsets(unescaped_url.data(), unescaped_url.length(),
+ &result, offsets_for_adjustment))
return WideToUTF16Hack(result); // Character set looks like it's valid.
// Not valid. Return the escaped version. Undo our changes to
// |offsets_for_adjustment| since we haven't changed the string after all.
+ if (offsets_for_adjustment)
+ *offsets_for_adjustment = original_offsets;
+ return WideToUTF16Hack(UTF8ToWideAndAdjustOffsets(
+ text, offsets_for_adjustment));
+}
+
+string16 UnescapeAndDecodeUTF8URLComponent(const std::string& text,
+ UnescapeRule::Type rules,
+ size_t* offset_for_adjustment) {
+ std::vector<size_t> offsets;
+ if (offset_for_adjustment)
+ offsets.push_back(*offset_for_adjustment);
+ string16 result =
+ UnescapeAndDecodeUTF8URLComponentWithOffsets(text, rules, &offsets);
if (offset_for_adjustment)
- *offset_for_adjustment = original_offset;
- return WideToUTF16Hack(UTF8ToWideAndAdjustOffset(text,
- offset_for_adjustment));
+ *offset_for_adjustment = offsets[0];
+ return result;
}
std::string UnescapeURLComponent(const std::string& escaped_text,
UnescapeRule::Type rules) {
- return UnescapeURLImpl(escaped_text, rules, NULL);
+ return UnescapeURLWithOffsetsImpl<std::string>(escaped_text, rules, NULL);
}
string16 UnescapeURLComponent(const string16& escaped_text,
UnescapeRule::Type rules) {
- return UnescapeURLImpl(escaped_text, rules, NULL);
+ return UnescapeURLWithOffsetsImpl<string16>(escaped_text, rules, NULL);
}
@@ -350,3 +379,27 @@ string16 UnescapeForHTML(const string16& input) {
}
return text;
}
+
+AdjustEncodingOffset::AdjustEncodingOffset(const Adjustments& adjustments)
+ : adjustments(adjustments) {}
+
+void AdjustEncodingOffset::operator()(size_t& offset) {
+ // For each encoded character occurring before an offset subtract 2.
+ if (offset == string16::npos)
+ return;
+ size_t adjusted_offset = offset;
+ for (Adjustments::const_iterator i = adjustments.begin();
+ i != adjustments.end(); ++i) {
+ size_t location = *i;
+ if (offset <= location) {
+ offset = adjusted_offset;
+ return;
+ }
+ if (offset <= (location + 2)) {
+ offset = string16::npos;
+ return;
+ }
+ adjusted_offset -= 2;
+ }
+ offset = adjusted_offset;
+}
diff --git a/net/base/escape.h b/net/base/escape.h
index faa7bd3..f4c99a3 100644
--- a/net/base/escape.h
+++ b/net/base/escape.h
@@ -7,6 +7,7 @@
#pragma once
#include <string>
+#include <vector>
#include "base/basictypes.h"
#include "base/string16.h"
@@ -99,15 +100,20 @@ string16 UnescapeURLComponent(const string16& escaped_text,
// Unescapes the given substring as a URL, and then tries to interpret the
// result as being encoded as UTF-8. If the result is convertible into UTF-8, it
// will be returned as converted. If it is not, the original escaped string will
-// be converted into a string16 and returned.
-//
-// |offset_for_adjustment| may be NULL; if not, it is an offset into |text| that
-// will be adjusted to point at the same logical place in the result string. If
-// this isn't possible because it points into the middle of an escape sequence
-// or past the end of the string, it will be set to string16::npos.
+// be converted into a string16 and returned.
+//
+// |offset[s]_for_adjustment| specifies one or more offsets into |text|; each
+// offset will be adjusted to point at the same logical place in the result
+// string during decoding. If this isn't possible because an offset points past
+// the end of |text| or into the middle of an escape sequence or a multibyte
+// sequence, the offending offset will be set to string16::npos.
+// |offset[s]_for_adjustment| may be NULL.
string16 UnescapeAndDecodeUTF8URLComponent(const std::string& text,
UnescapeRule::Type rules,
size_t* offset_for_adjustment);
+string16 UnescapeAndDecodeUTF8URLComponentWithOffsets(
+ const std::string& text,
+ UnescapeRule::Type rules,
+ std::vector<size_t>* offsets_for_adjustment);
// Unescape the following ampersand character codes from |text|:
// &lt; &gt; &amp; &quot; &#39;
@@ -129,4 +135,17 @@ bool EscapeQueryParamValue(const string16& text, const char* codepage,
// assumes the codepage is UTF8. This is provided as a convenience.
string16 EscapeQueryParamValueUTF8(const string16& text, bool use_plus);
+// Private Functions (Exposed for Unit Testing) --------------------------------
+
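+// A functor for use with std::for_each that adjusts any offset which occurs
+// after one or more encoded characters. An offset that falls inside an encoded
+// character is set to string16::npos. |adjustments| is held by reference, so
+// it must outlive the functor.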
+struct AdjustEncodingOffset {
+ typedef std::vector<size_t> Adjustments;
+
+ explicit AdjustEncodingOffset(const Adjustments& adjustments);
+ void operator()(size_t& offset);
+
+ const Adjustments& adjustments;
+};
+
#endif // NET_BASE_ESCAPE_H_
diff --git a/net/base/escape_unittest.cc b/net/base/escape_unittest.cc
index 60d4ae3..3a8d895 100644
--- a/net/base/escape_unittest.cc
+++ b/net/base/escape_unittest.cc
@@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
+#include <algorithm>
#include <string>
#include "net/base/escape.h"
@@ -15,6 +16,8 @@
namespace {
+static const size_t kNpos = string16::npos;
+
struct EscapeCase {
const wchar_t* input;
const wchar_t* output;
@@ -396,3 +399,39 @@ TEST(EscapeTest, UnescapeForHTML) {
EXPECT_EQ(ASCIIToUTF16(tests[i].expected_output), result);
}
}
+
+TEST(EscapeTest, AdjustEncodingOffset) {
+ // Imagine we have strings as shown in the following cases where the
+ // %XX's represent encoded characters
+
+ // 1: abc%ECdef ==> abcXdef
+ std::vector<size_t> offsets;
+ for (size_t t = 0; t < 9; ++t)
+ offsets.push_back(t);
+ AdjustEncodingOffset::Adjustments adjustments;
+ adjustments.push_back(3);
+ std::for_each(offsets.begin(), offsets.end(),
+ AdjustEncodingOffset(adjustments));
+ size_t expected_1[] = {0, 1, 2, 3, kNpos, kNpos, 4, 5, 6};
+ EXPECT_EQ(offsets.size(), arraysize(expected_1));
+ for (size_t i = 0; i < arraysize(expected_1); ++i)
+ EXPECT_EQ(expected_1[i], offsets[i]);
+
+
+ // 2: %ECabc%EC%ECdef%EC ==> XabcXXdefX
+ offsets.clear();
+ for (size_t t = 0; t < 18; ++t)
+ offsets.push_back(t);
+ adjustments.clear();
+ adjustments.push_back(0);
+ adjustments.push_back(6);
+ adjustments.push_back(9);
+ adjustments.push_back(15);
+ std::for_each(offsets.begin(), offsets.end(),
+ AdjustEncodingOffset(adjustments));
+ size_t expected_2[] = {0, kNpos, kNpos, 1, 2, 3, 4, kNpos, kNpos, 5, kNpos,
+ kNpos, 6, 7, 8, 9, kNpos, kNpos};
+ EXPECT_EQ(offsets.size(), arraysize(expected_2));
+ for (size_t i = 0; i < arraysize(expected_2); ++i)
+ EXPECT_EQ(expected_2[i], offsets[i]);
+}
diff --git a/net/base/net_util.cc b/net/base/net_util.cc
index 1aaa98b..378ac7b 100644
--- a/net/base/net_util.cc
+++ b/net/base/net_util.cc
@@ -67,7 +67,6 @@
#endif
#include "unicode/datefmt.h"
-
using base::Time;
namespace net {
@@ -734,6 +733,80 @@ bool IDNToUnicodeOneComponent(const char16* comp,
return false;
}
+struct SubtractFromOffset {
+ explicit SubtractFromOffset(size_t amount)
+ : amount(amount) {}
+ void operator()(size_t& offset) {
+ if (offset != std::wstring::npos)
+ if (offset >= amount)
+ offset -= amount;
+ else
+ offset = std::wstring::npos;
+ }
+
+ size_t amount;
+};
+
+struct AddToOffset {
+ explicit AddToOffset(size_t amount)
+ : amount(amount) {}
+ void operator()(size_t& offset) {
+ if (offset != std::wstring::npos)
+ offset += amount;
+ }
+
+ size_t amount;
+};
+
+std::vector<size_t> OffsetsIntoSection(
+ std::vector<size_t>* offsets_for_adjustment,
+ size_t section_begin) {
+ std::vector<size_t> offsets_into_section;
+ if (offsets_for_adjustment) {
+ std::transform(offsets_for_adjustment->begin(),
+ offsets_for_adjustment->end(),
+ std::back_inserter(offsets_into_section),
+ ClampComponentOffset(section_begin));
+ std::for_each(offsets_into_section.begin(), offsets_into_section.end(),
+ SubtractFromOffset(section_begin));
+ }
+ return offsets_into_section;
+}
+
+void ApplySectionAdjustments(const std::vector<size_t>& offsets_into_section,
+ std::vector<size_t>* offsets_for_adjustment,
+ size_t old_section_len,
+ size_t new_section_len,
+ size_t section_begin) {
+ if (offsets_for_adjustment) {
+ DCHECK_EQ(offsets_for_adjustment->size(), offsets_into_section.size());
+ std::vector<size_t>::const_iterator host_offsets_iter =
+ offsets_into_section.begin();
+ for (std::vector<size_t>::iterator offsets_iter =
+ offsets_for_adjustment->begin();
+ offsets_iter != offsets_for_adjustment->end();
+ ++offsets_iter, ++host_offsets_iter) {
+ size_t offset = *offsets_iter;
+ if (offset == std::wstring::npos || offset < section_begin) {
+ // The offset is invalid or precedes the section so leave it as is.
+ continue;
+ }
+ if (offset >= section_begin + old_section_len) {
+ // The offset is after the section so adjust by the section length delta.
+ offset += new_section_len - old_section_len;
+ } else if (*host_offsets_iter != std::wstring::npos) {
+ // The offset is within the section and valid so use the reformatted
+ // section-relative offset.
+ offset = section_begin + *host_offsets_iter;
+ } else {
+ // The offset is within the section but has no valid mapping.
+ offset = std::wstring::npos;
+ }
+ *offsets_iter = offset;
+ }
+ }
+}
+
// If |component| is valid, its begin is incremented by |delta|.
void AdjustComponent(int delta, url_parse::Component* component) {
if (!component->is_valid())
@@ -760,7 +833,7 @@ std::wstring FormatUrlInternal(const GURL& url,
UnescapeRule::Type unescape_rules,
url_parse::Parsed* new_parsed,
size_t* prefix_end,
- size_t* offset_for_adjustment);
+ std::vector<size_t>* offsets_for_adjustment);
// Helper for FormatUrl()/FormatUrlInternal().
std::wstring FormatViewSourceUrl(const GURL& url,
@@ -769,18 +842,20 @@ std::wstring FormatViewSourceUrl(const GURL& url,
UnescapeRule::Type unescape_rules,
url_parse::Parsed* new_parsed,
size_t* prefix_end,
- size_t* offset_for_adjustment) {
+ std::vector<size_t>* offsets_for_adjustment) {
DCHECK(new_parsed);
+ DCHECK(offsets_for_adjustment);
const wchar_t* const kWideViewSource = L"view-source:";
const size_t kViewSourceLengthPlus1 = 12;
+ std::vector<size_t> saved_offsets(*offsets_for_adjustment);
GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLengthPlus1));
- size_t temp_offset = (*offset_for_adjustment == std::wstring::npos) ?
- std::wstring::npos : (*offset_for_adjustment - kViewSourceLengthPlus1);
- size_t* temp_offset_ptr = (*offset_for_adjustment < kViewSourceLengthPlus1) ?
- NULL : &temp_offset;
+ // Make the offsets relative to the real URL; any offset that falls inside
+ // the "view-source:" prefix becomes npos.
+ std::for_each(offsets_for_adjustment->begin(),
+ offsets_for_adjustment->end(),
+ SubtractFromOffset(kViewSourceLengthPlus1));
std::wstring result = FormatUrlInternal(real_url, languages, format_types,
- unescape_rules, new_parsed, prefix_end, temp_offset_ptr);
+ unescape_rules, new_parsed, prefix_end, offsets_for_adjustment);
result.insert(0, kWideViewSource);
// Adjust position values.
@@ -794,57 +869,61 @@ std::wstring FormatViewSourceUrl(const GURL& url,
AdjustComponents(kViewSourceLengthPlus1, new_parsed);
if (prefix_end)
*prefix_end += kViewSourceLengthPlus1;
- if (temp_offset_ptr) {
- *offset_for_adjustment = (temp_offset == std::wstring::npos) ?
- std::wstring::npos : (temp_offset + kViewSourceLengthPlus1);
+ std::for_each(offsets_for_adjustment->begin(),
+ offsets_for_adjustment->end(),
+ AddToOffset(kViewSourceLengthPlus1));
+ // Restore all offsets which were not affected by FormatUrlInternal.
+ DCHECK_EQ(saved_offsets.size(), offsets_for_adjustment->size());
+ for (size_t i = 0; i < saved_offsets.size(); ++i) {
+ if (saved_offsets[i] < kViewSourceLengthPlus1)
+ (*offsets_for_adjustment)[i] = saved_offsets[i];
}
return result;
}
// Appends the substring |in_component| inside of the URL |spec| to |output|,
// and the resulting range will be filled into |out_component|. |unescape_rules|
-// defines how to clean the URL for human readability. |offset_for_adjustment|
-// is an offset into |output| which will be adjusted based on how it maps to the
-// component being converted; if it is less than output->length(), it will be
-// untouched, and if it is greater than output->length() + in_component.len it
-// will be shortened by the difference in lengths between the input and output
-// components. Otherwise it points into the component being converted, and is
-// adjusted to point to the same logical place in |output|.
-// |offset_for_adjustment| may not be NULL.
+// defines how to clean the URL for human readability. |offsets_for_adjustment|
+// is an array of offsets into |output| each of which will be adjusted based on
+// how it maps to the component being converted; if it is less than
+// output->length(), it will be untouched, and if it is greater than
+// output->length() + in_component.len it will be adjusted by the difference in
+// lengths between the input and output components. Otherwise it points into
+// the component being converted, and is adjusted to point to the same logical
+// place in |output|. |offsets_for_adjustment| may not be NULL.
void AppendFormattedComponent(const std::string& spec,
const url_parse::Component& in_component,
UnescapeRule::Type unescape_rules,
std::wstring* output,
url_parse::Component* out_component,
- size_t* offset_for_adjustment) {
+ std::vector<size_t>* offsets_for_adjustment) {
DCHECK(output);
- DCHECK(offset_for_adjustment);
+ DCHECK(offsets_for_adjustment);
if (in_component.is_nonempty()) {
- out_component->begin = static_cast<int>(output->length());
- size_t offset_past_current_output =
- ((*offset_for_adjustment == std::wstring::npos) ||
- (*offset_for_adjustment < output->length())) ?
- std::wstring::npos : (*offset_for_adjustment - output->length());
- size_t* offset_into_component =
- (offset_past_current_output >= static_cast<size_t>(in_component.len)) ?
- NULL : &offset_past_current_output;
+ size_t component_begin = output->length();
+ out_component->begin = static_cast<int>(component_begin);
+
+ // Compose a list of offsets within the component area.
+ std::vector<size_t> offsets_into_component =
+ OffsetsIntoSection(offsets_for_adjustment, component_begin);
+
if (unescape_rules == UnescapeRule::NONE) {
- output->append(UTF8ToWideAndAdjustOffset(
+ output->append(UTF8ToWideAndAdjustOffsets(
spec.substr(in_component.begin, in_component.len),
- offset_into_component));
+ &offsets_into_component));
} else {
- output->append(UTF16ToWideHack(UnescapeAndDecodeUTF8URLComponent(
- spec.substr(in_component.begin, in_component.len), unescape_rules,
- offset_into_component)));
- }
- out_component->len =
- static_cast<int>(output->length()) - out_component->begin;
- if (offset_into_component) {
- *offset_for_adjustment = (*offset_into_component == std::wstring::npos) ?
- std::wstring::npos : (out_component->begin + *offset_into_component);
- } else if (offset_past_current_output != std::wstring::npos) {
- *offset_for_adjustment += out_component->len - in_component.len;
+ output->append(UTF16ToWideHack(
+ UnescapeAndDecodeUTF8URLComponentWithOffsets(
+ spec.substr(in_component.begin, in_component.len), unescape_rules,
+ &offsets_into_component)));
}
+ size_t new_component_len = output->length() - component_begin;
+ out_component->len = static_cast<int>(new_component_len);
+
+ // Apply offset adjustments.
+ size_t old_component_len = static_cast<size_t>(in_component.len);
+ ApplySectionAdjustments(offsets_into_component, offsets_for_adjustment,
+ old_component_len, new_component_len, component_begin);
} else {
out_component->reset();
}
@@ -858,15 +937,16 @@ std::wstring FormatUrlInternal(const GURL& url,
UnescapeRule::Type unescape_rules,
url_parse::Parsed* new_parsed,
size_t* prefix_end,
- size_t* offset_for_adjustment) {
+ std::vector<size_t>* offsets_for_adjustment) {
url_parse::Parsed parsed_temp;
if (!new_parsed)
new_parsed = &parsed_temp;
else
*new_parsed = url_parse::Parsed();
- size_t offset_temp = std::wstring::npos;
- if (!offset_for_adjustment)
- offset_for_adjustment = &offset_temp;
+
+ std::vector<size_t> offsets_temp;
+ if (!offsets_for_adjustment)
+ offsets_for_adjustment = &offsets_temp;
std::wstring url_string;
@@ -874,7 +954,9 @@ std::wstring FormatUrlInternal(const GURL& url,
if (url.is_empty()) {
if (prefix_end)
*prefix_end = 0;
- *offset_for_adjustment = std::wstring::npos;
+ std::for_each(offsets_for_adjustment->begin(),
+ offsets_for_adjustment->end(),
+ LimitOffset<std::wstring>(0));
return url_string;
}
@@ -886,15 +968,17 @@ std::wstring FormatUrlInternal(const GURL& url,
if (url.SchemeIs(kViewSource) &&
!StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) {
return FormatViewSourceUrl(url, languages, format_types,
- unescape_rules, new_parsed, prefix_end, offset_for_adjustment);
+ unescape_rules, new_parsed, prefix_end, offsets_for_adjustment);
}
// We handle both valid and invalid URLs (this will give us the spec
// regardless of validity).
const std::string& spec = url.possibly_invalid_spec();
const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec();
- if (*offset_for_adjustment >= spec.length())
- *offset_for_adjustment = std::wstring::npos;
+ size_t spec_length = spec.length();
+ std::for_each(offsets_for_adjustment->begin(),
+ offsets_for_adjustment->end(),
+ LimitOffset<std::wstring>(spec_length));
// Copy everything before the username (the scheme and the separators.)
// These are ASCII.
@@ -922,48 +1006,47 @@ std::wstring FormatUrlInternal(const GURL& url,
// e.g. "http://google.com:search@evil.ru/"
new_parsed->username.reset();
new_parsed->password.reset();
- if ((*offset_for_adjustment != std::wstring::npos) &&
+ // Update the offsets based on removed username and/or password.
+ if (!offsets_for_adjustment->empty() &&
(parsed.username.is_nonempty() || parsed.password.is_nonempty())) {
+ AdjustOffset::Adjustments adjustments;
if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) {
// The seeming off-by-two in the length below is to account for the ':'
// after the username and the '@' after the password.
- if (*offset_for_adjustment >
- static_cast<size_t>(parsed.password.end())) {
- *offset_for_adjustment -=
- (parsed.username.len + parsed.password.len + 2);
- } else if (*offset_for_adjustment >
- static_cast<size_t>(parsed.username.begin)) {
- *offset_for_adjustment = std::wstring::npos;
- }
+ adjustments.push_back(AdjustOffset::Adjustment(
+ static_cast<size_t>(parsed.username.begin),
+ static_cast<size_t>(parsed.username.len + parsed.password.len +
+ 2), 0));
} else {
const url_parse::Component* nonempty_component =
parsed.username.is_nonempty() ? &parsed.username : &parsed.password;
- // The seeming off-by-one in these first two lines is to account for the
- // '@' after the username/password.
- if (*offset_for_adjustment >
- static_cast<size_t>(nonempty_component->end())) {
- *offset_for_adjustment -= (nonempty_component->len + 1);
- } else if (*offset_for_adjustment >
- static_cast<size_t>(nonempty_component->begin)) {
- *offset_for_adjustment = std::wstring::npos;
- }
+ // The seeming off-by-one below is to account for the '@' after the
+ // username/password.
+ adjustments.push_back(AdjustOffset::Adjustment(
+ static_cast<size_t>(nonempty_component->begin),
+ static_cast<size_t>(nonempty_component->len + 1), 0));
}
+
+ // Make offset adjustment.
+ std::for_each(offsets_for_adjustment->begin(),
+ offsets_for_adjustment->end(),
+ AdjustOffset(adjustments));
}
} else {
AppendFormattedComponent(spec, parsed.username, unescape_rules, &url_string,
- &new_parsed->username, offset_for_adjustment);
+ &new_parsed->username, offsets_for_adjustment);
if (parsed.password.is_valid())
url_string.push_back(':');
AppendFormattedComponent(spec, parsed.password, unescape_rules, &url_string,
- &new_parsed->password, offset_for_adjustment);
+ &new_parsed->password, offsets_for_adjustment);
if (parsed.username.is_valid() || parsed.password.is_valid())
url_string.push_back('@');
}
if (prefix_end)
*prefix_end = static_cast<size_t>(url_string.length());
- AppendFormattedHost(url, languages, &url_string, new_parsed,
- offset_for_adjustment);
+ AppendFormattedHostWithOffsets(url, languages, &url_string, new_parsed,
+ offsets_for_adjustment);
// Port.
if (parsed.port.is_nonempty()) {
@@ -981,41 +1064,35 @@ std::wstring FormatUrlInternal(const GURL& url,
if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) ||
!CanStripTrailingSlash(url)) {
AppendFormattedComponent(spec, parsed.path, unescape_rules, &url_string,
- &new_parsed->path, offset_for_adjustment);
+ &new_parsed->path, offsets_for_adjustment);
}
if (parsed.query.is_valid())
url_string.push_back('?');
AppendFormattedComponent(spec, parsed.query, unescape_rules, &url_string,
- &new_parsed->query, offset_for_adjustment);
+ &new_parsed->query, offsets_for_adjustment);
// Reference is stored in valid, unescaped UTF-8, so we can just convert.
if (parsed.ref.is_valid()) {
url_string.push_back('#');
- new_parsed->ref.begin = url_string.length();
- size_t offset_past_current_output =
- ((*offset_for_adjustment == std::wstring::npos) ||
- (*offset_for_adjustment < url_string.length())) ?
- std::wstring::npos : (*offset_for_adjustment - url_string.length());
- size_t* offset_into_ref =
- (offset_past_current_output >= static_cast<size_t>(parsed.ref.len)) ?
- NULL : &offset_past_current_output;
+ size_t ref_begin = url_string.length();
+ new_parsed->ref.begin = static_cast<int>(ref_begin);
+
+ // Compose a list of offsets within the section.
+ std::vector<size_t> offsets_into_ref =
+ OffsetsIntoSection(offsets_for_adjustment, ref_begin);
+
if (parsed.ref.len > 0) {
- url_string.append(UTF8ToWideAndAdjustOffset(spec.substr(parsed.ref.begin,
- parsed.ref.len),
- offset_into_ref));
- }
- new_parsed->ref.len = url_string.length() - new_parsed->ref.begin;
- if (offset_into_ref) {
- *offset_for_adjustment = (*offset_into_ref == std::wstring::npos) ?
- std::wstring::npos : (new_parsed->ref.begin + *offset_into_ref);
- } else if (offset_past_current_output != std::wstring::npos) {
- // We clamped the offset near the beginning of this function to ensure it
- // was within the input URL. If we reach here, the input was something
- // invalid and non-parseable such that the offset was past any component
- // we could figure out. In this case it won't be represented in the
- // output string, so reset it.
- *offset_for_adjustment = std::wstring::npos;
+ url_string.append(UTF8ToWideAndAdjustOffsets(spec.substr(parsed.ref.begin,
+ parsed.ref.len),
+ &offsets_into_ref));
}
+ size_t old_ref_len = static_cast<size_t>(parsed.ref.len);
+ size_t new_ref_len = url_string.length() - new_parsed->ref.begin;
+ new_parsed->ref.len = static_cast<int>(new_ref_len);
+
+ // Apply offset adjustments.
+ ApplySectionAdjustments(offsets_into_ref, offsets_for_adjustment,
+ old_ref_len, new_ref_len, ref_begin);
}
// If we need to strip out http do it after the fact. This way we don't need
@@ -1023,12 +1100,11 @@ std::wstring FormatUrlInternal(const GURL& url,
const size_t kHTTPSize = arraysize(kHTTP) - 1;
if (omit_http && !url_string.compare(0, kHTTPSize, kHTTP)) {
url_string = url_string.substr(kHTTPSize);
- if (*offset_for_adjustment != std::wstring::npos) {
- if (*offset_for_adjustment < kHTTPSize)
- *offset_for_adjustment = std::wstring::npos;
- else
- *offset_for_adjustment -= kHTTPSize;
- }
+ AdjustOffset::Adjustments adjustments;
+ adjustments.push_back(AdjustOffset::Adjustment(0, kHTTPSize, 0));
+ std::for_each(offsets_for_adjustment->begin(),
+ offsets_for_adjustment->end(),
+ AdjustOffset(adjustments));
if (prefix_end)
*prefix_end -= kHTTPSize;
@@ -1186,21 +1262,20 @@ std::string GetHeaderParamValue(const std::string& field,
//
// We may want to skip this step in the case of file URLs to allow unicode
// UNC hostnames regardless of encodings.
-std::wstring IDNToUnicode(const char* host,
- size_t host_len,
- const std::wstring& languages,
- size_t* offset_for_adjustment) {
+std::wstring IDNToUnicodeWithOffsets(
+ const char* host,
+ size_t host_len,
+ const std::wstring& languages,
+ std::vector<size_t>* offsets_for_adjustment) {
// Convert the ASCII input to a wide string for ICU.
string16 input16;
input16.reserve(host_len);
input16.insert(input16.end(), host, host + host_len);
- string16 out16;
- size_t output_offset = offset_for_adjustment ?
- *offset_for_adjustment : std::wstring::npos;
-
// Do each component of the host separately, since we enforce script matching
// on a per-component basis.
+ AdjustOffset::Adjustments adjustments;
+ string16 out16;
for (size_t component_start = 0, component_end;
component_start < input16.length();
component_start = component_end + 1) {
@@ -1209,22 +1284,18 @@ std::wstring IDNToUnicode(const char* host,
if (component_end == string16::npos)
component_end = input16.length(); // For getting the last component.
size_t component_length = component_end - component_start;
-
- size_t output_component_start = out16.length();
+ size_t new_component_start = out16.length();
bool converted_idn = false;
if (component_end > component_start) {
// Add the substring that we just found.
converted_idn = IDNToUnicodeOneComponent(input16.data() + component_start,
component_length, languages, &out16);
}
- size_t output_component_length = out16.length() - output_component_start;
+ size_t new_component_length = out16.length() - new_component_start;
- if ((output_offset != std::wstring::npos) &&
- (*offset_for_adjustment > component_start)) {
- if ((*offset_for_adjustment < component_end) && converted_idn)
- output_offset = std::wstring::npos;
- else
- output_offset += output_component_length - component_length;
+ if (converted_idn && offsets_for_adjustment) {
+ adjustments.push_back(AdjustOffset::Adjustment(
+ component_start, component_length, new_component_length));
}
// Need to add the dot we just found (if we found one).
@@ -1232,10 +1303,28 @@ std::wstring IDNToUnicode(const char* host,
out16.push_back('.');
}
- if (offset_for_adjustment)
- *offset_for_adjustment = output_offset;
+ // Make offset adjustment.
+ if (offsets_for_adjustment && !adjustments.empty()) {
+ std::for_each(offsets_for_adjustment->begin(),
+ offsets_for_adjustment->end(),
+ AdjustOffset(adjustments));
+ }
- return UTF16ToWideAndAdjustOffset(out16, offset_for_adjustment);
+ return UTF16ToWideAndAdjustOffsets(out16, offsets_for_adjustment);
+}
+
+std::wstring IDNToUnicode(const char* host,
+ size_t host_len,
+ const std::wstring& languages,
+ size_t* offset_for_adjustment) {
+ std::vector<size_t> offsets;
+ if (offset_for_adjustment)
+ offsets.push_back(*offset_for_adjustment);
+ std::wstring result =
+ IDNToUnicodeWithOffsets(host, host_len, languages, &offsets);
+ if (offset_for_adjustment)
+ *offset_for_adjustment = offsets[0];
+ return result;
}
std::string CanonicalizeHost(const std::string& host,
@@ -1648,51 +1737,73 @@ std::string GetHostOrSpecFromURL(const GURL& url) {
return url.has_host() ? TrimEndingDot(url.host()) : url.spec();
}
-void AppendFormattedHost(const GURL& url,
- const std::wstring& languages,
- std::wstring* output,
- url_parse::Parsed* new_parsed,
- size_t* offset_for_adjustment) {
+// Appends the host of |url| to |output|, passing it through
+// IDNToUnicodeWithOffsets() with |languages|. |output| must not be NULL.
+// If |new_parsed| is non-NULL, its host component is set to the position and
+// length of the appended text within |output|, or reset when |url| has no
+// host. |offsets_for_adjustment| is handed to OffsetsIntoSection() and
+// ApplySectionAdjustments(), which are defined elsewhere in this file.
+void AppendFormattedHostWithOffsets(
+ const GURL& url,
+ const std::wstring& languages,
+ std::wstring* output,
+ url_parse::Parsed* new_parsed,
+ std::vector<size_t>* offsets_for_adjustment) {
DCHECK(output);
const url_parse::Component& host =
url.parsed_for_possibly_invalid_spec().host;
if (host.is_nonempty()) {
// Handle possible IDN in the host name.
- int new_host_begin = static_cast<int>(output->length());
+ // The host is appended at the current end of |output|.
+ size_t host_begin = output->length();
if (new_parsed)
- new_parsed->host.begin = new_host_begin;
- size_t offset_past_current_output =
- (!offset_for_adjustment ||
- (*offset_for_adjustment == std::wstring::npos) ||
- (*offset_for_adjustment < output->length())) ?
- std::wstring::npos : (*offset_for_adjustment - output->length());
- size_t* offset_into_host =
- (offset_past_current_output >= static_cast<size_t>(host.len)) ?
- NULL : &offset_past_current_output;
+ new_parsed->host.begin = static_cast<int>(host_begin);
+ size_t old_host_len = static_cast<size_t>(host.len);
+
+ // Compose a list of offsets within the host area.
+ std::vector<size_t> offsets_into_host =
+ OffsetsIntoSection(offsets_for_adjustment, host_begin);
const std::string& spec = url.possibly_invalid_spec();
DCHECK(host.begin >= 0 &&
((spec.length() == 0 && host.begin == 0) ||
host.begin < static_cast<int>(spec.length())));
- output->append(IDNToUnicode(&spec[host.begin],
- static_cast<size_t>(host.len), languages, offset_into_host));
+ output->append(IDNToUnicodeWithOffsets(&spec[host.begin], old_host_len,
+ languages, &offsets_into_host));
- int new_host_len = static_cast<int>(output->length()) - new_host_begin;
+ // Length of the host text as it now appears in |output|.
+ size_t new_host_len = output->length() - host_begin;
if (new_parsed)
- new_parsed->host.len = new_host_len;
- if (offset_into_host) {
- *offset_for_adjustment = (*offset_into_host == std::wstring::npos) ?
- std::wstring::npos : (new_host_begin + *offset_into_host);
- } else if (offset_past_current_output != std::wstring::npos) {
- *offset_for_adjustment += new_host_len - host.len;
- }
+ new_parsed->host.len = static_cast<int>(new_host_len);
+
+ // Apply offset adjustments.
+ ApplySectionAdjustments(offsets_into_host, offsets_for_adjustment,
+ old_host_len, new_host_len, host_begin);
} else if (new_parsed) {
new_parsed->host.reset();
}
}
+// Single-offset convenience wrapper around AppendFormattedHostWithOffsets().
+// |offset_for_adjustment| may be NULL; when it is non-NULL its value is passed
+// through the multi-offset implementation and the adjusted value is written
+// back to it.
+void AppendFormattedHost(const GURL& url,
+ const std::wstring& languages,
+ std::wstring* output,
+ url_parse::Parsed* new_parsed,
+ size_t* offset_for_adjustment) {
+ std::vector<size_t> offsets;
+ if (offset_for_adjustment)
+ offsets.push_back(*offset_for_adjustment);
+ // A (possibly empty) vector is always passed, never NULL.
+ AppendFormattedHostWithOffsets(url, languages, output, new_parsed, &offsets);
+ if (offset_for_adjustment)
+ *offset_for_adjustment = offsets[0];
+}
+
// TODO(viettrungluu): convert the wstring |FormatUrlInternal()|.
+// Multi-offset variant of FormatUrl(). Forwards every argument to
+// FormatUrlInternal(), widening |languages| with ASCIIToWide(), and converts
+// the wide-string result to string16 with WideToUTF16Hack().
+string16 FormatUrlWithOffsets(const GURL& url,
+ const std::string& languages,
+ FormatUrlTypes format_types,
+ UnescapeRule::Type unescape_rules,
+ url_parse::Parsed* new_parsed,
+ size_t* prefix_end,
+ std::vector<size_t>* offsets_for_adjustment) {
+ return WideToUTF16Hack(
+ FormatUrlInternal(url, ASCIIToWide(languages), format_types,
+ unescape_rules, new_parsed, prefix_end,
+ offsets_for_adjustment));
+}
+
string16 FormatUrl(const GURL& url,
const std::string& languages,
FormatUrlTypes format_types,
@@ -1700,10 +1811,15 @@ string16 FormatUrl(const GURL& url,
url_parse::Parsed* new_parsed,
size_t* prefix_end,
size_t* offset_for_adjustment) {
- return WideToUTF16Hack(
+ std::vector<size_t> offsets;
+ if (offset_for_adjustment)
+ offsets.push_back(*offset_for_adjustment);
+ string16 result = WideToUTF16Hack(
FormatUrlInternal(url, ASCIIToWide(languages), format_types,
- unescape_rules, new_parsed, prefix_end,
- offset_for_adjustment));
+ unescape_rules, new_parsed, prefix_end, &offsets));
+ if (offset_for_adjustment)
+ *offset_for_adjustment = offsets[0];
+ return result;
}
bool CanStripTrailingSlash(const GURL& url) {
@@ -2156,4 +2272,12 @@ NetworkInterface::NetworkInterface(const std::string& name,
NetworkInterface::~NetworkInterface() {
}
+// Stores the boundary below which offsets are mapped to npos.
+ClampComponentOffset::ClampComponentOffset(size_t component_start)
+ : component_start(component_start) {}
+
+// Returns |offset| unchanged when it is at or after |component_start|;
+// otherwise returns std::wstring::npos.
+size_t ClampComponentOffset::operator()(size_t offset) {
+ return (offset >= component_start) ?
+ offset : std::wstring::npos;
+}
+
} // namespace net
diff --git a/net/base/net_util.h b/net/base/net_util.h
index bae27c3..0ff3369 100644
--- a/net/base/net_util.h
+++ b/net/base/net_util.h
@@ -198,15 +198,21 @@ std::string GetFileNameFromCD(const std::string& header,
// script-language pairs (currently Han, Kana and Hangul for zh,ja and ko).
// When |languages| is empty, even that mixing is not allowed.
//
-// |offset_for_adjustment| is an offset into |host|, which will be adjusted to
-// point at the same logical place in the output string. If this isn't possible
-// because it points past the end of |host| or into the middle of a punycode
-// sequence, it will be set to std::wstring::npos. |offset_for_adjustment| may
-// be NULL.
+// (|offset[s]_for_adjustment|) specifies one or more offsets into |host|;
+// each offset will be adjusted to point at the same logical place in the
+// result string after decoding. If this isn't possible because an offset
+// points past the end of |host| or into the middle of a punycode sequence,
+// the offending offset will be set to std::wstring::npos.
+// |offset[s]_for_adjustment| may be NULL.
std::wstring IDNToUnicode(const char* host,
size_t host_len,
const std::wstring& languages,
size_t* offset_for_adjustment);
+std::wstring IDNToUnicodeWithOffsets(
+ const char* host,
+ size_t host_len,
+ const std::wstring& languages,
+ std::vector<size_t>* offsets_for_adjustment);
// Canonicalizes |host| and returns it. Also fills |host_info| with
// IP address information. |host_info| must not be NULL.
@@ -292,11 +298,24 @@ int SetNonBlocking(int fd);
// the user. The given parsed structure will be updated. The host name formatter
// also takes the same accept languages component as ElideURL. |new_parsed| may
// be null.
+//
+// (|offset[s]_for_adjustment|) specifies one or more offsets into the original
+// |url|'s spec(); each offset will be adjusted to point at the same logical
+// place in the result strings after reformatting of the host. If this isn't
+// possible because an offset points past the end of the host or into the middle
+// of a multi-character sequence, the offending offset will be set to
+// std::wstring::npos. |offset[s]_for_adjustment| may be NULL.
void AppendFormattedHost(const GURL& url,
const std::wstring& languages,
std::wstring* output,
url_parse::Parsed* new_parsed,
size_t* offset_for_adjustment);
+void AppendFormattedHostWithOffsets(
+ const GURL& url,
+ const std::wstring& languages,
+ std::wstring* output,
+ url_parse::Parsed* new_parsed,
+ std::vector<size_t>* offsets_for_adjustment);
// Creates a string representation of |url|. The IDN host name may be in Unicode
// if |languages| accepts the Unicode representation. |format_type| is a bitmask
@@ -309,12 +328,13 @@ void AppendFormattedHost(const GURL& url,
// The last three parameters may be NULL.
// |new_parsed| will be set to the parsing parameters of the resultant URL.
// |prefix_end| will be the length before the hostname of the resultant URL.
-// |offset_for_adjustment| is an offset into the original |url|'s spec(), which
-// will be modified to reflect changes this function makes to the output string;
-// for example, if |url| is "http://a:b@c.com/", |omit_username_password| is
-// true, and |offset_for_adjustment| is 12 (the offset of '.'), then on return
-// the output string will be "http://c.com/" and |offset_for_adjustment| will be
-// 8. If the offset cannot be successfully adjusted (e.g. because it points
+//
+// (|offset[s]_for_adjustment|) specifies one or more offsets into the original
+// |url|'s spec(); each offset will be modified to reflect changes this function
+// makes to the output string. For example, if |url| is "http://a:b@c.com/",
+// |omit_username_password| is true, and an offset is 12 (the offset of '.'),
+// then on return the output string will be "http://c.com/" and the offset will
+// be 8. If an offset cannot be successfully adjusted (e.g. because it points
// into the middle of a component that was entirely removed, past the end of the
// string, or into the middle of an encoding sequence), it will be set to
// string16::npos.
@@ -325,6 +345,13 @@ string16 FormatUrl(const GURL& url,
url_parse::Parsed* new_parsed,
size_t* prefix_end,
size_t* offset_for_adjustment);
+string16 FormatUrlWithOffsets(const GURL& url,
+ const std::string& languages,
+ FormatUrlTypes format_types,
+ UnescapeRule::Type unescape_rules,
+ url_parse::Parsed* new_parsed,
+ size_t* prefix_end,
+ std::vector<size_t>* offsets_for_adjustment);
// This is a convenience function for FormatUrl() with
// format_types = kFormatUrlOmitAll and unescape = SPACES. This is the typical
@@ -454,6 +481,16 @@ typedef std::list<NetworkInterface> NetworkInterfaceList;
// Can be called only on a thread that allows IO.
bool GetNetworkList(NetworkInterfaceList* networks);
+// Private adjustment functor, suitable for use with std::transform, which
+// maps an offset to std::wstring::npos if it lies strictly before
+// |component_start| and otherwise returns the offset unchanged (an offset
+// equal to |component_start| is kept). Exposed here for unit testing.
+struct ClampComponentOffset {
+ explicit ClampComponentOffset(size_t component_start);
+ size_t operator()(size_t offset);
+
+ const size_t component_start;
+};
+
} // namespace net
#endif // NET_BASE_NET_UTIL_H_
diff --git a/net/base/net_util_unittest.cc b/net/base/net_util_unittest.cc
index b547f83..4265866 100644
--- a/net/base/net_util_unittest.cc
+++ b/net/base/net_util_unittest.cc
@@ -4,6 +4,8 @@
#include "net/base/net_util.h"
+#include <algorithm>
+
#include "base/file_path.h"
#include "base/format_macros.h"
#include "base/string_number_conversions.h"
@@ -21,6 +23,8 @@ namespace net {
namespace {
+static const size_t kNpos = string16::npos;
+
struct FileCase {
const wchar_t* file;
const char* url;
@@ -988,6 +992,20 @@ TEST(NetUtilTest, IDNToUnicodeAdjustOffset) {
&offset);
EXPECT_EQ(adjust_cases[i].output_offset, offset);
}
+
+ std::vector<size_t> offsets;
+ for (size_t i = 0; i < 40; ++i)
+ offsets.push_back(i);
+ IDNToUnicodeWithOffsets("test.xn--cy2a840a.xn--1lq90ic7f1rc.test", 39,
+ L"zh-CN", &offsets);
+ size_t expected[] = {0, 1, 2, 3, 4, 5, kNpos, kNpos, kNpos, kNpos, kNpos,
+ kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 7, 8, kNpos,
+ kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
+ kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 12, 13, 14, 15,
+ 16, kNpos};
+ ASSERT_EQ(40U, arraysize(expected));
+ for (size_t i = 0; i < 40; ++i)
+ EXPECT_EQ(expected[i], offsets[i]);
}
TEST(NetUtilTest, CompliantHost) {
@@ -1799,13 +1817,24 @@ TEST(NetUtilTest, FormatUrlAdjustOffset) {
EXPECT_EQ(basic_cases[i].output_offset, offset);
}
+ size_t url_size = 26;
+ std::vector<size_t> offsets;
+ for (size_t i = 0; i < url_size + 1; ++i)
+ offsets.push_back(i);
+ FormatUrlWithOffsets(GURL("http://www.google.com/foo/"), "en",
+ kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
+ NULL, NULL, &offsets);
+ for (size_t i = 0; i < url_size; ++i)
+ EXPECT_EQ(i, offsets[i]);
+ EXPECT_EQ(kNpos, offsets[url_size]);
+
const struct {
const char* input_url;
size_t input_offset;
size_t output_offset;
} omit_auth_cases[] = {
{"http://foo:bar@www.google.com/", 6, 6},
- {"http://foo:bar@www.google.com/", 7, 7},
+ {"http://foo:bar@www.google.com/", 7, string16::npos},
{"http://foo:bar@www.google.com/", 8, string16::npos},
{"http://foo:bar@www.google.com/", 10, string16::npos},
{"http://foo:bar@www.google.com/", 11, string16::npos},
@@ -1823,13 +1852,28 @@ TEST(NetUtilTest, FormatUrlAdjustOffset) {
EXPECT_EQ(omit_auth_cases[i].output_offset, offset);
}
+ url_size = 30;
+ offsets.clear();
+ for (size_t i = 0; i < url_size; ++i)
+ offsets.push_back(i);
+ FormatUrlWithOffsets(GURL("http://foo:bar@www.google.com/"), "en",
+ kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
+ NULL, NULL, &offsets);
+ // Offsets within "http://" are unaffected.
+ for (size_t i = 0; i < 7; ++i)
+ EXPECT_EQ(i, offsets[i]);
+ // Offsets into the omitted "foo:bar@" cannot be adjusted.
+ for (size_t i = 7; i < 15; ++i)
+ EXPECT_EQ(kNpos, offsets[i]);
+ // Offsets from the first host character (15) onward shift left by the 8
+ // omitted characters. Start at 15, not 16, so no offset goes unchecked.
+ for (size_t i = 15; i < url_size; ++i)
+ EXPECT_EQ(i - 8, offsets[i]);
+
+
const AdjustOffsetCase view_source_cases[] = {
{0, 0},
{3, 3},
{11, 11},
{12, 12},
{13, 13},
- {19, 19},
+ {18, 18},
+ {19, string16::npos},
{20, string16::npos},
{23, 19},
{26, 22},
@@ -1843,6 +1887,20 @@ TEST(NetUtilTest, FormatUrlAdjustOffset) {
EXPECT_EQ(view_source_cases[i].output_offset, offset);
}
+ url_size = 38;
+ offsets.clear();
+ for (size_t i = 0; i < url_size; ++i)
+ offsets.push_back(i);
+ FormatUrlWithOffsets(GURL("view-source:http://foo@www.google.com/"), "en",
+ kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
+ NULL, NULL, &offsets);
+ size_t expected[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, kNpos, kNpos, kNpos, kNpos, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31, 32, 33};
+ ASSERT_EQ(url_size, arraysize(expected));
+ for (size_t i = 0; i < url_size; ++i)
+ EXPECT_EQ(expected[i], offsets[i]);
+
const AdjustOffsetCase idn_hostname_cases[] = {
{8, string16::npos},
{16, string16::npos},
@@ -1859,6 +1917,21 @@ TEST(NetUtilTest, FormatUrlAdjustOffset) {
EXPECT_EQ(idn_hostname_cases[i].output_offset, offset);
}
+ url_size = 33;
+ offsets.clear();
+ for (size_t i = 0; i < url_size; ++i)
+ offsets.push_back(i);
+ FormatUrlWithOffsets(GURL("http://xn--l8jvb1ey91xtjb.jp/foo/"), "ja",
+ kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
+ NULL, NULL, &offsets);
+ size_t expected_1[] = {0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos,
+ kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
+ kNpos, kNpos, kNpos, kNpos, kNpos, 12, 13, 14, 15, 16,
+ 17, 18, 19};
+ ASSERT_EQ(url_size, arraysize(expected_1));
+ for (size_t i = 0; i < url_size; ++i)
+ EXPECT_EQ(expected_1[i], offsets[i]);
+
const AdjustOffsetCase unescape_cases[] = {
{25, 25},
{26, string16::npos},
@@ -1881,11 +1954,31 @@ TEST(NetUtilTest, FormatUrlAdjustOffset) {
EXPECT_EQ(unescape_cases[i].output_offset, offset);
}
+ url_size = 68;
+ offsets.clear();
+ for (size_t i = 0; i < url_size; ++i)
+ offsets.push_back(i);
+ FormatUrlWithOffsets(GURL(
+ "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"),
+ "en", kFormatUrlOmitUsernamePassword, UnescapeRule::SPACES, NULL, NULL,
+ &offsets);
+ size_t expected_2[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, kNpos, kNpos,
+ 26, 27, 28, 29, 30, kNpos, kNpos, kNpos, kNpos, kNpos,
+ kNpos, kNpos, kNpos, 31, kNpos, kNpos, kNpos, kNpos,
+ kNpos, kNpos, kNpos, kNpos, 32, kNpos, kNpos, kNpos,
+ kNpos, kNpos, kNpos, kNpos, kNpos, 33, kNpos, kNpos,
+ kNpos, kNpos, kNpos, kNpos, kNpos, kNpos};
+ ASSERT_EQ(url_size, arraysize(expected_2));
+ for (size_t i = 0; i < url_size; ++i)
+ EXPECT_EQ(expected_2[i], offsets[i]);
+
const AdjustOffsetCase ref_cases[] = {
{30, 30},
{31, 31},
{32, string16::npos},
{34, 32},
+ {35, string16::npos},
{37, 33},
{38, string16::npos},
};
@@ -1899,6 +1992,22 @@ TEST(NetUtilTest, FormatUrlAdjustOffset) {
EXPECT_EQ(ref_cases[i].output_offset, offset);
}
+ url_size = 38;
+ offsets.clear();
+ for (size_t i = 0; i < url_size; ++i)
+ offsets.push_back(i);
+ // "http://www.google.com/foo.html#\x30B0\x30B0z"
+ FormatUrlWithOffsets(GURL(
+ "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z"), "en",
+ kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, NULL, NULL,
+ &offsets);
+ size_t expected_3[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ 30, 31, kNpos, kNpos, 32, kNpos, kNpos, 33};
+ ASSERT_EQ(url_size, arraysize(expected_3));
+ for (size_t i = 0; i < url_size; ++i)
+ EXPECT_EQ(expected_3[i], offsets[i]);
+
const AdjustOffsetCase omit_http_cases[] = {
{0, string16::npos},
{3, string16::npos},
@@ -1912,6 +2021,18 @@ TEST(NetUtilTest, FormatUrlAdjustOffset) {
EXPECT_EQ(omit_http_cases[i].output_offset, offset);
}
+ url_size = 23;
+ offsets.clear();
+ for (size_t i = 0; i < url_size; ++i)
+ offsets.push_back(i);
+ FormatUrlWithOffsets(GURL("http://www.google.com"), "en",
+ kFormatUrlOmitHTTP, UnescapeRule::NORMAL, NULL, NULL, &offsets);
+ size_t expected_4[] = {kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, 1,
+ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, kNpos};
+ ASSERT_EQ(url_size, arraysize(expected_4));
+ for (size_t i = 0; i < url_size; ++i)
+ EXPECT_EQ(expected_4[i], offsets[i]);
+
const AdjustOffsetCase omit_http_start_with_ftp[] = {
{0, 0},
{3, 3},
@@ -1924,6 +2045,18 @@ TEST(NetUtilTest, FormatUrlAdjustOffset) {
EXPECT_EQ(omit_http_start_with_ftp[i].output_offset, offset);
}
+ url_size = 23;
+ offsets.clear();
+ for (size_t i = 0; i < url_size; ++i)
+ offsets.push_back(i);
+ FormatUrlWithOffsets(GURL("http://ftp.google.com"), "en",
+ kFormatUrlOmitHTTP, UnescapeRule::NORMAL, NULL, NULL, &offsets);
+ size_t expected_5[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, kNpos};
+ ASSERT_EQ(url_size, arraysize(expected_5));
+ for (size_t i = 0; i < url_size; ++i)
+ EXPECT_EQ(expected_5[i], offsets[i]);
+
const AdjustOffsetCase omit_all_cases[] = {
{12, 0},
{13, 1},
@@ -1936,6 +2069,19 @@ TEST(NetUtilTest, FormatUrlAdjustOffset) {
UnescapeRule::NORMAL, NULL, NULL, &offset);
EXPECT_EQ(omit_all_cases[i].output_offset, offset);
}
+
+ url_size = 21;
+ offsets.clear();
+ for (size_t i = 0; i < url_size; ++i)
+ offsets.push_back(i);
+ FormatUrlWithOffsets(GURL("http://user@foo.com/"), "en", kFormatUrlOmitAll,
+ UnescapeRule::NORMAL, NULL, NULL, &offsets);
+ size_t expected_6[] = {kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
+ kNpos, kNpos, kNpos, kNpos, 0, 1, 2, 3, 4, 5, 6, 7,
+ kNpos};
+ ASSERT_EQ(url_size, arraysize(expected_6));
+ for (size_t i = 0; i < url_size; ++i)
+ EXPECT_EQ(expected_6[i], offsets[i]);
}
TEST(NetUtilTest, SimplifyUrlForRequest) {
@@ -2221,4 +2367,20 @@ TEST(NetUtilTest, GetNetworkList) {
}
}
+// Verifies that ClampComponentOffset maps offsets before the component start
+// to npos and leaves offsets at or after it unchanged.
+TEST(NetUtilTest, AdjustComponentOffset) {
+ std::vector<size_t> old_offsets;
+ for (size_t i = 0; i < 10; ++i)
+ old_offsets.push_back(i);
+ std::vector<size_t> new_offsets;
+ std::transform(old_offsets.begin(),
+ old_offsets.end(),
+ std::back_inserter(new_offsets),
+ ClampComponentOffset(5));
+ size_t expected_1[] = {kNpos, kNpos, kNpos, kNpos, kNpos, 5, 6, 7, 8, 9};
+ // Fatal assertion: the loop below indexes |new_offsets| by the expected
+ // array's size, so a size mismatch must stop the test here.
+ ASSERT_EQ(arraysize(expected_1), new_offsets.size());
+ EXPECT_EQ(old_offsets.size(), new_offsets.size());
+ for (size_t i = 0; i < arraysize(expected_1); ++i)
+ EXPECT_EQ(expected_1[i], new_offsets[i]);
+}
+
} // namespace net