summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/base/escape.cc42
-rw-r--r--net/base/escape.h24
-rw-r--r--net/base/escape_unittest.cc125
-rw-r--r--net/base/net_util.cc355
-rw-r--r--net/base/net_util.h59
-rw-r--r--net/base/net_util_unittest.cc183
6 files changed, 550 insertions, 238 deletions
diff --git a/net/base/escape.cc b/net/base/escape.cc
index 3d2aca2..5196eb6 100644
--- a/net/base/escape.cc
+++ b/net/base/escape.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -107,7 +107,14 @@ const char kUrlUnescape[128] = {
};
std::string UnescapeURLImpl(const std::string& escaped_text,
- UnescapeRule::Type rules) {
+ UnescapeRule::Type rules,
+ size_t* offset_for_adjustment) {
+ size_t offset_temp = std::wstring::npos;
+ if (!offset_for_adjustment)
+ offset_for_adjustment = &offset_temp;
+ else if (*offset_for_adjustment >= escaped_text.length())
+ *offset_for_adjustment = std::wstring::npos;
+
// Do not unescape anything, return the |escaped_text| text.
if (rules == UnescapeRule::NONE)
return escaped_text;
@@ -136,8 +143,17 @@ std::string UnescapeURLImpl(const std::string& escaped_text,
// Additionally allow control characters if requested.
(value < ' ' && (rules & UnescapeRule::CONTROL_CHARS)))) {
// Use the unescaped version of the character.
+ size_t length_before_append = result.length();
result.push_back(value);
i += 2;
+
+ // Adjust offset to match length change.
+ if (*offset_for_adjustment != std::string::npos) {
+ if (*offset_for_adjustment > (length_before_append + 2))
+ *offset_for_adjustment -= 2;
+ else if (*offset_for_adjustment > length_before_append)
+ *offset_for_adjustment = std::string::npos;
+ }
} else {
// Keep escaped. Append a percent and we'll get the following two
// digits on the next loops through.
@@ -231,19 +247,27 @@ bool EscapeQueryParamValue(const std::wstring& text, const char* codepage,
return true;
}
-std::wstring UnescapeAndDecodeURLComponent(const std::string& text,
- const char* codepage,
- UnescapeRule::Type rules) {
+std::wstring UnescapeAndDecodeUTF8URLComponent(const std::string& text,
+ UnescapeRule::Type rules,
+ size_t* offset_for_adjustment) {
std::wstring result;
- if (base::CodepageToWide(UnescapeURLImpl(text, rules), codepage,
- base::OnStringConversionError::FAIL, &result))
+ size_t original_offset = offset_for_adjustment ? *offset_for_adjustment : 0;
+ if (base::CodepageToWideAndAdjustOffset(
+ UnescapeURLImpl(text, rules, offset_for_adjustment),
+ "UTF-8", base::OnStringConversionError::FAIL, &result,
+ offset_for_adjustment))
return result; // Character set looks like it's valid.
- return UTF8ToWide(text); // Return the escaped version when it's not.
+
+ // Not valid. Return the escaped version. Undo our changes to
+ // |offset_for_adjustment| since we haven't changed the string after all.
+ if (offset_for_adjustment)
+ *offset_for_adjustment = original_offset;
+ return UTF8ToWideAndAdjustOffset(text, offset_for_adjustment);
}
std::string UnescapeURLComponent(const std::string& escaped_text,
UnescapeRule::Type rules) {
- return UnescapeURLImpl(escaped_text, rules);
+ return UnescapeURLImpl(escaped_text, rules, NULL);
}
template <class str>
diff --git a/net/base/escape.h b/net/base/escape.h
index 8761d4d..9ff17b6 100644
--- a/net/base/escape.h
+++ b/net/base/escape.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -93,17 +93,17 @@ std::string UnescapeURLComponent(const std::string& escaped_text,
UnescapeRule::Type rules);
// Unescapes the given substring as a URL, and then tries to interpret the
-// result as being encoded in the given code page. If the result is convertable
-// into the code page, it will be returned as converted. If it is not, the
-// original escaped string will be converted into a wide string and returned.
-std::wstring UnescapeAndDecodeURLComponent(const std::string& text,
- const char* codepage,
- UnescapeRule::Type rules);
-inline std::wstring UnescapeAndDecodeUTF8URLComponent(
- const std::string& text,
- UnescapeRule::Type rules) {
- return UnescapeAndDecodeURLComponent(text, "UTF-8", rules);
-}
+// result as being encoded as UTF-8. If the result is valid UTF-8, it will be
+// returned converted to a wide string. If it is not, the original escaped
+// string will be converted into a wide string and returned.
+//
+// |offset_for_adjustment| may be NULL; if not, it is an offset into |text| that
+// will be adjusted to point at the same logical place in the result string. If
+// this isn't possible because it points into the middle of an escape sequence
+// or past the end of the string, it will be set to std::wstring::npos.
+std::wstring UnescapeAndDecodeUTF8URLComponent(const std::string& text,
+ UnescapeRule::Type rules,
+ size_t* offset_for_adjustment);
// Deprecated ------------------------------------------------------------------
diff --git a/net/base/escape_unittest.cc b/net/base/escape_unittest.cc
index 44bb9972..8e5e7dc 100644
--- a/net/base/escape_unittest.cc
+++ b/net/base/escape_unittest.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -24,8 +24,7 @@ struct UnescapeURLCase {
const char* output;
};
-struct UnescapeAndDecodeURLCase {
- const char* encoding;
+struct UnescapeAndDecodeCase {
const char* input;
// The expected output when run through UnescapeURL.
@@ -38,6 +37,12 @@ struct UnescapeAndDecodeURLCase {
const wchar_t* decoded;
};
+struct AdjustOffsetCase {
+ const char* input;
+ size_t input_offset;
+ size_t output_offset;
+};
+
struct EscapeForHTMLCase {
const char* input;
const char* expected_output;
@@ -45,7 +50,7 @@ struct EscapeForHTMLCase {
} // namespace
-TEST(Escape, EscapeTextForFormSubmission) {
+TEST(EscapeTest, EscapeTextForFormSubmission) {
const EscapeCase escape_cases[] = {
{L"foo", L"foo"},
{L"foo bar", L"foo+bar"},
@@ -93,7 +98,7 @@ TEST(Escape, EscapeTextForFormSubmission) {
EXPECT_EQ(wide, EscapeQueryParamValueUTF8(test_str));
}
-TEST(Escape, EscapePath) {
+TEST(EscapeTest, EscapePath) {
ASSERT_EQ(
// Most of the character space we care about, un-escaped
EscapePath(
@@ -108,7 +113,7 @@ TEST(Escape, EscapePath) {
"%7B%7C%7D~%7F%80%FF");
}
-TEST(Escape, EscapeUrlEncodedData) {
+TEST(EscapeTest, EscapeUrlEncodedData) {
ASSERT_EQ(
// Most of the character space we care about, un-escaped
EscapeUrlEncodedData(
@@ -123,7 +128,7 @@ TEST(Escape, EscapeUrlEncodedData) {
"%7B%7C%7D~%7F%80%FF");
}
-TEST(Escape, UnescapeURLComponent) {
+TEST(EscapeTest, UnescapeURLComponent) {
const UnescapeURLCase unescape_cases[] = {
{"", UnescapeRule::NORMAL, ""},
{"%2", UnescapeRule::NORMAL, "%2"},
@@ -184,40 +189,48 @@ TEST(Escape, UnescapeURLComponent) {
EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL));
}
-TEST(Escape, UnescapeAndDecodeURLComponent) {
- const UnescapeAndDecodeURLCase unescape_cases[] = {
- {"UTF8", "%", "%", "%", L"%"},
- {"UTF8", "+", "+", " ", L"+"},
- {"UTF8", "%2+", "%2+", "%2 ", L"%2+"},
- {"UTF8", "+%%%+%%%", "+%%%+%%%", " %%% %%%", L"+%%%+%%%"},
- {"UTF8", "Don't escape anything",
- "Don't escape anything",
- "Don't escape anything",
- L"Don't escape anything"},
- {"UTF8", "+Invalid %escape %2+",
- "+Invalid %escape %2+",
- " Invalid %escape %2 ",
- L"+Invalid %escape %2+"},
- {"UTF8", "Some random text %25%3bOK",
- "Some random text %25;OK",
- "Some random text %25;OK",
- L"Some random text %25;OK"},
- {"UTF8", "%01%02%03%04%05%06%07%08%09",
- "%01%02%03%04%05%06%07%08%09",
- "%01%02%03%04%05%06%07%08%09",
- L"%01%02%03%04%05%06%07%08%09"},
- {"UTF8", "%E4%BD%A0+%E5%A5%BD",
- "\xE4\xBD\xA0+\xE5\xA5\xBD",
- "\xE4\xBD\xA0 \xE5\xA5\xBD",
- L"\x4f60+\x597d"},
- {"BIG5", "%A7A%A6n",
- "\xA7\x41\xA6n",
- "\xA7\x41\xA6n",
- L"\x4f60\x597d"},
- {"UTF8", "%ED%ED", // Invalid UTF-8.
- "\xED\xED",
- "\xED\xED",
- L"%ED%ED"}, // Invalid UTF-8 -> kept unescaped.
+TEST(EscapeTest, UnescapeAndDecodeUTF8URLComponent) {
+ const UnescapeAndDecodeCase unescape_cases[] = {
+ { "%",
+ "%",
+ "%",
+ L"%"},
+ { "+",
+ "+",
+ " ",
+ L"+"},
+ { "%2+",
+ "%2+",
+ "%2 ",
+ L"%2+"},
+ { "+%%%+%%%",
+ "+%%%+%%%",
+ " %%% %%%",
+ L"+%%%+%%%"},
+ { "Don't escape anything",
+ "Don't escape anything",
+ "Don't escape anything",
+ L"Don't escape anything"},
+ { "+Invalid %escape %2+",
+ "+Invalid %escape %2+",
+ " Invalid %escape %2 ",
+ L"+Invalid %escape %2+"},
+ { "Some random text %25%3BOK",
+ "Some random text %25;OK",
+ "Some random text %25;OK",
+ L"Some random text %25;OK"},
+ { "%01%02%03%04%05%06%07%08%09",
+ "%01%02%03%04%05%06%07%08%09",
+ "%01%02%03%04%05%06%07%08%09",
+ L"%01%02%03%04%05%06%07%08%09"},
+ { "%E4%BD%A0+%E5%A5%BD",
+ "\xE4\xBD\xA0+\xE5\xA5\xBD",
+ "\xE4\xBD\xA0 \xE5\xA5\xBD",
+ L"\x4f60+\x597d"},
+ { "%ED%ED", // Invalid UTF-8.
+ "\xED\xED",
+ "\xED\xED",
+ L"%ED%ED"}, // Invalid UTF-8 -> kept escaped.
};
for (size_t i = 0; i < arraysize(unescape_cases); i++) {
@@ -230,14 +243,36 @@ TEST(Escape, UnescapeAndDecodeURLComponent) {
EXPECT_EQ(std::string(unescape_cases[i].query_unescaped), unescaped);
// TODO: Need to test unescape_spaces and unescape_percent.
- std::wstring decoded = UnescapeAndDecodeURLComponent(
- unescape_cases[i].input, unescape_cases[i].encoding,
- UnescapeRule::NORMAL);
+ std::wstring decoded = UnescapeAndDecodeUTF8URLComponent(
+ unescape_cases[i].input, UnescapeRule::NORMAL, NULL);
EXPECT_EQ(std::wstring(unescape_cases[i].decoded), decoded);
}
}
-TEST(Escape, EscapeForHTML) {
+TEST(EscapeTest, AdjustOffset) {
+ const AdjustOffsetCase adjust_cases[] = {
+ {"", 0, std::wstring::npos},
+ {"test", 0, 0},
+ {"test", 2, 2},
+ {"test", 4, std::wstring::npos},
+ {"test", std::wstring::npos, std::wstring::npos},
+ {"%3Btest", 6, 4},
+ {"%3Btest", 2, std::wstring::npos},
+ {"test%3B", 2, 2},
+ {"%E4%BD%A0+%E5%A5%BD", 9, 1},
+ {"%E4%BD%A0+%E5%A5%BD", 6, std::wstring::npos},
+ {"%ED%B0%80+%E5%A5%BD", 6, 6},
+ };
+
+ for (size_t i = 0; i < arraysize(adjust_cases); i++) {
+ size_t offset = adjust_cases[i].input_offset;
+ UnescapeAndDecodeUTF8URLComponent(adjust_cases[i].input,
+ UnescapeRule::NORMAL, &offset);
+ EXPECT_EQ(adjust_cases[i].output_offset, offset);
+ }
+}
+
+TEST(EscapeTest, EscapeForHTML) {
const EscapeForHTMLCase tests[] = {
{ "hello", "hello" },
{ "<hello>", "&lt;hello&gt;" },
diff --git a/net/base/net_util.cc b/net/base/net_util.cc
index 85151e9..9171e54 100644
--- a/net/base/net_util.cc
+++ b/net/base/net_util.cc
@@ -650,60 +650,51 @@ bool IsIDNComponentSafe(const char16* str,
}
// Converts one component of a host (between dots) to IDN if safe. The result
-// will be APPENDED to the given output string and will be the same as the
-// input if it is not IDN or the IDN is unsafe to display.
-void IDNToUnicodeOneComponent(const char16* comp,
- int comp_len,
+// will be APPENDED to the given output string and will be the same as the input
+// if it is not IDN or the IDN is unsafe to display. Returns whether any
+// conversion was performed.
+bool IDNToUnicodeOneComponent(const char16* comp,
+ size_t comp_len,
const std::wstring& languages,
string16* out) {
- DCHECK(comp_len >= 0);
+ DCHECK(out);
if (comp_len == 0)
- return;
+ return false;
- // Expand the output string to make room for a possibly longer string
- // (we'll expand if it's still not big enough below).
- int extra_space = 64;
- size_t host_begin_in_output = out->size();
-
- // Just copy the input if it can't be an IDN component.
- if (comp_len < 4 ||
- comp[0] != 'x' || comp[1] != 'n' || comp[2] != '-' || comp[3] != '-') {
- out->resize(host_begin_in_output + comp_len);
- for (int i = 0; i < comp_len; i++)
- (*out)[host_begin_in_output + i] = comp[i];
- return;
- }
+ // Only transform if the input can be an IDN component.
+ static const char16 kIdnPrefix[] = {'x', 'n', '-', '-'};
+ if ((comp_len > arraysize(kIdnPrefix)) &&
+ !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(char16))) {
+ // Repeatedly expand the output string until it's big enough. It looks like
+ // ICU will return the required size of the buffer, but that's not
+ // documented, so we'll just grow by 2x. This should be rare and is not on a
+ // critical path.
+ size_t original_length = out->length();
+ for (int extra_space = 64; ; extra_space *= 2) {
+ UErrorCode status = U_ZERO_ERROR;
+ out->resize(out->length() + extra_space);
+ int output_chars = uidna_IDNToUnicode(comp,
+ static_cast<int32_t>(comp_len), &(*out)[original_length], extra_space,
+ UIDNA_DEFAULT, NULL, &status);
+ if (status == U_ZERO_ERROR) {
+ // Converted successfully.
+ out->resize(original_length + output_chars);
+ if (IsIDNComponentSafe(out->data() + original_length, output_chars,
+ languages))
+ return true;
+ }
- while (true) {
- UErrorCode status = U_ZERO_ERROR;
- out->resize(out->size() + extra_space);
- int output_chars =
- uidna_IDNToUnicode(comp, comp_len, &(*out)[host_begin_in_output],
- extra_space, UIDNA_DEFAULT, NULL, &status);
- if (status == U_ZERO_ERROR) {
- // Converted successfully.
- out->resize(host_begin_in_output + output_chars);
- if (!IsIDNComponentSafe(&out->data()[host_begin_in_output],
- output_chars,
- languages))
- break; // The error handling below will undo the IDN.
- return;
+ if (status != U_BUFFER_OVERFLOW_ERROR)
+ break;
}
- if (status != U_BUFFER_OVERFLOW_ERROR)
- break;
-
- // Need to loop again with a bigger buffer. It looks like ICU will
- // return the required size of the buffer, but that's not documented,
- // so we'll just grow by 2x. This should be rare and is not on a
- // critical path.
- extra_space *= 2;
+ // Failed, revert back to original string.
+ out->resize(original_length);
}
- // We get here on error, in which case we replace anything that was added
- // with the literal input.
- out->resize(host_begin_in_output + comp_len);
- for (int i = 0; i < comp_len; i++)
- (*out)[host_begin_in_output + i] = comp[i];
+ // We get here with no IDN or on error, in which case we just append the
+ // literal input.
+ out->append(comp, comp_len);
+ return false;
}
// Helper for FormatUrl().
@@ -712,19 +703,23 @@ std::wstring FormatViewSourceUrl(const GURL& url,
bool omit_username_password,
UnescapeRule::Type unescape_rules,
url_parse::Parsed* new_parsed,
- size_t* prefix_end) {
+ size_t* prefix_end,
+ size_t* offset_for_adjustment) {
DCHECK(new_parsed);
const wchar_t* const kWideViewSource = L"view-source:";
const size_t kViewSourceLengthPlus1 = 12;
GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLengthPlus1));
+ size_t temp_offset = (*offset_for_adjustment == std::wstring::npos) ?
+ std::wstring::npos : (*offset_for_adjustment - kViewSourceLengthPlus1);
+ size_t* temp_offset_ptr = (*offset_for_adjustment < kViewSourceLengthPlus1) ?
+ NULL : &temp_offset;
std::wstring result = net::FormatUrl(real_url, languages,
- omit_username_password, unescape_rules, new_parsed, prefix_end);
+ omit_username_password, unescape_rules, new_parsed, prefix_end,
+ temp_offset_ptr);
result.insert(0, kWideViewSource);
// Adjust position values.
- if (prefix_end)
- *prefix_end += kViewSourceLengthPlus1;
if (new_parsed->scheme.is_nonempty()) {
// Assume "view-source:real-scheme" as a scheme.
new_parsed->scheme.len += kViewSourceLengthPlus1;
@@ -746,6 +741,12 @@ std::wstring FormatViewSourceUrl(const GURL& url,
new_parsed->query.begin += kViewSourceLengthPlus1;
if (new_parsed->ref.is_nonempty())
new_parsed->ref.begin += kViewSourceLengthPlus1;
+ if (prefix_end)
+ *prefix_end += kViewSourceLengthPlus1;
+ if (temp_offset_ptr) {
+ *offset_for_adjustment = (temp_offset == std::wstring::npos) ?
+ std::wstring::npos : (temp_offset + kViewSourceLengthPlus1);
+ }
return result;
}
@@ -769,12 +770,20 @@ std::set<int> explicitly_allowed_ports;
// Appends the substring |in_component| inside of the URL |spec| to |output|,
// and the resulting range will be filled into |out_component|. |unescape_rules|
-// defines how to clean the URL for human readability.
+// defines how to clean the URL for human readability. |offset_for_adjustment|
+// is an offset into |output| which will be adjusted based on how it maps to the
+// component being converted; if it is less than output->length(), it will be
+// untouched, and if it is at or past output->length() + in_component.len it
+// will be adjusted by the difference in lengths between the input and output
+// components. Otherwise it points into the component being converted, and is
+// adjusted to point to the same logical place in |output|.
+// |offset_for_adjustment| may not be NULL.
static void AppendFormattedComponent(const std::string& spec,
const url_parse::Component& in_component,
UnescapeRule::Type unescape_rules,
std::wstring* output,
- url_parse::Component* out_component);
+ url_parse::Component* out_component,
+ size_t* offset_for_adjustment);
GURL FilePathToFileURL(const FilePath& path) {
// Produce a URL like "file:///C:/foo" for a regular file, or
@@ -849,58 +858,56 @@ std::string GetHeaderParamValue(const std::string& field,
//
// We may want to skip this step in the case of file URLs to allow unicode
// UNC hostnames regardless of encodings.
-void IDNToUnicode(const char* host,
- int host_len,
- const std::wstring& languages,
- std::wstring* out) {
+std::wstring IDNToUnicode(const char* host,
+ size_t host_len,
+ const std::wstring& languages,
+ size_t* offset_for_adjustment) {
// Convert the ASCII input to a wide string for ICU.
string16 input16;
input16.reserve(host_len);
- for (int i = 0; i < host_len; i++)
- input16.push_back(host[i]);
+ std::copy(host, host + host_len, std::back_inserter(input16));
string16 out16;
- // The output string is appended to, so convert what's already there if
- // needed.
-#if defined(WCHAR_T_IS_UTF32)
- WideToUTF16(out->data(), out->length(), &out16);
- out->clear(); // for equivalence with the swap below
-#elif defined(WCHAR_T_IS_UTF16)
- out->swap(out16);
-#endif
+ size_t output_offset = offset_for_adjustment ?
+ *offset_for_adjustment : std::wstring::npos;
// Do each component of the host separately, since we enforce script matching
// on a per-component basis.
- size_t cur_begin = 0; // Beginning of the current component (inclusive).
- while (cur_begin < input16.size()) {
- // Find the next dot or the end of the string.
- size_t next_dot = input16.find_first_of('.', cur_begin);
- if (next_dot == std::wstring::npos)
- next_dot = input16.size(); // For getting the last component.
-
- if (next_dot > cur_begin) {
+ for (size_t component_start = 0, component_end;
+ component_start < input16.length();
+ component_start = component_end + 1) {
+ // Find the end of the component.
+ component_end = input16.find('.', component_start);
+ if (component_end == string16::npos)
+ component_end = input16.length(); // For getting the last component.
+ size_t component_length = component_end - component_start;
+
+ size_t output_component_start = out16.length();
+ bool converted_idn = false;
+ if (component_end > component_start) {
// Add the substring that we just found.
- IDNToUnicodeOneComponent(&input16[cur_begin],
- static_cast<int>(next_dot - cur_begin),
- languages,
- &out16);
+ converted_idn = IDNToUnicodeOneComponent(input16.data() + component_start,
+ component_length, languages, &out16);
+ }
+ size_t output_component_length = out16.length() - output_component_start;
+
+ if ((output_offset != std::wstring::npos) &&
+ (*offset_for_adjustment > component_start)) {
+ if ((*offset_for_adjustment < component_end) && converted_idn)
+ output_offset = std::wstring::npos;
+ else
+ output_offset += output_component_length - component_length;
}
- // Need to add the dot we just found (if we found one). This needs to be
- // done before we break out below in case the URL ends in a dot.
- if (next_dot < input16.size())
+ // Need to add the dot we just found (if we found one).
+ if (component_end < input16.length())
out16.push_back('.');
- else
- break; // No more components left.
-
- cur_begin = next_dot + 1;
}
-#if defined(WCHAR_T_IS_UTF32)
- UTF16ToWide(out16.data(), out16.length(), out);
-#elif defined(WCHAR_T_IS_UTF16)
- out->swap(out16);
-#endif
+ if (offset_for_adjustment)
+ *offset_for_adjustment = output_offset;
+
+ return UTF16ToWideAndAdjustOffset(out16, offset_for_adjustment);
}
std::string CanonicalizeHost(const std::string& host,
@@ -1262,31 +1269,48 @@ void GetIdentityFromURL(const GURL& url,
std::wstring* username,
std::wstring* password) {
UnescapeRule::Type flags = UnescapeRule::SPACES;
- *username = UnescapeAndDecodeUTF8URLComponent(url.username(), flags);
- *password = UnescapeAndDecodeUTF8URLComponent(url.password(), flags);
+ *username = UnescapeAndDecodeUTF8URLComponent(url.username(), flags, NULL);
+ *password = UnescapeAndDecodeUTF8URLComponent(url.password(), flags, NULL);
}
void AppendFormattedHost(const GURL& url,
const std::wstring& languages,
std::wstring* output,
- url_parse::Parsed* new_parsed) {
+ url_parse::Parsed* new_parsed,
+ size_t* offset_for_adjustment) {
+ DCHECK(output);
const url_parse::Component& host =
url.parsed_for_possibly_invalid_spec().host;
if (host.is_nonempty()) {
// Handle possible IDN in the host name.
+ int new_host_begin = static_cast<int>(output->length());
if (new_parsed)
- new_parsed->host.begin = static_cast<int>(output->length());
+ new_parsed->host.begin = new_host_begin;
+ size_t offset_past_current_output =
+ (!offset_for_adjustment ||
+ (*offset_for_adjustment == std::wstring::npos) ||
+ (*offset_for_adjustment < output->length())) ?
+ std::wstring::npos : (*offset_for_adjustment - output->length());
+ size_t* offset_into_host =
+ (offset_past_current_output >= static_cast<size_t>(host.len)) ?
+ NULL : &offset_past_current_output;
const std::string& spec = url.possibly_invalid_spec();
DCHECK(host.begin >= 0 &&
((spec.length() == 0 && host.begin == 0) ||
host.begin < static_cast<int>(spec.length())));
- net::IDNToUnicode(&spec[host.begin], host.len, languages, output);
+ output->append(net::IDNToUnicode(&spec[host.begin],
+ static_cast<size_t>(host.len), languages, offset_into_host));
- if (new_parsed) {
- new_parsed->host.len =
- static_cast<int>(output->length()) - new_parsed->host.begin;
+ int new_host_len = static_cast<int>(output->length()) - new_host_begin;
+ if (new_parsed)
+ new_parsed->host.len = new_host_len;
+ if (offset_into_host) {
+ *offset_for_adjustment = (*offset_into_host == std::wstring::npos) ?
+ std::wstring::npos : (new_host_begin + *offset_into_host);
+ } else if (offset_past_current_output != std::wstring::npos) {
+ *offset_for_adjustment += new_host_len - host.len;
}
} else if (new_parsed) {
new_parsed->host.reset();
@@ -1298,19 +1322,36 @@ void AppendFormattedComponent(const std::string& spec,
const url_parse::Component& in_component,
UnescapeRule::Type unescape_rules,
std::wstring* output,
- url_parse::Component* out_component) {
+ url_parse::Component* out_component,
+ size_t* offset_for_adjustment) {
+ DCHECK(output);
+ DCHECK(offset_for_adjustment);
if (in_component.is_nonempty()) {
out_component->begin = static_cast<int>(output->length());
+ size_t offset_past_current_output =
+ ((*offset_for_adjustment == std::wstring::npos) ||
+ (*offset_for_adjustment < output->length())) ?
+ std::wstring::npos : (*offset_for_adjustment - output->length());
+ size_t* offset_into_component =
+ (offset_past_current_output >= static_cast<size_t>(in_component.len)) ?
+ NULL : &offset_past_current_output;
if (unescape_rules == UnescapeRule::NONE) {
- output->append(UTF8ToWide(spec.substr(
- in_component.begin, in_component.len)));
+ output->append(UTF8ToWideAndAdjustOffset(
+ spec.substr(in_component.begin, in_component.len),
+ offset_into_component));
} else {
output->append(UnescapeAndDecodeUTF8URLComponent(
- spec.substr(in_component.begin, in_component.len),
- unescape_rules));
+ spec.substr(in_component.begin, in_component.len), unescape_rules,
+ offset_into_component));
}
out_component->len =
static_cast<int>(output->length()) - out_component->begin;
+ if (offset_into_component) {
+ *offset_for_adjustment = (*offset_into_component == std::wstring::npos) ?
+ std::wstring::npos : (out_component->begin + *offset_into_component);
+ } else if (offset_past_current_output != std::wstring::npos) {
+ *offset_for_adjustment += out_component->len - in_component.len;
+ }
} else {
out_component->reset();
}
@@ -1321,10 +1362,14 @@ std::wstring FormatUrl(const GURL& url,
bool omit_username_password,
UnescapeRule::Type unescape_rules,
url_parse::Parsed* new_parsed,
- size_t* prefix_end) {
+ size_t* prefix_end,
+ size_t* offset_for_adjustment) {
url_parse::Parsed parsed_temp;
if (!new_parsed)
new_parsed = &parsed_temp;
+ size_t offset_temp = std::wstring::npos;
+ if (!offset_for_adjustment)
+ offset_for_adjustment = &offset_temp;
std::wstring url_string;
@@ -1332,6 +1377,7 @@ std::wstring FormatUrl(const GURL& url,
if (url.is_empty()) {
if (prefix_end)
*prefix_end = 0;
+ *offset_for_adjustment = std::wstring::npos;
return url_string;
}
@@ -1343,19 +1389,22 @@ std::wstring FormatUrl(const GURL& url,
if (url.SchemeIs(kViewSource) &&
!StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) {
return FormatViewSourceUrl(url, languages, omit_username_password,
- unescape_rules, new_parsed, prefix_end);
+ unescape_rules, new_parsed, prefix_end, offset_for_adjustment);
}
// We handle both valid and invalid URLs (this will give us the spec
// regardless of validity).
const std::string& spec = url.possibly_invalid_spec();
const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec();
+ if (*offset_for_adjustment >= spec.length())
+ *offset_for_adjustment = std::wstring::npos;
// Copy everything before the username (the scheme and the separators.)
// These are ASCII.
- int pre_end = parsed.CountCharactersBefore(url_parse::Parsed::USERNAME, true);
- for (int i = 0; i < pre_end; ++i)
- url_string.push_back(spec[i]);
+ std::copy(spec.begin(),
+ spec.begin() + parsed.CountCharactersBefore(url_parse::Parsed::USERNAME,
+ true),
+ std::back_inserter(url_string));
new_parsed->scheme = parsed.scheme;
if (omit_username_password) {
@@ -1364,16 +1413,41 @@ std::wstring FormatUrl(const GURL& url,
// e.g. "http://google.com:search@evil.ru/"
new_parsed->username.reset();
new_parsed->password.reset();
+ if ((*offset_for_adjustment != std::wstring::npos) &&
+ (parsed.username.is_nonempty() || parsed.password.is_nonempty())) {
+ if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) {
+ // The seeming off-by-one and off-by-two in these first two lines are to
+ // account for the ':' after the username and '@' after the password.
+ if (*offset_for_adjustment >
+ static_cast<size_t>(parsed.password.end())) {
+ *offset_for_adjustment -=
+ (parsed.username.len + parsed.password.len + 2);
+ } else if (*offset_for_adjustment >
+ static_cast<size_t>(parsed.username.begin)) {
+ *offset_for_adjustment = std::wstring::npos;
+ }
+ } else {
+ const url_parse::Component* nonempty_component =
+ parsed.username.is_nonempty() ? &parsed.username : &parsed.password;
+ // The seeming off-by-one in these first two lines is to account for the
+ // '@' after the username/password.
+ if (*offset_for_adjustment >
+ static_cast<size_t>(nonempty_component->end())) {
+ *offset_for_adjustment -= (nonempty_component->len + 1);
+ } else if (*offset_for_adjustment >
+ static_cast<size_t>(nonempty_component->begin)) {
+ *offset_for_adjustment = std::wstring::npos;
+ }
+ }
+ }
} else {
- AppendFormattedComponent(
- spec, parsed.username, unescape_rules,
- &url_string, &new_parsed->username);
+ AppendFormattedComponent(spec, parsed.username, unescape_rules, &url_string,
+ &new_parsed->username, offset_for_adjustment);
if (parsed.password.is_valid()) {
url_string.push_back(':');
}
- AppendFormattedComponent(
- spec, parsed.password, unescape_rules,
- &url_string, &new_parsed->password);
+ AppendFormattedComponent(spec, parsed.password, unescape_rules, &url_string,
+ &new_parsed->password, offset_for_adjustment);
if (parsed.username.is_valid() || parsed.password.is_valid()) {
url_string.push_back('@');
}
@@ -1381,39 +1455,56 @@ std::wstring FormatUrl(const GURL& url,
if (prefix_end)
*prefix_end = static_cast<size_t>(url_string.length());
- AppendFormattedHost(url, languages, &url_string, new_parsed);
+ AppendFormattedHost(url, languages, &url_string, new_parsed,
+ offset_for_adjustment);
// Port.
if (parsed.port.is_nonempty()) {
url_string.push_back(':');
- int begin = url_string.length();
- for (int i = parsed.port.begin; i < parsed.port.end(); ++i)
- url_string.push_back(spec[i]);
- new_parsed->port.begin = begin;
- new_parsed->port.len = url_string.length() - begin;
+ new_parsed->port.begin = url_string.length();
+ std::copy(spec.begin() + parsed.port.begin,
+ spec.begin() + parsed.port.end(), std::back_inserter(url_string));
+ new_parsed->port.len = url_string.length() - new_parsed->port.begin;
} else {
new_parsed->port.reset();
}
// Path and query both get the same general unescape & convert treatment.
- AppendFormattedComponent(
- spec, parsed.path, unescape_rules, &url_string,
- &new_parsed->path);
+ AppendFormattedComponent(spec, parsed.path, unescape_rules, &url_string,
+ &new_parsed->path, offset_for_adjustment);
if (parsed.query.is_valid())
url_string.push_back('?');
- AppendFormattedComponent(
- spec, parsed.query, unescape_rules, &url_string,
- &new_parsed->query);
+ AppendFormattedComponent(spec, parsed.query, unescape_rules, &url_string,
+ &new_parsed->query, offset_for_adjustment);
// Reference is stored in valid, unescaped UTF-8, so we can just convert.
if (parsed.ref.is_valid()) {
url_string.push_back('#');
- int begin = url_string.length();
- if (parsed.ref.len > 0)
- url_string.append(UTF8ToWide(std::string(&spec[parsed.ref.begin],
- parsed.ref.len)));
- new_parsed->ref.begin = begin;
- new_parsed->ref.len = url_string.length() - begin;
+ new_parsed->ref.begin = url_string.length();
+ size_t offset_past_current_output =
+ ((*offset_for_adjustment == std::wstring::npos) ||
+ (*offset_for_adjustment < url_string.length())) ?
+ std::wstring::npos : (*offset_for_adjustment - url_string.length());
+ size_t* offset_into_ref =
+ (offset_past_current_output >= static_cast<size_t>(parsed.ref.len)) ?
+ NULL : &offset_past_current_output;
+ if (parsed.ref.len > 0) {
+ url_string.append(UTF8ToWideAndAdjustOffset(spec.substr(parsed.ref.begin,
+ parsed.ref.len),
+ offset_into_ref));
+ }
+ new_parsed->ref.len = url_string.length() - new_parsed->ref.begin;
+ if (offset_into_ref) {
+ *offset_for_adjustment = (*offset_into_ref == std::wstring::npos) ?
+ std::wstring::npos : (new_parsed->ref.begin + *offset_into_ref);
+ } else if (offset_past_current_output != std::wstring::npos) {
+ // We clamped the offset near the beginning of this function to ensure it
+ // was within the input URL. If we reach here, the input was something
+ // invalid and non-parseable such that the offset was past any component
+ // we could figure out. In this case it won't be represented in the
+ // output string, so reset it.
+ *offset_for_adjustment = std::wstring::npos;
+ }
}
return url_string;
diff --git a/net/base/net_util.h b/net/base/net_util.h
index 1f1516f..d9affe6 100644
--- a/net/base/net_util.h
+++ b/net/base/net_util.h
@@ -129,10 +129,9 @@ std::string GetHeaderParamValue(const std::string& field,
std::string GetFileNameFromCD(const std::string& header,
const std::string& referrer_charset);
-// Converts the given host name to unicode characters, APPENDING them to the
-// the given output string. This can be called for any host name, if the
-// input is not IDN or is invalid in some way, we'll just append the ASCII
-// source to the output so it is still usable.
+// Converts the given host name to unicode characters. This can be called for
+// any host name; if the input is not IDN or is invalid in some way, we'll just
+// return the ASCII source so it is still usable.
//
// The input should be the canonicalized ASCII host name from GURL. This
// function does NOT accept UTF-8! Its length must also be given (this is
@@ -146,10 +145,16 @@ std::string GetFileNameFromCD(const std::string& header,
// Latin letters in the ASCII range can be mixed with a limited set of
// script-language pairs (currently Han, Kana and Hangul for zh,ja and ko).
// When |languages| is empty, even that mixing is not allowed.
-void IDNToUnicode(const char* host,
- int host_len,
- const std::wstring& languages,
- std::wstring* out);
+//
+// |offset_for_adjustment| is an offset into |host|, which will be adjusted to
+// point at the same logical place in the output string. If this isn't possible
+// because it points past the end of |host| or into the middle of a punycode
+// sequence, it will be set to std::wstring::npos. |offset_for_adjustment| may
+// be NULL.
+std::wstring IDNToUnicode(const char* host,
+ size_t host_len,
+ const std::wstring& languages,
+ size_t* offset_for_adjustment);
// Canonicalizes |host| and returns it. Also fills |host_info| with
// IP address information. |host_info| must not be NULL.
@@ -228,31 +233,47 @@ int SetNonBlocking(int fd);
// the user. The given parsed structure will be updated. The host name formatter
// also takes the same accept languages component as ElideURL. |new_parsed| may
// be null.
-void AppendFormattedHost(const GURL& url, const std::wstring& languages,
- std::wstring* output, url_parse::Parsed* new_parsed);
-
-// Creates a string representation of |url|. The IDN host name may
-// be in Unicode if |languages| accepts the Unicode representation.
-// If |omit_username_password| is true, the username and the password are
-// omitted. |unescape_rules| defines how to clean the URL for human readability.
+void AppendFormattedHost(const GURL& url,
+ const std::wstring& languages,
+ std::wstring* output,
+ url_parse::Parsed* new_parsed,
+ size_t* offset_for_adjustment);
+
+// Creates a string representation of |url|. The IDN host name may be in Unicode
+// if |languages| accepts the Unicode representation. If
+// |omit_username_password| is true, any username and password are removed.
+// |unescape_rules| defines how to clean the URL for human readability.
// You will generally want |UnescapeRule::SPACES| for display to the user if you
// can handle spaces, or |UnescapeRule::NORMAL| if not. If the path part and the
// query part seem to be encoded in %-encoded UTF-8, decodes %-encoding and
-// UTF-8. |new_parsed| will have parsing parameters of the resultant URL.
+// UTF-8.
+//
+// The last three parameters may be NULL.
+// |new_parsed| will be set to the parsing parameters of the resultant URL.
// |prefix_end| will be the length before the hostname of the resultant URL.
-// |new_parsed| and |prefix_end| may be NULL.
+// |offset_for_adjustment| is an offset into the original |url|'s spec(), which
+// will be modified to reflect changes this function makes to the output string;
+// for example, if |url| is "http://a:b@c.com/", |omit_username_password| is
+// true, and |offset_for_adjustment| is 12 (the offset of '.'), then on return
+// the output string will be "http://c.com/" and |offset_for_adjustment| will be
+// 8. If the offset cannot be successfully adjusted (e.g. because it points
+// into the middle of a component that was entirely removed, past the end of the
+// string, or into the middle of an encoding sequence), it will be set to
+// std::wstring::npos.
std::wstring FormatUrl(const GURL& url,
const std::wstring& languages,
bool omit_username_password,
UnescapeRule::Type unescape_rules,
url_parse::Parsed* new_parsed,
- size_t* prefix_end);
+ size_t* prefix_end,
+ size_t* offset_for_adjustment);
// Creates a string representation of |url| for display to the user.
// This is a shorthand of the above function with omit_username_password=true,
// unescape=SPACES, new_parsed=NULL, and prefix_end=NULL.
inline std::wstring FormatUrl(const GURL& url, const std::wstring& languages) {
- return FormatUrl(url, languages, true, UnescapeRule::SPACES, NULL, NULL);
+ return FormatUrl(url, languages, true, UnescapeRule::SPACES, NULL, NULL,
+ NULL);
}
// Strip the portions of |url| that aren't core to the network request.
diff --git a/net/base/net_util_unittest.cc b/net/base/net_util_unittest.cc
index 07ec17c..308ef80 100644
--- a/net/base/net_util_unittest.cc
+++ b/net/base/net_util_unittest.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -345,6 +345,11 @@ const IDNTestCase idn_cases[] = {
#endif
};
+struct AdjustOffsetCase {
+ size_t input_offset;
+ size_t output_offset;
+};
+
struct CompliantHostCase {
const char* host;
bool expected_output;
@@ -782,14 +787,10 @@ TEST(NetUtilTest, IDNToUnicodeFast) {
// ja || zh-TW,en || ko,ja -> IDNToUnicodeSlow
if (j == 3 || j == 17 || j == 18)
continue;
- std::wstring output;
- net::IDNToUnicode(idn_cases[i].input,
- static_cast<int>(strlen(idn_cases[i].input)),
- kLanguages[j],
- &output);
+ std::wstring output(net::IDNToUnicode(idn_cases[i].input,
+ strlen(idn_cases[i].input), kLanguages[j], NULL));
std::wstring expected(idn_cases[i].unicode_allowed[j] ?
- idn_cases[i].unicode_output :
- ASCIIToWide(idn_cases[i].input));
+ idn_cases[i].unicode_output : ASCIIToWide(idn_cases[i].input));
AppendLanguagesToOutputs(kLanguages[j], &expected, &output);
EXPECT_EQ(expected, output);
}
@@ -802,20 +803,43 @@ TEST(NetUtilTest, IDNToUnicodeSlow) {
// !(ja || zh-TW,en || ko,ja) -> IDNToUnicodeFast
if (!(j == 3 || j == 17 || j == 18))
continue;
- std::wstring output;
- net::IDNToUnicode(idn_cases[i].input,
- static_cast<int>(strlen(idn_cases[i].input)),
- kLanguages[j],
- &output);
+ std::wstring output(net::IDNToUnicode(idn_cases[i].input,
+ strlen(idn_cases[i].input), kLanguages[j], NULL));
std::wstring expected(idn_cases[i].unicode_allowed[j] ?
- idn_cases[i].unicode_output :
- ASCIIToWide(idn_cases[i].input));
+ idn_cases[i].unicode_output : ASCIIToWide(idn_cases[i].input));
AppendLanguagesToOutputs(kLanguages[j], &expected, &output);
EXPECT_EQ(expected, output);
}
}
}
+TEST(NetUtilTest, IDNToUnicodeAdjustOffset) {
+ const AdjustOffsetCase adjust_cases[] = {
+ {0, 0},
+ {2, 2},
+ {4, 4},
+ {5, 5},
+ {6, std::wstring::npos},
+ {16, std::wstring::npos},
+ {17, 7},
+ {18, 8},
+ {19, std::wstring::npos},
+ {25, std::wstring::npos},
+ {34, 12},
+ {35, 13},
+ {38, 16},
+ {39, std::wstring::npos},
+ {std::wstring::npos, std::wstring::npos},
+ };
+ for (size_t i = 0; i < ARRAYSIZE_UNSAFE(adjust_cases); ++i) {
+ size_t offset = adjust_cases[i].input_offset;
+ // "test.\x89c6\x9891.\x5317\x4eac\x5927\x5b78.test"
+ net::IDNToUnicode("test.xn--cy2a840a.xn--1lq90ic7f1rc.test", 39, L"zh-CN",
+ &offset);
+ EXPECT_EQ(adjust_cases[i].output_offset, offset);
+ }
+}
+
TEST(NetUtilTest, CompliantHost) {
const CompliantHostCase compliant_host_cases[] = {
{"", false},
@@ -1328,7 +1352,7 @@ TEST(NetUtilTest, FormatUrl) {
size_t prefix_len;
std::wstring formatted = net::FormatUrl(
GURL(tests[i].input), tests[i].languages, tests[i].omit,
- tests[i].escape_rules, NULL, &prefix_len);
+ tests[i].escape_rules, NULL, &prefix_len, NULL);
EXPECT_EQ(tests[i].output, formatted) << tests[i].description;
EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description;
}
@@ -1340,7 +1364,7 @@ TEST(NetUtilTest, FormatUrlParsed) {
std::wstring formatted = net::FormatUrl(
GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
"%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
- L"ja", false, UnescapeRule::NONE, &parsed, NULL);
+ L"ja", false, UnescapeRule::NONE, &parsed, NULL, NULL);
EXPECT_EQ(L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
L"/%E3%82%B0/?q=%E3%82%B0#\x30B0", formatted);
EXPECT_EQ(L"%E3%82%B0",
@@ -1360,7 +1384,7 @@ TEST(NetUtilTest, FormatUrlParsed) {
formatted = net::FormatUrl(
GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
"%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
- L"ja", false, UnescapeRule::NORMAL, &parsed, NULL);
+ L"ja", false, UnescapeRule::NORMAL, &parsed, NULL, NULL);
EXPECT_EQ(L"http://\x30B0:\x30FC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
L"/\x30B0/?q=\x30B0#\x30B0", formatted);
EXPECT_EQ(L"\x30B0",
@@ -1379,7 +1403,7 @@ TEST(NetUtilTest, FormatUrlParsed) {
formatted = net::FormatUrl(
GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
"%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
- L"ja", true, UnescapeRule::NORMAL, &parsed, NULL);
+ L"ja", true, UnescapeRule::NORMAL, &parsed, NULL, NULL);
EXPECT_EQ(L"http://\x30B0\x30FC\x30B0\x30EB.jp:8080"
L"/\x30B0/?q=\x30B0#\x30B0", formatted);
EXPECT_FALSE(parsed.username.is_valid());
@@ -1395,7 +1419,7 @@ TEST(NetUtilTest, FormatUrlParsed) {
// View-source case.
formatted = net::FormatUrl(
GURL("view-source:http://user:passwd@host:81/path?query#ref"),
- L"", true, UnescapeRule::NORMAL, &parsed, NULL);
+ L"", true, UnescapeRule::NORMAL, &parsed, NULL, NULL);
EXPECT_EQ(L"view-source:http://host:81/path?query#ref", formatted);
EXPECT_EQ(L"view-source:http",
formatted.substr(parsed.scheme.begin, parsed.scheme.len));
@@ -1408,6 +1432,124 @@ TEST(NetUtilTest, FormatUrlParsed) {
EXPECT_EQ(L"ref", formatted.substr(parsed.ref.begin, parsed.ref.len));
}
+TEST(NetUtilTest, FormatUrlAdjustOffset) {
+ const AdjustOffsetCase basic_cases[] = {
+ {0, 0},
+ {3, 3},
+ {5, 5},
+ {6, 6},
+ {13, 13},
+ {21, 21},
+ {22, 22},
+ {23, 23},
+ {25, 25},
+ {26, std::wstring::npos},
+ {500000, std::wstring::npos},
+ {std::wstring::npos, std::wstring::npos},
+ };
+ for (size_t i = 0; i < ARRAYSIZE_UNSAFE(basic_cases); ++i) {
+ size_t offset = basic_cases[i].input_offset;
+ net::FormatUrl(GURL("http://www.google.com/foo/"), L"en", true,
+ UnescapeRule::NORMAL, NULL, NULL, &offset);
+ EXPECT_EQ(basic_cases[i].output_offset, offset);
+ }
+
+ const struct {
+ const char* input_url;
+ size_t input_offset;
+ size_t output_offset;
+ } omit_auth_cases[] = {
+ {"http://foo:bar@www.google.com/", 6, 6},
+ {"http://foo:bar@www.google.com/", 7, 7},
+ {"http://foo:bar@www.google.com/", 8, std::wstring::npos},
+ {"http://foo:bar@www.google.com/", 10, std::wstring::npos},
+ {"http://foo:bar@www.google.com/", 11, std::wstring::npos},
+ {"http://foo:bar@www.google.com/", 14, std::wstring::npos},
+ {"http://foo:bar@www.google.com/", 15, 7},
+ {"http://foo:bar@www.google.com/", 25, 17},
+ {"http://foo@www.google.com/", 9, std::wstring::npos},
+ {"http://foo@www.google.com/", 11, 7},
+ };
+ for (size_t i = 0; i < ARRAYSIZE_UNSAFE(omit_auth_cases); ++i) {
+ size_t offset = omit_auth_cases[i].input_offset;
+ net::FormatUrl(GURL(omit_auth_cases[i].input_url), L"en", true,
+ UnescapeRule::NORMAL, NULL, NULL, &offset);
+ EXPECT_EQ(omit_auth_cases[i].output_offset, offset);
+ }
+
+ const AdjustOffsetCase view_source_cases[] = {
+ {0, 0},
+ {3, 3},
+ {11, 11},
+ {12, 12},
+ {13, 13},
+ {19, 19},
+ {20, std::wstring::npos},
+ {23, 19},
+ {26, 22},
+ {std::wstring::npos, std::wstring::npos},
+ };
+ for (size_t i = 0; i < ARRAYSIZE_UNSAFE(view_source_cases); ++i) {
+ size_t offset = view_source_cases[i].input_offset;
+ net::FormatUrl(GURL("view-source:http://foo@www.google.com/"), L"en", true,
+ UnescapeRule::NORMAL, NULL, NULL, &offset);
+ EXPECT_EQ(view_source_cases[i].output_offset, offset);
+ }
+
+ const AdjustOffsetCase idn_hostname_cases[] = {
+ {8, std::wstring::npos},
+ {16, std::wstring::npos},
+ {24, std::wstring::npos},
+ {25, 12},
+ {30, 17},
+ };
+ for (size_t i = 0; i < ARRAYSIZE_UNSAFE(idn_hostname_cases); ++i) {
+ size_t offset = idn_hostname_cases[i].input_offset;
+ // "http://\x671d\x65e5\x3042\x3055\x3072.jp/foo/"
+ net::FormatUrl(GURL("http://xn--l8jvb1ey91xtjb.jp/foo/"), L"ja", true,
+ UnescapeRule::NORMAL, NULL, NULL, &offset);
+ EXPECT_EQ(idn_hostname_cases[i].output_offset, offset);
+ }
+
+ const AdjustOffsetCase unescape_cases[] = {
+ {25, 25},
+ {26, std::wstring::npos},
+ {27, std::wstring::npos},
+ {28, 26},
+ {35, std::wstring::npos},
+ {41, 31},
+ {59, 33},
+ {60, std::wstring::npos},
+ {67, std::wstring::npos},
+ {68, std::wstring::npos},
+ };
+ for (size_t i = 0; i < ARRAYSIZE_UNSAFE(unescape_cases); ++i) {
+ size_t offset = unescape_cases[i].input_offset;
+ // "http://www.google.com/foo bar/\x30B0\x30FC\x30B0\x30EB"
+ net::FormatUrl(GURL(
+ "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"),
+ L"en", true, UnescapeRule::SPACES, NULL, NULL, &offset);
+ EXPECT_EQ(unescape_cases[i].output_offset, offset);
+ }
+
+ const AdjustOffsetCase ref_cases[] = {
+ {30, 30},
+ {31, 31},
+ {32, std::wstring::npos},
+ {34, 32},
+ {37, 33},
+ {38, std::wstring::npos},
+ };
+ for (size_t i = 0; i < ARRAYSIZE_UNSAFE(ref_cases); ++i) {
+ size_t offset = ref_cases[i].input_offset;
+ // "http://www.google.com/foo.html#\x30B0\x30B0z"
+ net::FormatUrl(GURL(
+ "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z"), L"en",
+ true, UnescapeRule::NORMAL, NULL, NULL, &offset);
+ EXPECT_EQ(ref_cases[i].output_offset, offset);
+ }
+}
+
TEST(NetUtilTest, SimplifyUrlForRequest) {
struct {
const char* input_url;
@@ -1466,4 +1608,3 @@ TEST(NetUtilTest, SetExplicitlyAllowedPortsTest) {
EXPECT_EQ(i, net::explicitly_allowed_ports.size());
}
}
-