diff options
author | pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-05-03 20:03:50 +0000 |
---|---|---|
committer | pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-05-03 20:03:50 +0000 |
commit | 04866c4c67566d17cdea2e72eb90a6ec79db1a8b (patch) | |
tree | 0e14e8c1fa5bd77b6cba9b44b110426ec596be57 /net/base | |
parent | 623fe74c6c07104c2f09b9822af47813a81d2f1f (diff) | |
download | chromium_src-04866c4c67566d17cdea2e72eb90a6ec79db1a8b.zip chromium_src-04866c4c67566d17cdea2e72eb90a6ec79db1a8b.tar.gz chromium_src-04866c4c67566d17cdea2e72eb90a6ec79db1a8b.tar.bz2 |
Eliminate wstring from base/utf_offset_string_conversions.h, net/base/escape.h, and net/base/net_util.h, and reduce the API surfaces in various places slightly where possible.
BUG=23581
TEST=none
Review URL: http://codereview.chromium.org/6898026
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@83948 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net/base')
-rw-r--r-- | net/base/escape.cc | 50 | ||||
-rw-r--r-- | net/base/escape.h | 5 | ||||
-rw-r--r-- | net/base/escape_unittest.cc | 51 | ||||
-rw-r--r-- | net/base/net_util.cc | 999 | ||||
-rw-r--r-- | net/base/net_util.h | 65 | ||||
-rw-r--r-- | net/base/net_util_unittest.cc | 584 | ||||
-rw-r--r-- | net/base/registry_controlled_domain.cc | 25 | ||||
-rw-r--r-- | net/base/registry_controlled_domain.h | 3 | ||||
-rw-r--r-- | net/base/registry_controlled_domain_unittest.cc | 159 | ||||
-rw-r--r-- | net/base/sdch_manager.cc | 3 |
10 files changed, 819 insertions, 1125 deletions
diff --git a/net/base/escape.cc b/net/base/escape.cc index 61c3e81..f04e790 100644 --- a/net/base/escape.cc +++ b/net/base/escape.cc @@ -105,7 +105,7 @@ STR UnescapeURLWithOffsetsImpl(const STR& escaped_text, if (offsets_for_adjustment) { std::for_each(offsets_for_adjustment->begin(), offsets_for_adjustment->end(), - LimitOffset<std::wstring>(escaped_text.length())); + LimitOffset<STR>(escaped_text.length())); } // Do not unescape anything, return the |escaped_text| text. if (rules == UnescapeRule::NONE) @@ -177,19 +177,6 @@ STR UnescapeURLWithOffsetsImpl(const STR& escaped_text, return result; } -template<typename STR> -STR UnescapeURLImpl(const STR& escaped_text, - UnescapeRule::Type rules, - size_t* offset_for_adjustment) { - std::vector<size_t> offsets; - if (offset_for_adjustment) - offsets.push_back(*offset_for_adjustment); - STR result = UnescapeURLWithOffsetsImpl(escaped_text, rules, &offsets); - if (offset_for_adjustment) - *offset_for_adjustment = offsets[0]; - return result; -} - } // namespace // Everything except alphanumerics and !'()*-._~ @@ -251,22 +238,21 @@ string16 UnescapeAndDecodeUTF8URLComponentWithOffsets( const std::string& text, UnescapeRule::Type rules, std::vector<size_t>* offsets_for_adjustment) { - std::wstring result; + string16 result; std::vector<size_t> original_offsets; if (offsets_for_adjustment) original_offsets = *offsets_for_adjustment; std::string unescaped_url( UnescapeURLWithOffsetsImpl(text, rules, offsets_for_adjustment)); - if (UTF8ToWideAndAdjustOffsets(unescaped_url.data(), unescaped_url.length(), - &result, offsets_for_adjustment)) - return WideToUTF16Hack(result); // Character set looks like it's valid. + if (UTF8ToUTF16AndAdjustOffsets(unescaped_url.data(), unescaped_url.length(), + &result, offsets_for_adjustment)) + return result; // Character set looks like it's valid. // Not valid. Return the escaped version. Undo our changes to // |offset_for_adjustment| since we haven't changed the string after all. 
if (offsets_for_adjustment) *offsets_for_adjustment = original_offsets; - return WideToUTF16Hack(UTF8ToWideAndAdjustOffsets( - text, offsets_for_adjustment)); + return UTF8ToUTF16AndAdjustOffsets(text, offsets_for_adjustment); } string16 UnescapeAndDecodeUTF8URLComponent(const std::string& text, @@ -284,12 +270,12 @@ string16 UnescapeAndDecodeUTF8URLComponent(const std::string& text, std::string UnescapeURLComponent(const std::string& escaped_text, UnescapeRule::Type rules) { - return UnescapeURLWithOffsetsImpl<std::string>(escaped_text, rules, NULL); + return UnescapeURLWithOffsetsImpl(escaped_text, rules, NULL); } string16 UnescapeURLComponent(const string16& escaped_text, UnescapeRule::Type rules) { - return UnescapeURLWithOffsetsImpl<string16>(escaped_text, rules, NULL); + return UnescapeURLWithOffsetsImpl(escaped_text, rules, NULL); } @@ -322,10 +308,6 @@ void AppendEscapedCharForHTML(char c, std::string* output) { AppendEscapedCharForHTMLImpl(c, output); } -void AppendEscapedCharForHTML(wchar_t c, string16* output) { - AppendEscapedCharForHTMLImpl(c, output); -} - template <class str> str EscapeForHTMLImpl(const str& input) { str result; @@ -347,17 +329,17 @@ string16 EscapeForHTML(const string16& input) { string16 UnescapeForHTML(const string16& input) { static const struct { - const wchar_t* ampersand_code; + const char* ampersand_code; const char replacement; } kEscapeToChars[] = { - { L"&lt;", '<' }, - { L"&gt;", '>' }, - { L"&amp;", '&' }, - { L"&quot;", '"' }, - { L"&#39;", '\''}, + { "&lt;", '<' }, + { "&gt;", '>' }, + { "&amp;", '&' }, + { "&quot;", '"' }, + { "&#39;", '\''}, }; - if (input.find(WideToUTF16(L"&")) == std::string::npos) + if (input.find(ASCIIToUTF16("&")) == std::string::npos) return input; string16 ampersand_chars[ARRAYSIZE_UNSAFE(kEscapeToChars)]; @@ -368,7 +350,7 @@ string16 UnescapeForHTML(const string16& input) { size_t index = iter - text.begin(); for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kEscapeToChars); i++) { if (ampersand_chars[i].empty()) - ampersand_chars[i] = 
WideToUTF16(kEscapeToChars[i].ampersand_code); + ampersand_chars[i] = ASCIIToUTF16(kEscapeToChars[i].ampersand_code); if (text.find(ampersand_chars[i], index) == index) { text.replace(iter, iter + ampersand_chars[i].length(), 1, kEscapeToChars[i].replacement); diff --git a/net/base/escape.h b/net/base/escape.h index f4c99a3..d389d85 100644 --- a/net/base/escape.h +++ b/net/base/escape.h @@ -90,7 +90,7 @@ class UnescapeRule { // Watch out: this doesn't necessarily result in the correct final result, // because the encoding may be unknown. For example, the input might be ASCII, // which, after unescaping, is supposed to be interpreted as UTF-8, and then -// converted into full wide chars. This function won't tell you if any +// converted into full UTF-16 chars. This function won't tell you if any // conversions need to take place, it only unescapes. std::string UnescapeURLComponent(const std::string& escaped_text, UnescapeRule::Type rules); @@ -105,8 +105,7 @@ string16 UnescapeURLComponent(const string16& escaped_text, // adjusted to point at the same logical place in the result strings during // decoding. If this isn't possible because an offset points past the end of // the source strings or into the middle of a multibyte sequence, the offending -// offset will be set to std::wstring::npos. |offset[s]_for_adjustment| may be -// NULL. +// offset will be set to string16::npos. |offset[s]_for_adjustment| may be NULL. 
string16 UnescapeAndDecodeUTF8URLComponent(const std::string& text, UnescapeRule::Type rules, size_t* offset_for_adjustment); diff --git a/net/base/escape_unittest.cc b/net/base/escape_unittest.cc index 3a8d895..5211fdf 100644 --- a/net/base/escape_unittest.cc +++ b/net/base/escape_unittest.cc @@ -19,8 +19,8 @@ namespace { static const size_t kNpos = string16::npos; struct EscapeCase { - const wchar_t* input; - const wchar_t* output; + const char* input; + const char* output; }; struct UnescapeURLCase { @@ -63,25 +63,25 @@ struct EscapeForHTMLCase { TEST(EscapeTest, EscapeTextForFormSubmission) { const EscapeCase escape_cases[] = { - {L"foo", L"foo"}, - {L"foo bar", L"foo+bar"}, - {L"foo++", L"foo%2B%2B"} + {"foo", "foo"}, + {"foo bar", "foo+bar"}, + {"foo++", "foo%2B%2B"} }; for (size_t i = 0; i < arraysize(escape_cases); ++i) { EscapeCase value = escape_cases[i]; - EXPECT_EQ(WideToUTF16Hack(value.output), - EscapeQueryParamValueUTF8(WideToUTF16Hack(value.input), true)); + EXPECT_EQ(UTF8ToUTF16(value.output), + EscapeQueryParamValueUTF8(UTF8ToUTF16(value.input), true)); } const EscapeCase escape_cases_no_plus[] = { - {L"foo", L"foo"}, - {L"foo bar", L"foo%20bar"}, - {L"foo++", L"foo%2B%2B"} + {"foo", "foo"}, + {"foo bar", "foo%20bar"}, + {"foo++", "foo%2B%2B"} }; for (size_t i = 0; i < arraysize(escape_cases_no_plus); ++i) { EscapeCase value = escape_cases_no_plus[i]; - EXPECT_EQ(WideToUTF16Hack(value.output), - EscapeQueryParamValueUTF8(WideToUTF16Hack(value.input), false)); + EXPECT_EQ(ASCIIToUTF16(value.output), + EscapeQueryParamValueUTF8(ASCIIToUTF16(value.input), false)); } // Test all the values in we're supposed to be escaping. 
@@ -116,13 +116,13 @@ TEST(EscapeTest, EscapeTextForFormSubmission) { for (int i = 1; i < 5000; ++i) { test_str.push_back(i); } - string16 wide; + string16 utf16; EXPECT_TRUE(EscapeQueryParamValue(test_str, base::kCodepageUTF8, true, - &wide)); - EXPECT_EQ(wide, EscapeQueryParamValueUTF8(test_str, true)); + &utf16)); + EXPECT_EQ(utf16, EscapeQueryParamValueUTF8(test_str, true)); EXPECT_TRUE(EscapeQueryParamValue(test_str, base::kCodepageUTF8, false, - &wide)); - EXPECT_EQ(wide, EscapeQueryParamValueUTF8(test_str, false)); + &utf16)); + EXPECT_EQ(utf16, EscapeQueryParamValueUTF8(test_str, false)); } TEST(EscapeTest, EscapePath) { @@ -181,6 +181,10 @@ TEST(EscapeTest, UnescapeURLComponentASCII) { {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::URL_SPECIAL_CHARS, "Hello%20%13%10world ## ?? == && %% ++"}, + // We can neither escape nor unescape '@' since some websites expect it to + // be preserved as either '@' or "%40". + // See http://b/996720 and http://crbug.com/23933 . + {"me@my%40example", UnescapeRule::NORMAL, "me@my%40example"}, // Control characters. {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS, "%01%02%03%04%05%06%07%08%09 %"}, @@ -340,23 +344,22 @@ TEST(EscapeTest, UnescapeAndDecodeUTF8URLComponent) { // TODO: Need to test unescape_spaces and unescape_percent. 
string16 decoded = UnescapeAndDecodeUTF8URLComponent( unescape_cases[i].input, UnescapeRule::NORMAL, NULL); - EXPECT_EQ(WideToUTF16Hack(std::wstring(unescape_cases[i].decoded)), - decoded); + EXPECT_EQ(WideToUTF16(unescape_cases[i].decoded), decoded); } } TEST(EscapeTest, AdjustOffset) { const AdjustOffsetCase adjust_cases[] = { - {"", 0, std::wstring::npos}, + {"", 0, std::string::npos}, {"test", 0, 0}, {"test", 2, 2}, - {"test", 4, std::wstring::npos}, - {"test", std::wstring::npos, std::wstring::npos}, + {"test", 4, std::string::npos}, + {"test", std::string::npos, std::string::npos}, {"%2dtest", 6, 4}, - {"%2dtest", 2, std::wstring::npos}, + {"%2dtest", 2, std::string::npos}, {"test%2d", 2, 2}, {"%E4%BD%A0+%E5%A5%BD", 9, 1}, - {"%E4%BD%A0+%E5%A5%BD", 6, std::wstring::npos}, + {"%E4%BD%A0+%E5%A5%BD", 6, std::string::npos}, {"%ED%B0%80+%E5%A5%BD", 6, 6}, }; diff --git a/net/base/net_util.cc b/net/base/net_util.cc index a6fe220..5c7aab4 100644 --- a/net/base/net_util.cc +++ b/net/base/net_util.cc @@ -155,40 +155,6 @@ static const int kAllowedFtpPorts[] = { 22, // ssh }; -template<typename STR> -STR GetSpecificHeaderT(const STR& headers, const STR& name) { - // We want to grab the Value from the "Key: Value" pairs in the headers, - // which should look like this (no leading spaces, \n-separated) (we format - // them this way in url_request_inet.cc): - // HTTP/1.1 200 OK\n - // ETag: "6d0b8-947-24f35ec0"\n - // Content-Length: 2375\n - // Content-Type: text/html; charset=UTF-8\n - // Last-Modified: Sun, 03 Sep 2006 04:34:43 GMT\n - if (headers.empty()) - return STR(); - - STR match; - match.push_back('\n'); - match.append(name); - match.push_back(':'); - - typename STR::const_iterator begin = - search(headers.begin(), headers.end(), match.begin(), match.end(), - base::CaseInsensitiveCompareASCII<typename STR::value_type>()); - - if (begin == headers.end()) - return STR(); - - begin += match.length(); - - typename STR::const_iterator end = find(begin, headers.end(), 
'\n'); - - STR ret; - TrimWhitespace(STR(begin, end), TRIM_ALL, &ret); - return ret; -} - // Similar to Base64Decode. Decodes a Q-encoded string to a sequence // of bytes. If input is invalid, return false. bool QPDecode(const std::string& input, std::string* output) { @@ -276,12 +242,12 @@ bool DecodeWord(const std::string& encoded_word, if (IsStringUTF8(encoded_word)) { *output = encoded_word; } else { - std::wstring wide_output; + string16 utf16_output; if (!referrer_charset.empty() && - base::CodepageToWide(encoded_word, referrer_charset.c_str(), - base::OnStringConversionError::FAIL, - &wide_output)) { - *output = WideToUTF8(wide_output); + base::CodepageToUTF16(encoded_word, referrer_charset.c_str(), + base::OnStringConversionError::FAIL, + &utf16_output)) { + *output = UTF16ToUTF8(utf16_output); } else { *output = WideToUTF8(base::SysNativeMBToWide(encoded_word)); } @@ -414,47 +380,6 @@ bool DecodeParamValue(const std::string& input, return true; } -// TODO(mpcomplete): This is a quick and dirty implementation for now. I'm -// sure this doesn't properly handle all (most?) cases. -template<typename STR> -STR GetHeaderParamValueT(const STR& header, const STR& param_name, - QuoteRule::Type quote_rule) { - // This assumes args are formatted exactly like "bla; arg1=value; arg2=value". 
- typename STR::const_iterator param_begin = - search(header.begin(), header.end(), param_name.begin(), param_name.end(), - base::CaseInsensitiveCompareASCII<typename STR::value_type>()); - - if (param_begin == header.end()) - return STR(); - param_begin += param_name.length(); - - STR whitespace; - whitespace.push_back(' '); - whitespace.push_back('\t'); - const typename STR::size_type equals_offset = - header.find_first_not_of(whitespace, param_begin - header.begin()); - if (equals_offset == STR::npos || header.at(equals_offset) != '=') - return STR(); - - param_begin = header.begin() + equals_offset + 1; - if (param_begin == header.end()) - return STR(); - - typename STR::const_iterator param_end; - if (*param_begin == '"' && quote_rule == QuoteRule::REMOVE_OUTER_QUOTES) { - ++param_begin; // skip past the quote. - param_end = find(param_begin, header.end(), '"'); - // If the closing quote is missing, we will treat the rest of the - // string as the parameter. We can't set |param_end| to the - // location of the separator (';'), since the separator is - // technically quoted. See: http://crbug.com/58840 - } else { - param_end = find(param_begin+1, header.end(), ';'); - } - - return STR(param_begin, param_end); -} - // Does some simple normalization of scripts so we can allow certain scripts // to exist together. // TODO(brettw) bug 880223: we should allow some other languages to be @@ -593,7 +518,7 @@ bool IsComponentCoveredByLang(const icu::UnicodeSet& component_characters, // user. bool IsIDNComponentSafe(const char16* str, int str_len, - const std::wstring& languages) { + const std::string& languages) { // Most common cases (non-IDN) do not reach here so that we don't // need a fast return path. // TODO(jungshik) : Check if there's any character inappropriate @@ -677,8 +602,7 @@ bool IsIDNComponentSafe(const char16* str, // the remainder. 
component_characters.removeAll(common_characters); - std::string languages_list(WideToASCII(languages)); - StringTokenizer t(languages_list, ","); + StringTokenizer t(languages, ","); while (t.GetNext()) { if (IsComponentCoveredByLang(component_characters, t.token())) return true; @@ -692,7 +616,7 @@ bool IsIDNComponentSafe(const char16* str, // conversion was performed. bool IDNToUnicodeOneComponent(const char16* comp, size_t comp_len, - const std::wstring& languages, + const std::string& languages, string16* out) { DCHECK(out); if (comp_len == 0) @@ -734,77 +658,114 @@ bool IDNToUnicodeOneComponent(const char16* comp, return false; } -struct SubtractFromOffset { - explicit SubtractFromOffset(size_t amount) - : amount(amount) {} - void operator()(size_t& offset) { - if (offset != std::wstring::npos) { - if (offset >= amount) - offset -= amount; - else - offset = std::wstring::npos; - } +// Clamps the offsets in |offsets_for_adjustment| to the length of |str|. +void LimitOffsets(const string16& str, + std::vector<size_t>* offsets_for_adjustment) { + if (offsets_for_adjustment) { + std::for_each(offsets_for_adjustment->begin(), + offsets_for_adjustment->end(), + LimitOffset<string16>(str.length())); } +} - size_t amount; -}; +// TODO(brettw) bug 734373: check the scripts for each host component and +// don't un-IDN-ize if there is more than one. Alternatively, only IDN for +// scripts that the user has installed. For now, just put the entire +// path through IDN. Maybe this feature can be implemented in ICU itself? +// +// We may want to skip this step in the case of file URLs to allow unicode +// UNC hostnames regardless of encodings. +string16 IDNToUnicodeWithOffsets(const std::string& host, + const std::string& languages, + std::vector<size_t>* offsets_for_adjustment) { + // Convert the ASCII input to a string16 for ICU. 
+ string16 input16; + input16.reserve(host.length()); + input16.insert(input16.end(), host.begin(), host.end()); -struct AddToOffset { - explicit AddToOffset(size_t amount) - : amount(amount) {} - void operator()(size_t& offset) { - if (offset != std::wstring::npos) - offset += amount; - } + // Do each component of the host separately, since we enforce script matching + // on a per-component basis. + string16 out16; + { + OffsetAdjuster offset_adjuster(offsets_for_adjustment); + for (size_t component_start = 0, component_end; + component_start < input16.length(); + component_start = component_end + 1) { + // Find the end of the component. + component_end = input16.find('.', component_start); + if (component_end == string16::npos) + component_end = input16.length(); // For getting the last component. + size_t component_length = component_end - component_start; + size_t new_component_start = out16.length(); + bool converted_idn = false; + if (component_end > component_start) { + // Add the substring that we just found. + converted_idn = IDNToUnicodeOneComponent( + input16.data() + component_start, component_length, languages, + &out16); + } + size_t new_component_length = out16.length() - new_component_start; - size_t amount; -}; + if (converted_idn && offsets_for_adjustment) { + offset_adjuster.Add(OffsetAdjuster::Adjustment(component_start, + component_length, new_component_length)); + } -std::vector<size_t> OffsetsIntoSection( - std::vector<size_t>* offsets_for_adjustment, - size_t section_begin) { - std::vector<size_t> offsets_into_section; - if (offsets_for_adjustment) { - std::transform(offsets_for_adjustment->begin(), - offsets_for_adjustment->end(), - std::back_inserter(offsets_into_section), - ClampComponentOffset(section_begin)); - std::for_each(offsets_into_section.begin(), offsets_into_section.end(), - SubtractFromOffset(section_begin)); + // Need to add the dot we just found (if we found one). 
+ if (component_end < input16.length()) + out16.push_back('.'); + } } - return offsets_into_section; -} -void ApplySectionAdjustments(const std::vector<size_t>& offsets_into_section, - std::vector<size_t>* offsets_for_adjustment, - size_t old_section_len, - size_t new_section_len, - size_t section_begin) { - if (offsets_for_adjustment) { - DCHECK_EQ(offsets_for_adjustment->size(), offsets_into_section.size()); - std::vector<size_t>::const_iterator host_offsets_iter = - offsets_into_section.begin(); - for (std::vector<size_t>::iterator offsets_iter = - offsets_for_adjustment->begin(); - offsets_iter != offsets_for_adjustment->end(); - ++offsets_iter, ++host_offsets_iter) { - size_t offset = *offsets_iter; - if (offset == std::wstring::npos || offset < section_begin) { - // The offset is before the host section so leave it as is. - continue; - } - if (offset >= section_begin + old_section_len) { - // The offset is after the host section so adjust by host length delta. - offset += new_section_len - old_section_len; - } else if (*host_offsets_iter != std::wstring::npos) { - // The offset is within the host and valid so adjust by the host - // reformatting offsets results. - offset = section_begin + *host_offsets_iter; - } else { - // The offset is invalid. - offset = std::wstring::npos; - } - *offsets_iter = offset; + LimitOffsets(out16, offsets_for_adjustment); + return out16; +} + +// Transforms |original_offsets| by subtracting |component_begin| from all +// offsets. Any offset which was not at least this large to begin with is set +// to std::string::npos. +std::vector<size_t> OffsetsIntoComponent( + const std::vector<size_t>& original_offsets, + size_t component_begin) { + DCHECK_NE(std::string::npos, component_begin); + std::vector<size_t> offsets_into_component(original_offsets); + for (std::vector<size_t>::iterator i(offsets_into_component.begin()); + i != offsets_into_component.end(); ++i) { + if (*i != std::string::npos) + *i = (*i < component_begin) ? 
std::string::npos : (*i - component_begin); + } + return offsets_into_component; +} + +// Called after we transform a component and append it to an output string. +// Maps |transformed_offsets|, which represent offsets into the transformed +// component itself, into appropriate offsets for the output string, by adding +// |output_component_begin| to each. Determines which offsets need mapping by +// checking to see which of the |original_offsets| were within the designated +// original component, using its provided endpoints. +void AdjustForComponentTransform( + const std::vector<size_t>& original_offsets, + size_t original_component_begin, + size_t original_component_end, + const std::vector<size_t>& transformed_offsets, + size_t output_component_begin, + std::vector<size_t>* offsets_for_adjustment) { + if (!offsets_for_adjustment) + return; + + DCHECK_NE(std::string::npos, original_component_begin); + DCHECK_NE(std::string::npos, original_component_end); + DCHECK_NE(string16::npos, output_component_begin); + size_t offsets_size = offsets_for_adjustment->size(); + DCHECK_EQ(offsets_size, original_offsets.size()); + DCHECK_EQ(offsets_size, transformed_offsets.size()); + for (size_t i = 0; i < offsets_size; ++i) { + size_t original_offset = original_offsets[i]; + if ((original_offset >= original_component_begin) && + (original_offset < original_component_end)) { + size_t transformed_offset = transformed_offsets[i]; + (*offsets_for_adjustment)[i] = (transformed_offset == string16::npos) ? 
+ string16::npos : (output_component_begin + transformed_offset); } } } @@ -829,295 +790,125 @@ void AdjustComponents(int delta, url_parse::Parsed* parsed) { AdjustComponent(delta, &(parsed->ref)); } -std::wstring FormatUrlInternal(const GURL& url, - const std::wstring& languages, - FormatUrlTypes format_types, - UnescapeRule::Type unescape_rules, - url_parse::Parsed* new_parsed, - size_t* prefix_end, - std::vector<size_t>* offsets_for_adjustment); - -// Helper for FormatUrl()/FormatUrlInternal(). -std::wstring FormatViewSourceUrl(const GURL& url, - const std::wstring& languages, - FormatUrlTypes format_types, - UnescapeRule::Type unescape_rules, - url_parse::Parsed* new_parsed, - size_t* prefix_end, - std::vector<size_t>* offsets_for_adjustment) { +// Helper for FormatUrlWithOffsets(). +string16 FormatViewSourceUrl(const GURL& url, + const std::vector<size_t>& original_offsets, + const std::string& languages, + FormatUrlTypes format_types, + UnescapeRule::Type unescape_rules, + url_parse::Parsed* new_parsed, + size_t* prefix_end, + std::vector<size_t>* offsets_for_adjustment) { DCHECK(new_parsed); - DCHECK(offsets_for_adjustment); - const wchar_t* const kWideViewSource = L"view-source:"; - const size_t kViewSourceLengthPlus1 = 12; - std::vector<size_t> saved_offsets(*offsets_for_adjustment); - - GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLengthPlus1)); - // Clamp the offsets to the source area. 
- std::for_each(offsets_for_adjustment->begin(), - offsets_for_adjustment->end(), - SubtractFromOffset(kViewSourceLengthPlus1)); - std::wstring result = FormatUrlInternal(real_url, languages, format_types, - unescape_rules, new_parsed, prefix_end, offsets_for_adjustment); - result.insert(0, kWideViewSource); + const char kViewSource[] = "view-source:"; + const size_t kViewSourceLength = arraysize(kViewSource) - 1; + std::vector<size_t> offsets_into_url( + OffsetsIntoComponent(original_offsets, kViewSourceLength)); + + GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLength)); + string16 result(ASCIIToUTF16(kViewSource) + + FormatUrlWithOffsets(real_url, languages, format_types, unescape_rules, + new_parsed, prefix_end, &offsets_into_url)); // Adjust position values. if (new_parsed->scheme.is_nonempty()) { // Assume "view-source:real-scheme" as a scheme. - new_parsed->scheme.len += kViewSourceLengthPlus1; + new_parsed->scheme.len += kViewSourceLength; } else { new_parsed->scheme.begin = 0; - new_parsed->scheme.len = kViewSourceLengthPlus1 - 1; + new_parsed->scheme.len = kViewSourceLength - 1; } - AdjustComponents(kViewSourceLengthPlus1, new_parsed); + AdjustComponents(kViewSourceLength, new_parsed); if (prefix_end) - *prefix_end += kViewSourceLengthPlus1; - std::for_each(offsets_for_adjustment->begin(), - offsets_for_adjustment->end(), - AddToOffset(kViewSourceLengthPlus1)); - // Restore all offsets which were not affected by FormatUrlInternal. 
- DCHECK_EQ(saved_offsets.size(), offsets_for_adjustment->size()); - for (size_t i = 0; i < saved_offsets.size(); ++i) { - if (saved_offsets[i] < kViewSourceLengthPlus1) - (*offsets_for_adjustment)[i] = saved_offsets[i]; - } + *prefix_end += kViewSourceLength; + AdjustForComponentTransform(original_offsets, kViewSourceLength, + url.possibly_invalid_spec().length(), offsets_into_url, kViewSourceLength, + offsets_for_adjustment); + LimitOffsets(result, offsets_for_adjustment); return result; } -// Appends the substring |in_component| inside of the URL |spec| to |output|, -// and the resulting range will be filled into |out_component|. |unescape_rules| -// defines how to clean the URL for human readability. |offsets_for_adjustment| -// is an array of offsets into |output| each of which will be adjusted based on -// how it maps to the component being converted; if it is less than -// output->length(), it will be untouched, and if it is greater than -// output->length() + in_component.len it will be adjusted by the difference in -// lengths between the input and output components. Otherwise it points into -// the component being converted, and is adjusted to point to the same logical -// place in |output|. |offsets_for_adjustment| may not be NULL. -void AppendFormattedComponent(const std::string& spec, - const url_parse::Component& in_component, - UnescapeRule::Type unescape_rules, - std::wstring* output, - url_parse::Component* out_component, - std::vector<size_t>* offsets_for_adjustment) { - DCHECK(output); - DCHECK(offsets_for_adjustment); - if (in_component.is_nonempty()) { - size_t component_begin = output->length(); - out_component->begin = static_cast<int>(component_begin); - - // Compose a list of offsets within the component area. 
- std::vector<size_t> offsets_into_component = - OffsetsIntoSection(offsets_for_adjustment, component_begin); - - if (unescape_rules == UnescapeRule::NONE) { - output->append(UTF8ToWideAndAdjustOffsets( - spec.substr(in_component.begin, in_component.len), - &offsets_into_component)); - } else { - output->append(UTF16ToWideHack( - UnescapeAndDecodeUTF8URLComponentWithOffsets( - spec.substr(in_component.begin, in_component.len), unescape_rules, - &offsets_into_component))); - } - size_t new_component_len = output->length() - component_begin; - out_component->len = static_cast<int>(new_component_len); - - // Apply offset adjustments. - size_t old_component_len = static_cast<size_t>(in_component.len); - ApplySectionAdjustments(offsets_into_component, offsets_for_adjustment, - old_component_len, new_component_len, component_begin); - } else { - out_component->reset(); - } -} - -// TODO(viettrungluu): This is really the old-fashioned version, made internal. -// I need to really convert |FormatUrl()|. -std::wstring FormatUrlInternal(const GURL& url, - const std::wstring& languages, - FormatUrlTypes format_types, - UnescapeRule::Type unescape_rules, - url_parse::Parsed* new_parsed, - size_t* prefix_end, - std::vector<size_t>* offsets_for_adjustment) { - url_parse::Parsed parsed_temp; - if (!new_parsed) - new_parsed = &parsed_temp; - else - *new_parsed = url_parse::Parsed(); +class AppendComponentTransform { + public: + AppendComponentTransform() {} + virtual ~AppendComponentTransform() {} - std::vector<size_t> offsets_temp; - if (!offsets_for_adjustment) - offsets_for_adjustment = &offsets_temp; + virtual string16 Execute( + const std::string& component_text, + std::vector<size_t>* offsets_into_component) const = 0; - std::wstring url_string; + // NOTE: No DISALLOW_COPY_AND_ASSIGN here, since gcc < 4.3.0 requires an + // accessible copy constructor in order to call AppendFormattedComponent() + // with an inline temporary (see http://gcc.gnu.org/bugs/#cxx%5Frvalbind ). 
+}; - // Check for empty URLs or 0 available text width. - if (url.is_empty()) { - if (prefix_end) - *prefix_end = 0; - std::for_each(offsets_for_adjustment->begin(), - offsets_for_adjustment->end(), - LimitOffset<std::wstring>(0)); - return url_string; +class HostComponentTransform : public AppendComponentTransform { + public: + explicit HostComponentTransform(const std::string& languages) + : languages_(languages) { } - // Special handling for view-source:. Don't use chrome::kViewSourceScheme - // because this library shouldn't depend on chrome. - const char* const kViewSource = "view-source"; - // Reject "view-source:view-source:..." to avoid deep recursion. - const char* const kViewSourceTwice = "view-source:view-source:"; - if (url.SchemeIs(kViewSource) && - !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) { - return FormatViewSourceUrl(url, languages, format_types, - unescape_rules, new_parsed, prefix_end, offsets_for_adjustment); + private: + virtual string16 Execute( + const std::string& component_text, + std::vector<size_t>* offsets_into_component) const { + return IDNToUnicodeWithOffsets(component_text, languages_, + offsets_into_component); } - // We handle both valid and invalid URLs (this will give us the spec - // regardless of validity). - const std::string& spec = url.possibly_invalid_spec(); - const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); - size_t spec_length = spec.length(); - std::for_each(offsets_for_adjustment->begin(), - offsets_for_adjustment->end(), - LimitOffset<std::wstring>(spec_length)); - - // Copy everything before the username (the scheme and the separators.) - // These are ASCII. - url_string.insert(url_string.end(), spec.begin(), - spec.begin() + parsed.CountCharactersBefore(url_parse::Parsed::USERNAME, - true)); - - const wchar_t kHTTP[] = L"http://"; - const char kFTP[] = "ftp."; - // URLFixerUpper::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com. 
This - // means that if we trim "http://" off a URL whose host starts with "ftp." and - // the user inputs this into any field subject to fixup (which is basically - // all input fields), the meaning would be changed. (In fact, often the - // formatted URL is directly pre-filled into an input field.) For this reason - // we avoid stripping "http://" in this case. - bool omit_http = - (format_types & kFormatUrlOmitHTTP) && (url_string == kHTTP) && - (url.host().compare(0, arraysize(kFTP) - 1, kFTP) != 0); - - new_parsed->scheme = parsed.scheme; - - if ((format_types & kFormatUrlOmitUsernamePassword) != 0) { - // Remove the username and password fields. We don't want to display those - // to the user since they can be used for attacks, - // e.g. "http://google.com:search@evil.ru/" - new_parsed->username.reset(); - new_parsed->password.reset(); - // Update the offsets based on removed username and/or password. - if (!offsets_for_adjustment->empty() && - (parsed.username.is_nonempty() || parsed.password.is_nonempty())) { - AdjustOffset::Adjustments adjustments; - if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) { - // The seeming off-by-one and off-by-two in these first two lines are to - // account for the ':' after the username and '@' after the password. - adjustments.push_back(AdjustOffset::Adjustment( - static_cast<size_t>(parsed.username.begin), - static_cast<size_t>(parsed.username.len + parsed.password.len + - 2), 0)); - } else { - const url_parse::Component* nonempty_component = - parsed.username.is_nonempty() ? &parsed.username : &parsed.password; - // The seeming off-by-one in below is to account for the '@' after the - // username/password. - adjustments.push_back(AdjustOffset::Adjustment( - static_cast<size_t>(nonempty_component->begin), - static_cast<size_t>(nonempty_component->len + 1), 0)); - } + const std::string& languages_; +}; - // Make offset adjustment. 
- std::for_each(offsets_for_adjustment->begin(), - offsets_for_adjustment->end(), - AdjustOffset(adjustments)); - } - } else { - AppendFormattedComponent(spec, parsed.username, unescape_rules, &url_string, - &new_parsed->username, offsets_for_adjustment); - if (parsed.password.is_valid()) - url_string.push_back(':'); - AppendFormattedComponent(spec, parsed.password, unescape_rules, &url_string, - &new_parsed->password, offsets_for_adjustment); - if (parsed.username.is_valid() || parsed.password.is_valid()) - url_string.push_back('@'); +class NonHostComponentTransform : public AppendComponentTransform { + public: + explicit NonHostComponentTransform(UnescapeRule::Type unescape_rules) + : unescape_rules_(unescape_rules) { } - if (prefix_end) - *prefix_end = static_cast<size_t>(url_string.length()); - - AppendFormattedHostWithOffsets(url, languages, &url_string, new_parsed, - offsets_for_adjustment); - // Port. - if (parsed.port.is_nonempty()) { - url_string.push_back(':'); - new_parsed->port.begin = url_string.length(); - url_string.insert(url_string.end(), - spec.begin() + parsed.port.begin, - spec.begin() + parsed.port.end()); - new_parsed->port.len = url_string.length() - new_parsed->port.begin; - } else { - new_parsed->port.reset(); + private: + virtual string16 Execute( + const std::string& component_text, + std::vector<size_t>* offsets_into_component) const { + return (unescape_rules_ == UnescapeRule::NONE) ? + UTF8ToUTF16AndAdjustOffsets(component_text, offsets_into_component) : + UnescapeAndDecodeUTF8URLComponentWithOffsets(component_text, + unescape_rules_, offsets_into_component); } - // Path and query both get the same general unescape & convert treatment. 
- if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) || - !CanStripTrailingSlash(url)) { - AppendFormattedComponent(spec, parsed.path, unescape_rules, &url_string, - &new_parsed->path, offsets_for_adjustment); - } - if (parsed.query.is_valid()) - url_string.push_back('?'); - AppendFormattedComponent(spec, parsed.query, unescape_rules, &url_string, - &new_parsed->query, offsets_for_adjustment); + const UnescapeRule::Type unescape_rules_; +}; - // Reference is stored in valid, unescaped UTF-8, so we can just convert. - if (parsed.ref.is_valid()) { - url_string.push_back('#'); - size_t ref_begin = url_string.length(); - new_parsed->ref.begin = static_cast<int>(ref_begin); +void AppendFormattedComponent(const std::string& spec, + const url_parse::Component& original_component, + const std::vector<size_t>& original_offsets, + const AppendComponentTransform& transform, + string16* output, + url_parse::Component* output_component, + std::vector<size_t>* offsets_for_adjustment) { + DCHECK(output); + if (original_component.is_nonempty()) { + size_t original_component_begin = + static_cast<size_t>(original_component.begin); + size_t output_component_begin = output->length(); + if (output_component) + output_component->begin = static_cast<int>(output_component_begin); - // Compose a list of offsets within the section. 
- std::vector<size_t> offsets_into_ref = - OffsetsIntoSection(offsets_for_adjustment, ref_begin); + std::vector<size_t> offsets_into_component = + OffsetsIntoComponent(original_offsets, original_component_begin); + output->append(transform.Execute(std::string(spec, original_component_begin, + static_cast<size_t>(original_component.len)), &offsets_into_component)); - if (parsed.ref.len > 0) { - url_string.append(UTF8ToWideAndAdjustOffsets(spec.substr(parsed.ref.begin, - parsed.ref.len), - &offsets_into_ref)); + if (output_component) { + output_component->len = + static_cast<int>(output->length() - output_component_begin); } - size_t old_ref_len = static_cast<size_t>(parsed.ref.len); - size_t new_ref_len = url_string.length() - new_parsed->ref.begin; - new_parsed->ref.len = static_cast<int>(new_ref_len); - - // Apply offset adjustments. - ApplySectionAdjustments(offsets_into_ref, offsets_for_adjustment, - old_ref_len, new_ref_len, ref_begin); + AdjustForComponentTransform(original_offsets, original_component_begin, + static_cast<size_t>(original_component.end()), + offsets_into_component, output_component_begin, + offsets_for_adjustment); + } else if (output_component) { + output_component->reset(); } - - // If we need to strip out http do it after the fact. This way we don't need - // to worry about how offset_for_adjustment is interpreted. - const size_t kHTTPSize = arraysize(kHTTP) - 1; - if (omit_http && !url_string.compare(0, kHTTPSize, kHTTP)) { - url_string = url_string.substr(kHTTPSize); - AdjustOffset::Adjustments adjustments; - adjustments.push_back(AdjustOffset::Adjustment(0, kHTTPSize, 0)); - std::for_each(offsets_for_adjustment->begin(), - offsets_for_adjustment->end(), - AdjustOffset(adjustments)); - if (prefix_end) - *prefix_end -= kHTTPSize; - - // Adjust new_parsed. - DCHECK(new_parsed->scheme.is_valid()); - int delta = -(new_parsed->scheme.len + 3); // +3 for ://. 
- new_parsed->scheme.reset(); - AdjustComponents(delta, new_parsed); - } - - return url_string; } } // namespace @@ -1163,14 +954,34 @@ GURL FilePathToFileURL(const FilePath& path) { return GURL(url_string); } -std::wstring GetSpecificHeader(const std::wstring& headers, - const std::wstring& name) { - return GetSpecificHeaderT(headers, name); -} - std::string GetSpecificHeader(const std::string& headers, const std::string& name) { - return GetSpecificHeaderT(headers, name); + // We want to grab the Value from the "Key: Value" pairs in the headers, + // which should look like this (no leading spaces, \n-separated) (we format + // them this way in url_request_inet.cc): + // HTTP/1.1 200 OK\n + // ETag: "6d0b8-947-24f35ec0"\n + // Content-Length: 2375\n + // Content-Type: text/html; charset=UTF-8\n + // Last-Modified: Sun, 03 Sep 2006 04:34:43 GMT\n + if (headers.empty()) + return std::string(); + + std::string match('\n' + name + ':'); + + std::string::const_iterator begin = + search(headers.begin(), headers.end(), match.begin(), match.end(), + base::CaseInsensitiveCompareASCII<char>()); + + if (begin == headers.end()) + return std::string(); + + begin += match.length(); + + std::string ret; + TrimWhitespace(std::string(begin, find(begin, headers.end(), '\n')), TRIM_ALL, + &ret); + return ret; } bool DecodeCharset(const std::string& input, @@ -1245,88 +1056,49 @@ std::string GetFileNameFromCD(const std::string& header, return std::string(); } -std::wstring GetHeaderParamValue(const std::wstring& field, - const std::wstring& param_name, - QuoteRule::Type quote_rule) { - return GetHeaderParamValueT(field, param_name, quote_rule); -} - -std::string GetHeaderParamValue(const std::string& field, +// TODO(mpcomplete): This is a quick and dirty implementation for now. I'm +// sure this doesn't properly handle all (most?) cases. 
+std::string GetHeaderParamValue(const std::string& header, const std::string& param_name, QuoteRule::Type quote_rule) { - return GetHeaderParamValueT(field, param_name, quote_rule); -} - -// TODO(brettw) bug 734373: check the scripts for each host component and -// don't un-IDN-ize if there is more than one. Alternatively, only IDN for -// scripts that the user has installed. For now, just put the entire -// path through IDN. Maybe this feature can be implemented in ICU itself? -// -// We may want to skip this step in the case of file URLs to allow unicode -// UNC hostnames regardless of encodings. -std::wstring IDNToUnicodeWithOffsets( - const char* host, - size_t host_len, - const std::wstring& languages, - std::vector<size_t>* offsets_for_adjustment) { - // Convert the ASCII input to a wide string for ICU. - string16 input16; - input16.reserve(host_len); - input16.insert(input16.end(), host, host + host_len); + // This assumes args are formatted exactly like "bla; arg1=value; arg2=value". + std::string::const_iterator param_begin = + search(header.begin(), header.end(), param_name.begin(), param_name.end(), + base::CaseInsensitiveCompareASCII<char>()); - // Do each component of the host separately, since we enforce script matching - // on a per-component basis. - AdjustOffset::Adjustments adjustments; - string16 out16; - for (size_t component_start = 0, component_end; - component_start < input16.length(); - component_start = component_end + 1) { - // Find the end of the component. - component_end = input16.find('.', component_start); - if (component_end == string16::npos) - component_end = input16.length(); // For getting the last component. - size_t component_length = component_end - component_start; - size_t new_component_start = out16.length(); - bool converted_idn = false; - if (component_end > component_start) { - // Add the substring that we just found. 
- converted_idn = IDNToUnicodeOneComponent(input16.data() + component_start, - component_length, languages, &out16); - } - size_t new_component_length = out16.length() - new_component_start; + if (param_begin == header.end()) + return std::string(); + param_begin += param_name.length(); - if (converted_idn && offsets_for_adjustment) { - adjustments.push_back(AdjustOffset::Adjustment( - component_start, component_length, new_component_length)); - } + std::string whitespace(" \t"); + size_t equals_offset = + header.find_first_not_of(whitespace, param_begin - header.begin()); + if (equals_offset == std::string::npos || header[equals_offset] != '=') + return std::string(); - // Need to add the dot we just found (if we found one). - if (component_end < input16.length()) - out16.push_back('.'); - } + param_begin = header.begin() + equals_offset + 1; + if (param_begin == header.end()) + return std::string(); - // Make offset adjustment. - if (offsets_for_adjustment && !adjustments.empty()) { - std::for_each(offsets_for_adjustment->begin(), - offsets_for_adjustment->end(), - AdjustOffset(adjustments)); + std::string::const_iterator param_end; + if (*param_begin == '"' && quote_rule == QuoteRule::REMOVE_OUTER_QUOTES) { + ++param_begin; // skip past the quote. + param_end = find(param_begin, header.end(), '"'); + // If the closing quote is missing, we will treat the rest of the + // string as the parameter. We can't set |param_end| to the + // location of the separator (';'), since the separator is + // technically quoted. 
See: http://crbug.com/58840 + } else { + param_end = find(param_begin + 1, header.end(), ';'); } - return UTF16ToWideAndAdjustOffsets(out16, offsets_for_adjustment); + return std::string(param_begin, param_end); } -std::wstring IDNToUnicode(const char* host, - size_t host_len, - const std::wstring& languages, - size_t* offset_for_adjustment) { +string16 IDNToUnicode(const std::string& host, + const std::string& languages) { std::vector<size_t> offsets; - if (offset_for_adjustment) - offsets.push_back(*offset_for_adjustment); - std::wstring result = - IDNToUnicodeWithOffsets(host, host_len, languages, &offsets); - if (offset_for_adjustment) - *offset_for_adjustment = offsets[0]; - return result; + return IDNToUnicodeWithOffsets(host, languages, &offsets); } std::string CanonicalizeHost(const std::string& host, @@ -1352,13 +1124,6 @@ std::string CanonicalizeHost(const std::string& host, return canon_host; } -std::string CanonicalizeHost(const std::wstring& host, - url_canon::CanonHostInfo* host_info) { - std::string converted_host; - WideToUTF8(host.c_str(), host.length(), &converted_host); - return CanonicalizeHost(converted_host, host_info); -} - std::string GetDirectoryListingHeader(const string16& title) { static const base::StringPiece header( NetModule::GetResource(IDR_DIR_HEADER_HTML)); @@ -1739,60 +1504,15 @@ std::string GetHostOrSpecFromURL(const GURL& url) { return url.has_host() ? TrimEndingDot(url.host()) : url.spec(); } -void AppendFormattedHostWithOffsets( - const GURL& url, - const std::wstring& languages, - std::wstring* output, - url_parse::Parsed* new_parsed, - std::vector<size_t>* offsets_for_adjustment) { - DCHECK(output); - const url_parse::Component& host = - url.parsed_for_possibly_invalid_spec().host; - - if (host.is_nonempty()) { - // Handle possible IDN in the host name. 
- size_t host_begin = output->length(); - if (new_parsed) - new_parsed->host.begin = static_cast<int>(host_begin); - size_t old_host_len = static_cast<size_t>(host.len); - - // Compose a list of offsets within the host area. - std::vector<size_t> offsets_into_host = - OffsetsIntoSection(offsets_for_adjustment, host_begin); - - const std::string& spec = url.possibly_invalid_spec(); - DCHECK(host.begin >= 0 && - ((spec.length() == 0 && host.begin == 0) || - host.begin < static_cast<int>(spec.length()))); - output->append(IDNToUnicodeWithOffsets(&spec[host.begin], old_host_len, - languages, &offsets_into_host)); - - size_t new_host_len = output->length() - host_begin; - if (new_parsed) - new_parsed->host.len = static_cast<int>(new_host_len); - - // Apply offset adjustments. - ApplySectionAdjustments(offsets_into_host, offsets_for_adjustment, - old_host_len, new_host_len, host_begin); - } else if (new_parsed) { - new_parsed->host.reset(); - } -} - void AppendFormattedHost(const GURL& url, - const std::wstring& languages, - std::wstring* output, - url_parse::Parsed* new_parsed, - size_t* offset_for_adjustment) { + const std::string& languages, + string16* output) { std::vector<size_t> offsets; - if (offset_for_adjustment) - offsets.push_back(*offset_for_adjustment); - AppendFormattedHostWithOffsets(url, languages, output, new_parsed, &offsets); - if (offset_for_adjustment) - *offset_for_adjustment = offsets[0]; + AppendFormattedComponent(url.possibly_invalid_spec(), + url.parsed_for_possibly_invalid_spec().host, offsets, + HostComponentTransform(languages), output, NULL, NULL); } -// TODO(viettrungluu): convert the wstring |FormatUrlInternal()|. 
string16 FormatUrlWithOffsets(const GURL& url, const std::string& languages, FormatUrlTypes format_types, @@ -1800,10 +1520,184 @@ string16 FormatUrlWithOffsets(const GURL& url, url_parse::Parsed* new_parsed, size_t* prefix_end, std::vector<size_t>* offsets_for_adjustment) { - return WideToUTF16Hack( - FormatUrlInternal(url, ASCIIToWide(languages), format_types, - unescape_rules, new_parsed, prefix_end, - offsets_for_adjustment)); + url_parse::Parsed parsed_temp; + if (!new_parsed) + new_parsed = &parsed_temp; + else + *new_parsed = url_parse::Parsed(); + std::vector<size_t> original_offsets; + if (offsets_for_adjustment) + original_offsets = *offsets_for_adjustment; + + // Special handling for view-source:. Don't use chrome::kViewSourceScheme + // because this library shouldn't depend on chrome. + const char* const kViewSource = "view-source"; + // Reject "view-source:view-source:..." to avoid deep recursion. + const char* const kViewSourceTwice = "view-source:view-source:"; + if (url.SchemeIs(kViewSource) && + !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) { + return FormatViewSourceUrl(url, original_offsets, languages, format_types, + unescape_rules, new_parsed, prefix_end, offsets_for_adjustment); + } + + // We handle both valid and invalid URLs (this will give us the spec + // regardless of validity). + const std::string& spec = url.possibly_invalid_spec(); + const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); + + // Scheme & separators. These are ASCII. + string16 url_string; + url_string.insert(url_string.end(), spec.begin(), + spec.begin() + parsed.CountCharactersBefore(url_parse::Parsed::USERNAME, + true)); + const char kHTTP[] = "http://"; + const char kFTP[] = "ftp."; + // URLFixerUpper::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com. This + // means that if we trim "http://" off a URL whose host starts with "ftp." 
and + // the user inputs this into any field subject to fixup (which is basically + // all input fields), the meaning would be changed. (In fact, often the + // formatted URL is directly pre-filled into an input field.) For this reason + // we avoid stripping "http://" in this case. + bool omit_http = (format_types & kFormatUrlOmitHTTP) && + EqualsASCII(url_string, kHTTP) && + !StartsWithASCII(url.host(), kFTP, true); + new_parsed->scheme = parsed.scheme; + + // Username & password. + if ((format_types & kFormatUrlOmitUsernamePassword) != 0) { + // Remove the username and password fields. We don't want to display those + // to the user since they can be used for attacks, + // e.g. "http://google.com:search@evil.ru/" + new_parsed->username.reset(); + new_parsed->password.reset(); + // Update the offsets based on removed username and/or password. + if (offsets_for_adjustment && !offsets_for_adjustment->empty() && + (parsed.username.is_nonempty() || parsed.password.is_nonempty())) { + OffsetAdjuster offset_adjuster(offsets_for_adjustment); + if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) { + // The seeming off-by-one and off-by-two in these first two lines are to + // account for the ':' after the username and '@' after the password. + offset_adjuster.Add(OffsetAdjuster::Adjustment( + static_cast<size_t>(parsed.username.begin), + static_cast<size_t>(parsed.username.len + parsed.password.len + 2), + 0)); + } else { + const url_parse::Component* nonempty_component = + parsed.username.is_nonempty() ? &parsed.username : &parsed.password; + // The seeming off-by-one in below is to account for the '@' after the + // username/password. 
+ offset_adjuster.Add(OffsetAdjuster::Adjustment( + static_cast<size_t>(nonempty_component->begin), + static_cast<size_t>(nonempty_component->len + 1), 0)); + } + } + } else { + AppendFormattedComponent(spec, parsed.username, original_offsets, + NonHostComponentTransform(unescape_rules), &url_string, + &new_parsed->username, offsets_for_adjustment); + if (parsed.password.is_valid()) { + size_t colon = parsed.username.end(); + DCHECK_EQ(static_cast<size_t>(parsed.password.begin - 1), colon); + std::vector<size_t>::const_iterator colon_iter = + std::find(original_offsets.begin(), original_offsets.end(), colon); + if (colon_iter != original_offsets.end()) { + (*offsets_for_adjustment)[colon_iter - original_offsets.begin()] = + url_string.length(); + } + url_string.push_back(':'); + } + AppendFormattedComponent(spec, parsed.password, original_offsets, + NonHostComponentTransform(unescape_rules), &url_string, + &new_parsed->password, offsets_for_adjustment); + if (parsed.username.is_valid() || parsed.password.is_valid()) { + size_t at_sign = (parsed.password.is_valid() ? + parsed.password : parsed.username).end(); + DCHECK_EQ(static_cast<size_t>(parsed.host.begin - 1), at_sign); + std::vector<size_t>::const_iterator at_sign_iter = + std::find(original_offsets.begin(), original_offsets.end(), at_sign); + if (at_sign_iter != original_offsets.end()) { + (*offsets_for_adjustment)[at_sign_iter - original_offsets.begin()] = + url_string.length(); + } + url_string.push_back('@'); + } + } + if (prefix_end) + *prefix_end = static_cast<size_t>(url_string.length()); + + // Host. + AppendFormattedComponent(spec, parsed.host, original_offsets, + HostComponentTransform(languages), &url_string, &new_parsed->host, + offsets_for_adjustment); + + // Port. 
+ if (parsed.port.is_nonempty()) { + url_string.push_back(':'); + new_parsed->port.begin = url_string.length(); + url_string.insert(url_string.end(), + spec.begin() + parsed.port.begin, + spec.begin() + parsed.port.end()); + new_parsed->port.len = url_string.length() - new_parsed->port.begin; + } else { + new_parsed->port.reset(); + } + + // Path & query. Both get the same general unescape & convert treatment. + if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) || + !CanStripTrailingSlash(url)) { + AppendFormattedComponent(spec, parsed.path, original_offsets, + NonHostComponentTransform(unescape_rules), &url_string, + &new_parsed->path, offsets_for_adjustment); + } + if (parsed.query.is_valid()) + url_string.push_back('?'); + AppendFormattedComponent(spec, parsed.query, original_offsets, + NonHostComponentTransform(unescape_rules), &url_string, + &new_parsed->query, offsets_for_adjustment); + + // Ref. This is valid, unescaped UTF-8, so we can just convert. + if (parsed.ref.is_valid()) { + url_string.push_back('#'); + size_t original_ref_begin = static_cast<size_t>(parsed.ref.begin); + size_t output_ref_begin = url_string.length(); + new_parsed->ref.begin = static_cast<int>(output_ref_begin); + + std::vector<size_t> offsets_into_ref( + OffsetsIntoComponent(original_offsets, original_ref_begin)); + if (parsed.ref.len > 0) { + url_string.append(UTF8ToUTF16AndAdjustOffsets( + spec.substr(original_ref_begin, static_cast<size_t>(parsed.ref.len)), + &offsets_into_ref)); + } + + new_parsed->ref.len = + static_cast<int>(url_string.length() - new_parsed->ref.begin); + AdjustForComponentTransform(original_offsets, original_ref_begin, + static_cast<size_t>(parsed.ref.end()), offsets_into_ref, + output_ref_begin, offsets_for_adjustment); + } + + // If we need to strip out http do it after the fact. This way we don't need + // to worry about how offset_for_adjustment is interpreted. 
+ if (omit_http && StartsWith(url_string, ASCIIToUTF16(kHTTP), true)) { + const size_t kHTTPSize = arraysize(kHTTP) - 1; + url_string = url_string.substr(kHTTPSize); + if (offsets_for_adjustment && !offsets_for_adjustment->empty()) { + OffsetAdjuster offset_adjuster(offsets_for_adjustment); + offset_adjuster.Add(OffsetAdjuster::Adjustment(0, kHTTPSize, 0)); + } + if (prefix_end) + *prefix_end -= kHTTPSize; + + // Adjust new_parsed. + DCHECK(new_parsed->scheme.is_valid()); + int delta = -(new_parsed->scheme.len + 3); // +3 for ://. + new_parsed->scheme.reset(); + AdjustComponents(delta, new_parsed); + } + + LimitOffsets(url_string, offsets_for_adjustment); + return url_string; } string16 FormatUrl(const GURL& url, @@ -1816,9 +1710,8 @@ string16 FormatUrl(const GURL& url, std::vector<size_t> offsets; if (offset_for_adjustment) offsets.push_back(*offset_for_adjustment); - string16 result = WideToUTF16Hack( - FormatUrlInternal(url, ASCIIToWide(languages), format_types, - unescape_rules, new_parsed, prefix_end, &offsets)); + string16 result = FormatUrlWithOffsets(url, languages, format_types, + unescape_rules, new_parsed, prefix_end, &offsets); if (offset_for_adjustment) *offset_for_adjustment = offsets[0]; return result; @@ -2274,12 +2167,4 @@ NetworkInterface::NetworkInterface(const std::string& name, NetworkInterface::~NetworkInterface() { } -ClampComponentOffset::ClampComponentOffset(size_t component_start) - : component_start(component_start) {} - -size_t ClampComponentOffset::operator()(size_t offset) { - return (offset >= component_start) ? - offset : std::wstring::npos; -} - } // namespace net diff --git a/net/base/net_util.h b/net/base/net_util.h index 7b3e7ec..3f36182 100644 --- a/net/base/net_util.h +++ b/net/base/net_util.h @@ -135,18 +135,13 @@ std::string GetHostOrSpecFromURL(const GURL& url); // Return the value of the HTTP response header with name 'name'. 'headers' // should be in the format that URLRequest::GetResponseHeaders() returns. 
// Returns the empty string if the header is not found. -std::wstring GetSpecificHeader(const std::wstring& headers, - const std::wstring& name); std::string GetSpecificHeader(const std::string& headers, const std::string& name); // Return the value of the HTTP response header field's parameter named // 'param_name'. Returns the empty string if the parameter is not found or is // improperly formatted. -std::wstring GetHeaderParamValue(const std::wstring& field, - const std::wstring& param_name, - QuoteRule::Type quote_rule); -std::string GetHeaderParamValue(const std::string& field, +std::string GetHeaderParamValue(const std::string& header, const std::string& param_name, QuoteRule::Type quote_rule); @@ -186,8 +181,7 @@ std::string GetFileNameFromCD(const std::string& header, // return the ASCII source so it is still usable. // // The input should be the canonicalized ASCII host name from GURL. This -// function does NOT accept UTF-8! Its length must also be given (this is -// designed to work on the substring of the host out of a URL spec). +// function does NOT accept UTF-8! // // |languages| is a comma separated list of ISO 639 language codes. It // is used to determine whether a hostname is 'comprehensible' to a user @@ -197,29 +191,13 @@ std::string GetFileNameFromCD(const std::string& header, // Latin letters in the ASCII range can be mixed with a limited set of // script-language pairs (currently Han, Kana and Hangul for zh,ja and ko). // When |languages| is empty, even that mixing is not allowed. -// -// (|offset[s]_for_adjustment|) specifies one or more offsets into the original -// |url|'s spec(); each offset will be adjusted to point at the same logical -// place in the result strings during decoding. If this isn't possible because -// an offset points past the end of |host| or into the middle of a punycode -// sequence, the offending offset will be set to std::wstring::npos. -// |offset[s]_for_adjustment| may be NULL. 
-std::wstring IDNToUnicode(const char* host, - size_t host_len, - const std::wstring& languages, - size_t* offset_for_adjustment); -std::wstring IDNToUnicodeWithOffsets( - const char* host, - size_t host_len, - const std::wstring& languages, - std::vector<size_t>* offsets_for_adjustment); +string16 IDNToUnicode(const std::string& host, + const std::string& languages); // Canonicalizes |host| and returns it. Also fills |host_info| with // IP address information. |host_info| must not be NULL. std::string CanonicalizeHost(const std::string& host, url_canon::CanonHostInfo* host_info); -std::string CanonicalizeHost(const std::wstring& host, - url_canon::CanonHostInfo* host_info); // Returns true if |host| is not an IP address and is compliant with a set of // rules based on RFC 1738 and tweaked to be compatible with the real world. @@ -294,28 +272,11 @@ bool IsPortAllowedByOverride(int port); // Set socket to non-blocking mode int SetNonBlocking(int fd); -// Appends the given part of the original URL to the output string formatted for -// the user. The given parsed structure will be updated. The host name formatter -// also takes the same accept languages component as ElideURL. |new_parsed| may -// be null. -// -// (|offset[s]_for_adjustment|) specifies one or more offsets into the original -// |url|'s spec(); each offset will be adjusted to point at the same logical -// place in the result strings after reformatting of the host. If this isn't -// possible because an offset points past the end of the host or into the middle -// of a multi-character sequence, the offending offset will be set to -// std::wstring::npos. |offset[s]_for_adjustment| may be NULL. +// Formats the host in |url| and appends it to |output|. The host formatter +// takes the same accept languages component as ElideURL(). 
void AppendFormattedHost(const GURL& url, - const std::wstring& languages, - std::wstring* output, - url_parse::Parsed* new_parsed, - size_t* offset_for_adjustment); -void AppendFormattedHostWithOffsets( - const GURL& url, - const std::wstring& languages, - std::wstring* output, - url_parse::Parsed* new_parsed, - std::vector<size_t>* offsets_for_adjustment); + const std::string& languages, + string16* output); // Creates a string representation of |url|. The IDN host name may be in Unicode // if |languages| accepts the Unicode representation. |format_type| is a bitmask @@ -481,16 +442,6 @@ typedef std::vector<NetworkInterface> NetworkInterfaceList; // Can be called only on a thread that allows IO. bool GetNetworkList(NetworkInterfaceList* networks); -// Private adjustment function called by std::transform which sets the offset -// to npos if the offset occurs at or before |component_start|, otherwise don't -// alter the offset. Exposed here for unit testing. -struct ClampComponentOffset { - explicit ClampComponentOffset(size_t component_start); - size_t operator()(size_t offset); - - const size_t component_start; -}; - } // namespace net #endif // NET_BASE_NET_UTIL_H_ diff --git a/net/base/net_util_unittest.cc b/net/base/net_util_unittest.cc index 4265866..2bac45b 100644 --- a/net/base/net_util_unittest.cc +++ b/net/base/net_util_unittest.cc @@ -31,14 +31,14 @@ struct FileCase { }; struct HeaderCase { - const wchar_t* header_name; - const wchar_t* expected; + const char* header_name; + const char* expected; }; struct HeaderParamCase { - const wchar_t* header_name; - const wchar_t* param_name; - const wchar_t* expected; + const char* header_name; + const char* param_name; + const char* expected; }; struct FileNameCDCase { @@ -47,12 +47,12 @@ struct FileNameCDCase { const wchar_t* expected; }; -const wchar_t* kLanguages[] = { - L"", L"en", L"zh-CN", L"ja", L"ko", - L"he", L"ar", L"ru", L"el", L"fr", - L"de", L"pt", L"sv", L"th", L"hi", - L"de,en", L"el,en", 
L"zh-TW,en", L"ko,ja", L"he,ru,en", - L"zh,ru,en" +const char* kLanguages[] = { + "", "en", "zh-CN", "ja", "ko", + "he", "ar", "ru", "el", "fr", + "de", "pt", "sv", "th", "hi", + "de,en", "el,en", "zh-TW,en", "ko,ja", "he,ru,en", + "zh,ru,en" }; struct IDNTestCase { @@ -451,13 +451,48 @@ const struct addrinfo* GetIPv6Address(const uint8* bytes, int port) { // A helper for IDN*{Fast,Slow}. // Append "::<language list>" to |expected| and |actual| to make it // easy to tell which sub-case fails without debugging. -void AppendLanguagesToOutputs(const wchar_t* languages, - std::wstring* expected, - std::wstring* actual) { - expected->append(L"::"); - expected->append(languages); - actual->append(L"::"); - actual->append(languages); +void AppendLanguagesToOutputs(const char* languages, + string16* expected, + string16* actual) { + string16 to_append = ASCIIToUTF16("::") + ASCIIToUTF16(languages); + expected->append(to_append); + actual->append(to_append); +} + +// A pair of helpers for the FormatUrlWithOffsets() test. 
+void VerboseExpect(size_t expected, + size_t actual, + const std::string& original_url, + size_t position, + const string16& formatted_url) { + EXPECT_EQ(expected, actual) << "Original URL: " << original_url + << " (at char " << position << ")\nFormatted URL: " << formatted_url; +} + +void CheckAdjustedOffsets(const std::string& url_string, + const std::string& languages, + FormatUrlTypes format_types, + UnescapeRule::Type unescape_rules, + const AdjustOffsetCase* cases, + size_t num_cases, + const size_t* all_offsets) { + GURL url(url_string); + for (size_t i = 0; i < num_cases; ++i) { + size_t offset = cases[i].input_offset; + string16 formatted_url = FormatUrl(url, languages, format_types, + unescape_rules, NULL, NULL, &offset); + VerboseExpect(cases[i].output_offset, offset, url_string, i, formatted_url); + } + + size_t url_size = url_string.length(); + std::vector<size_t> offsets; + for (size_t i = 0; i < url_size + 1; ++i) + offsets.push_back(i); + string16 formatted_url = FormatUrlWithOffsets(url, languages, format_types, + unescape_rules, NULL, NULL, &offsets); + for (size_t i = 0; i < url_size; ++i) + VerboseExpect(all_offsets[i], offsets[i], url_string, i, formatted_url); + VerboseExpect(kNpos, offsets[url_size], url_string, url_size, formatted_url); } // Helper to strignize an IP number (used to define expectations). @@ -650,83 +685,83 @@ TEST(NetUtilTest, GetIdentityFromURL_UTF8) { } // Just a bunch of fake headers. 
-const wchar_t* google_headers = - L"HTTP/1.1 200 OK\n" - L"Content-TYPE: text/html; charset=utf-8\n" - L"Content-disposition: attachment; filename=\"download.pdf\"\n" - L"Content-Length: 378557\n" - L"X-Google-Google1: 314159265\n" - L"X-Google-Google2: aaaa2:7783,bbb21:9441\n" - L"X-Google-Google4: home\n" - L"Transfer-Encoding: chunked\n" - L"Set-Cookie: HEHE_AT=6666x66beef666x6-66xx6666x66; Path=/mail\n" - L"Set-Cookie: HEHE_HELP=owned:0;Path=/\n" - L"Set-Cookie: S=gmail=Xxx-beefbeefbeef_beefb:gmail_yj=beefbeef000beefbee" - L"fbee:gmproxy=bee-fbeefbe; Domain=.google.com; Path=/\n" - L"X-Google-Google2: /one/two/three/four/five/six/seven-height/nine:9411\n" - L"Server: GFE/1.3\n" - L"Transfer-Encoding: chunked\n" - L"Date: Mon, 13 Nov 2006 21:38:09 GMT\n" - L"Expires: Tue, 14 Nov 2006 19:23:58 GMT\n" - L"X-Malformed: bla; arg=test\"\n" - L"X-Malformed2: bla; arg=\n" - L"X-Test: bla; arg1=val1; arg2=val2"; +const char* google_headers = + "HTTP/1.1 200 OK\n" + "Content-TYPE: text/html; charset=utf-8\n" + "Content-disposition: attachment; filename=\"download.pdf\"\n" + "Content-Length: 378557\n" + "X-Google-Google1: 314159265\n" + "X-Google-Google2: aaaa2:7783,bbb21:9441\n" + "X-Google-Google4: home\n" + "Transfer-Encoding: chunked\n" + "Set-Cookie: HEHE_AT=6666x66beef666x6-66xx6666x66; Path=/mail\n" + "Set-Cookie: HEHE_HELP=owned:0;Path=/\n" + "Set-Cookie: S=gmail=Xxx-beefbeefbeef_beefb:gmail_yj=beefbeef000beefbee" + "fbee:gmproxy=bee-fbeefbe; Domain=.google.com; Path=/\n" + "X-Google-Google2: /one/two/three/four/five/six/seven-height/nine:9411\n" + "Server: GFE/1.3\n" + "Transfer-Encoding: chunked\n" + "Date: Mon, 13 Nov 2006 21:38:09 GMT\n" + "Expires: Tue, 14 Nov 2006 19:23:58 GMT\n" + "X-Malformed: bla; arg=test\"\n" + "X-Malformed2: bla; arg=\n" + "X-Test: bla; arg1=val1; arg2=val2"; TEST(NetUtilTest, GetSpecificHeader) { const HeaderCase tests[] = { - {L"content-type", L"text/html; charset=utf-8"}, - {L"CONTENT-LENGTH", L"378557"}, - {L"Date", L"Mon, 13 Nov 
2006 21:38:09 GMT"}, - {L"Bad-Header", L""}, - {L"", L""}, + {"content-type", "text/html; charset=utf-8"}, + {"CONTENT-LENGTH", "378557"}, + {"Date", "Mon, 13 Nov 2006 21:38:09 GMT"}, + {"Bad-Header", ""}, + {"", ""}, }; // Test first with google_headers. for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) { - std::wstring result = GetSpecificHeader(google_headers, - tests[i].header_name); + std::string result = + GetSpecificHeader(google_headers, tests[i].header_name); EXPECT_EQ(result, tests[i].expected); } // Test again with empty headers. for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) { - std::wstring result = GetSpecificHeader(L"", tests[i].header_name); - EXPECT_EQ(result, std::wstring()); + std::string result = GetSpecificHeader(std::string(), tests[i].header_name); + EXPECT_EQ(result, std::string()); } } TEST(NetUtilTest, GetHeaderParamValue) { const HeaderParamCase tests[] = { - {L"Content-type", L"charset", L"utf-8"}, - {L"content-disposition", L"filename", L"download.pdf"}, - {L"Content-Type", L"badparam", L""}, - {L"X-Malformed", L"arg", L"test\""}, - {L"X-Malformed2", L"arg", L""}, - {L"X-Test", L"arg1", L"val1"}, - {L"X-Test", L"arg2", L"val2"}, - {L"Bad-Header", L"badparam", L""}, - {L"Bad-Header", L"", L""}, - {L"", L"badparam", L""}, - {L"", L"", L""}, + {"Content-type", "charset", "utf-8"}, + {"content-disposition", "filename", "download.pdf"}, + {"Content-Type", "badparam", ""}, + {"X-Malformed", "arg", "test\""}, + {"X-Malformed2", "arg", ""}, + {"X-Test", "arg1", "val1"}, + {"X-Test", "arg2", "val2"}, + {"Bad-Header", "badparam", ""}, + {"Bad-Header", "", ""}, + {"", "badparam", ""}, + {"", "", ""}, }; // TODO(mpcomplete): add tests for other formats of headers. 
for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) { - std::wstring header_value = + std::string header_value = GetSpecificHeader(google_headers, tests[i].header_name); - std::wstring result = + std::string result = GetHeaderParamValue(header_value, tests[i].param_name, QuoteRule::REMOVE_OUTER_QUOTES); EXPECT_EQ(result, tests[i].expected); } for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) { - std::wstring header_value = - GetSpecificHeader(L"", tests[i].header_name); - std::wstring result = + std::string header_value = + GetSpecificHeader(std::string(), tests[i].header_name); + std::string result = GetHeaderParamValue(header_value, tests[i].param_name, QuoteRule::REMOVE_OUTER_QUOTES); - EXPECT_EQ(result, std::wstring()); + EXPECT_EQ(result, std::string()); } } @@ -941,10 +976,10 @@ TEST(NetUtilTest, IDNToUnicodeFast) { // ja || zh-TW,en || ko,ja -> IDNToUnicodeSlow if (j == 3 || j == 17 || j == 18) continue; - std::wstring output(IDNToUnicode(idn_cases[i].input, - strlen(idn_cases[i].input), kLanguages[j], NULL)); - std::wstring expected(idn_cases[i].unicode_allowed[j] ? - idn_cases[i].unicode_output : ASCIIToWide(idn_cases[i].input)); + string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j])); + string16 expected(idn_cases[i].unicode_allowed[j] ? + WideToUTF16(idn_cases[i].unicode_output) : + ASCIIToUTF16(idn_cases[i].input)); AppendLanguagesToOutputs(kLanguages[j], &expected, &output); EXPECT_EQ(expected, output); } @@ -957,57 +992,16 @@ TEST(NetUtilTest, IDNToUnicodeSlow) { // !(ja || zh-TW,en || ko,ja) -> IDNToUnicodeFast if (!(j == 3 || j == 17 || j == 18)) continue; - std::wstring output(IDNToUnicode(idn_cases[i].input, - strlen(idn_cases[i].input), kLanguages[j], NULL)); - std::wstring expected(idn_cases[i].unicode_allowed[j] ? - idn_cases[i].unicode_output : ASCIIToWide(idn_cases[i].input)); + string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j])); + string16 expected(idn_cases[i].unicode_allowed[j] ? 
+ WideToUTF16(idn_cases[i].unicode_output) : + ASCIIToUTF16(idn_cases[i].input)); AppendLanguagesToOutputs(kLanguages[j], &expected, &output); EXPECT_EQ(expected, output); } } } -TEST(NetUtilTest, IDNToUnicodeAdjustOffset) { - const AdjustOffsetCase adjust_cases[] = { - {0, 0}, - {2, 2}, - {4, 4}, - {5, 5}, - {6, string16::npos}, - {16, string16::npos}, - {17, 7}, - {18, 8}, - {19, string16::npos}, - {25, string16::npos}, - {34, 12}, - {35, 13}, - {38, 16}, - {39, string16::npos}, - {string16::npos, string16::npos}, - }; - for (size_t i = 0; i < ARRAYSIZE_UNSAFE(adjust_cases); ++i) { - size_t offset = adjust_cases[i].input_offset; - // "test.\x89c6\x9891.\x5317\x4eac\x5927\x5b78.test" - IDNToUnicode("test.xn--cy2a840a.xn--1lq90ic7f1rc.test", 39, L"zh-CN", - &offset); - EXPECT_EQ(adjust_cases[i].output_offset, offset); - } - - std::vector<size_t> offsets; - for (size_t i = 0; i < 40; ++i) - offsets.push_back(i); - IDNToUnicodeWithOffsets("test.xn--cy2a840a.xn--1lq90ic7f1rc.test", 39, - L"zh-CN", &offsets); - size_t expected[] = {0, 1, 2, 3, 4, 5, kNpos, kNpos, kNpos, kNpos, kNpos, - kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 7, 8, kNpos, - kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, - kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 12, 13, 14, 15, - 16, kNpos}; - ASSERT_EQ(40U, arraysize(expected)); - for (size_t i = 0; i < 40; ++i) - EXPECT_EQ(expected[i], offsets[i]); -} - TEST(NetUtilTest, CompliantHost) { const CompliantHostCase compliant_host_cases[] = { {"", "", false}, @@ -1794,7 +1788,13 @@ TEST(NetUtilTest, FormatUrlParsed) { formatted.substr(parsed.path.begin, parsed.path.len)); } -TEST(NetUtilTest, FormatUrlAdjustOffset) { +TEST(NetUtilTest, FormatUrlWithOffsets) { + const AdjustOffsetCase null_cases[] = { + {0, string16::npos}, + }; + CheckAdjustedOffsets(std::string(), "en", kFormatUrlOmitNothing, + UnescapeRule::NORMAL, null_cases, arraysize(null_cases), NULL); + const AdjustOffsetCase basic_cases[] = { {0, 0}, {3, 3}, @@ -1809,62 +1809,56 @@ 
TEST(NetUtilTest, FormatUrlAdjustOffset) { {500000, string16::npos}, {string16::npos, string16::npos}, }; - for (size_t i = 0; i < ARRAYSIZE_UNSAFE(basic_cases); ++i) { - size_t offset = basic_cases[i].input_offset; - FormatUrl(GURL("http://www.google.com/foo/"), "en", - kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, - NULL, NULL, &offset); - EXPECT_EQ(basic_cases[i].output_offset, offset); - } - - size_t url_size = 26; - std::vector<size_t> offsets; - for (size_t i = 0; i < url_size + 1; ++i) - offsets.push_back(i); - FormatUrlWithOffsets(GURL("http://www.google.com/foo/"), "en", - kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, - NULL, NULL, &offsets); - for (size_t i = 0; i < url_size; ++i) - EXPECT_EQ(i, offsets[i]); - EXPECT_EQ(kNpos, offsets[url_size]); + const size_t basic_offsets[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25}; + CheckAdjustedOffsets("http://www.google.com/foo/", "en", + kFormatUrlOmitNothing, UnescapeRule::NORMAL, basic_cases, + arraysize(basic_cases), basic_offsets); - const struct { - const char* input_url; - size_t input_offset; - size_t output_offset; - } omit_auth_cases[] = { - {"http://foo:bar@www.google.com/", 6, 6}, - {"http://foo:bar@www.google.com/", 7, string16::npos}, - {"http://foo:bar@www.google.com/", 8, string16::npos}, - {"http://foo:bar@www.google.com/", 10, string16::npos}, - {"http://foo:bar@www.google.com/", 11, string16::npos}, - {"http://foo:bar@www.google.com/", 14, string16::npos}, - {"http://foo:bar@www.google.com/", 15, 7}, - {"http://foo:bar@www.google.com/", 25, 17}, - {"http://foo@www.google.com/", 9, string16::npos}, - {"http://foo@www.google.com/", 11, 7}, + const AdjustOffsetCase omit_auth_cases_1[] = { + {6, 6}, + {7, string16::npos}, + {8, string16::npos}, + {10, string16::npos}, + {12, string16::npos}, + {14, string16::npos}, + {15, 7}, + {25, 17}, }; - for (size_t i = 0; i < ARRAYSIZE_UNSAFE(omit_auth_cases); ++i) { - size_t offset = 
omit_auth_cases[i].input_offset; - FormatUrl(GURL(omit_auth_cases[i].input_url), "en", - kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, - NULL, NULL, &offset); - EXPECT_EQ(omit_auth_cases[i].output_offset, offset); - } - - url_size = 30; - offsets.clear(); - for (size_t i = 0; i < url_size; ++i) - offsets.push_back(i); - FormatUrlWithOffsets(GURL("http://foo:bar@www.google.com/"), "en", - kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, - NULL, NULL, &offsets); - for (size_t i = 0; i < 7; ++i) - EXPECT_EQ(i, offsets[i]); - for (size_t i = 7; i < 15; ++i) - EXPECT_EQ(kNpos, offsets[i]); - for (size_t i = 16; i < url_size; ++i) - EXPECT_EQ(i - 8 , offsets[i]); + const size_t omit_auth_offsets_1[] = {0, 1, 2, 3, 4, 5, 6, kNpos, kNpos, + kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21}; + CheckAdjustedOffsets("http://foo:bar@www.google.com/", "en", + kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, omit_auth_cases_1, + arraysize(omit_auth_cases_1), omit_auth_offsets_1); + + const AdjustOffsetCase omit_auth_cases_2[] = { + {9, string16::npos}, + {11, 7}, + }; + const size_t omit_auth_offsets_2[] = {0, 1, 2, 3, 4, 5, 6, kNpos, kNpos, + kNpos, kNpos, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21}; + CheckAdjustedOffsets("http://foo@www.google.com/", "en", + kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, omit_auth_cases_2, + arraysize(omit_auth_cases_2), omit_auth_offsets_2); + + // "http://foo\x30B0:\x30B0bar@www.google.com" + const AdjustOffsetCase dont_omit_auth_cases[] = { + {0, 0}, + /*{3, string16::npos}, + {7, 0}, + {11, 4}, + {12, string16::npos}, + {20, 5}, + {24, 9},*/ + }; + const size_t dont_omit_auth_offsets[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 11, 12, kNpos, + kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; + 
CheckAdjustedOffsets("http://foo%E3%82%B0:%E3%82%B0bar@www.google.com/", "en", + kFormatUrlOmitNothing, UnescapeRule::NORMAL, dont_omit_auth_cases, + arraysize(dont_omit_auth_cases), dont_omit_auth_offsets); const AdjustOffsetCase view_source_cases[] = { {0, 0}, @@ -1879,59 +1873,57 @@ TEST(NetUtilTest, FormatUrlAdjustOffset) { {26, 22}, {string16::npos, string16::npos}, }; - for (size_t i = 0; i < ARRAYSIZE_UNSAFE(view_source_cases); ++i) { - size_t offset = view_source_cases[i].input_offset; - FormatUrl(GURL("view-source:http://foo@www.google.com/"), "en", - kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, - NULL, NULL, &offset); - EXPECT_EQ(view_source_cases[i].output_offset, offset); - } - - url_size = 38; - offsets.clear(); - for (size_t i = 0; i < url_size; ++i) - offsets.push_back(i); - FormatUrlWithOffsets(GURL("view-source:http://foo@www.google.com/"), "en", - kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, - NULL, NULL, &offsets); - size_t expected[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, - 17, 18, kNpos, kNpos, kNpos, kNpos, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31, 32, 33}; - ASSERT_EQ(url_size, arraysize(expected)); - for (size_t i = 0; i < url_size; ++i) - EXPECT_EQ(expected[i], offsets[i]); - - const AdjustOffsetCase idn_hostname_cases[] = { + const size_t view_source_offsets[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, kNpos, kNpos, kNpos, kNpos, 19, 20, 21, 22, + 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33}; + CheckAdjustedOffsets("view-source:http://foo@www.google.com/", "en", + kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, view_source_cases, + arraysize(view_source_cases), view_source_offsets); + + // "http://\x671d\x65e5\x3042\x3055\x3072.jp/foo/" + const AdjustOffsetCase idn_hostname_cases_1[] = { {8, string16::npos}, {16, string16::npos}, {24, string16::npos}, {25, 12}, {30, 17}, }; - for (size_t i = 0; i < ARRAYSIZE_UNSAFE(idn_hostname_cases); ++i) { - size_t 
offset = idn_hostname_cases[i].input_offset; - // "http://\x671d\x65e5\x3042\x3055\x3072.jp/foo/" - FormatUrl(GURL("http://xn--l8jvb1ey91xtjb.jp/foo/"), "ja", - kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, - NULL, NULL, &offset); - EXPECT_EQ(idn_hostname_cases[i].output_offset, offset); - } - - url_size = 33; - offsets.clear(); - for (size_t i = 0; i < url_size; ++i) - offsets.push_back(i); - FormatUrlWithOffsets(GURL("http://xn--l8jvb1ey91xtjb.jp/foo/"), "ja", - kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, - NULL, NULL, &offsets); - size_t expected_1[] = {0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, - kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, - kNpos, kNpos, kNpos, kNpos, kNpos, 12, 13, 14, 15, 16, - 17, 18, 19}; - ASSERT_EQ(url_size, arraysize(expected_1)); - for (size_t i = 0; i < url_size; ++i) - EXPECT_EQ(expected_1[i], offsets[i]); - + const size_t idn_hostname_offsets_1[] = {0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, + kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, + kNpos, kNpos, kNpos, kNpos, kNpos, 12, 13, 14, 15, 16, 17, 18, 19}; + CheckAdjustedOffsets("http://xn--l8jvb1ey91xtjb.jp/foo/", "ja", + kFormatUrlOmitNothing, UnescapeRule::NORMAL, idn_hostname_cases_1, + arraysize(idn_hostname_cases_1), idn_hostname_offsets_1); + + // "http://test.\x89c6\x9891.\x5317\x4eac\x5927\x5b78.test/" + const AdjustOffsetCase idn_hostname_cases_2[] = { + {7, 7}, + {9, 9}, + {11, 11}, + {12, 12}, + {13, string16::npos}, + {23, string16::npos}, + {24, 14}, + {25, 15}, + {26, string16::npos}, + {32, string16::npos}, + {41, 19}, + {42, 20}, + {45, 23}, + {46, 24}, + {47, string16::npos}, + {string16::npos, string16::npos}, + }; + const size_t idn_hostname_offsets_2[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, + kNpos, 14, 15, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, + kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 19, 20, 21, 22, 
23, 24}; + CheckAdjustedOffsets("http://test.xn--cy2a840a.xn--1lq90ic7f1rc.test/", + "zh-CN", kFormatUrlOmitNothing, UnescapeRule::NORMAL, + idn_hostname_cases_2, arraysize(idn_hostname_cases_2), + idn_hostname_offsets_2); + + // "http://www.google.com/foo bar/\x30B0\x30FC\x30B0\x30EB" const AdjustOffsetCase unescape_cases[] = { {25, 25}, {26, string16::npos}, @@ -1944,35 +1936,18 @@ TEST(NetUtilTest, FormatUrlAdjustOffset) { {67, string16::npos}, {68, string16::npos}, }; - for (size_t i = 0; i < ARRAYSIZE_UNSAFE(unescape_cases); ++i) { - size_t offset = unescape_cases[i].input_offset; - // "http://www.google.com/foo bar/\x30B0\x30FC\x30B0\x30EB" - FormatUrl(GURL( - "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"), - "en", kFormatUrlOmitUsernamePassword, UnescapeRule::SPACES, NULL, - NULL, &offset); - EXPECT_EQ(unescape_cases[i].output_offset, offset); - } - - url_size = 68; - offsets.clear(); - for (size_t i = 0; i < url_size; ++i) - offsets.push_back(i); - FormatUrlWithOffsets(GURL( - "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"), - "en", kFormatUrlOmitUsernamePassword, UnescapeRule::SPACES, NULL, NULL, - &offsets); - size_t expected_2[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, kNpos, kNpos, - 26, 27, 28, 29, 30, kNpos, kNpos, kNpos, kNpos, kNpos, - kNpos, kNpos, kNpos, 31, kNpos, kNpos, kNpos, kNpos, - kNpos, kNpos, kNpos, kNpos, 32, kNpos, kNpos, kNpos, - kNpos, kNpos, kNpos, kNpos, kNpos, 33, kNpos, kNpos, - kNpos, kNpos, kNpos, kNpos, kNpos, kNpos}; - ASSERT_EQ(url_size, arraysize(expected_2)); - for (size_t i = 0; i < url_size; ++i) - EXPECT_EQ(expected_2[i], offsets[i]); + const size_t unescape_offsets[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, kNpos, kNpos, 26, 27, + 28, 29, 30, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 31, + kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 
32, kNpos, kNpos, + kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 33, kNpos, kNpos, kNpos, kNpos, + kNpos, kNpos, kNpos, kNpos}; + CheckAdjustedOffsets( + "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", + "en", kFormatUrlOmitNothing, UnescapeRule::SPACES, unescape_cases, + arraysize(unescape_cases), unescape_offsets); + // "http://www.google.com/foo.html#\x30B0\x30B0z" const AdjustOffsetCase ref_cases[] = { {30, 30}, {31, 31}, @@ -1982,31 +1957,13 @@ TEST(NetUtilTest, FormatUrlAdjustOffset) { {37, 33}, {38, string16::npos}, }; - for (size_t i = 0; i < ARRAYSIZE_UNSAFE(ref_cases); ++i) { - size_t offset = ref_cases[i].input_offset; - // "http://www.google.com/foo.html#\x30B0\x30B0z" - FormatUrl(GURL( - "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z"), "en", - kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, NULL, NULL, - &offset); - EXPECT_EQ(ref_cases[i].output_offset, offset); - } - - url_size = 38; - offsets.clear(); - for (size_t i = 0; i < url_size; ++i) - offsets.push_back(i); - // "http://www.google.com/foo.html#\x30B0\x30B0z" - FormatUrlWithOffsets(GURL( - "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z"), "en", - kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, NULL, NULL, - &offsets); - size_t expected_3[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - 30, 31, kNpos, kNpos, 32, kNpos, kNpos, 33}; - ASSERT_EQ(url_size, arraysize(expected_3)); - for (size_t i = 0; i < url_size; ++i) - EXPECT_EQ(expected_3[i], offsets[i]); + const size_t ref_offsets[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + kNpos, kNpos, 32, kNpos, kNpos, 33}; + CheckAdjustedOffsets( + "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z", "en", + kFormatUrlOmitNothing, UnescapeRule::NORMAL, ref_cases, + arraysize(ref_cases), ref_offsets); const AdjustOffsetCase omit_http_cases[] = { 
{0, string16::npos}, @@ -2014,48 +1971,23 @@ TEST(NetUtilTest, FormatUrlAdjustOffset) { {7, 0}, {8, 1}, }; - for (size_t i = 0; i < ARRAYSIZE_UNSAFE(omit_http_cases); ++i) { - size_t offset = omit_http_cases[i].input_offset; - FormatUrl(GURL("http://www.google.com"), "en", - kFormatUrlOmitHTTP, UnescapeRule::NORMAL, NULL, NULL, &offset); - EXPECT_EQ(omit_http_cases[i].output_offset, offset); - } - - url_size = 23; - offsets.clear(); - for (size_t i = 0; i < url_size; ++i) - offsets.push_back(i); - FormatUrlWithOffsets(GURL("http://www.google.com"), "en", - kFormatUrlOmitHTTP, UnescapeRule::NORMAL, NULL, NULL, &offsets); - size_t expected_4[] = {kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, 1, - 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, kNpos}; - ASSERT_EQ(url_size, arraysize(expected_4)); - for (size_t i = 0; i < url_size; ++i) - EXPECT_EQ(expected_4[i], offsets[i]); + const size_t omit_http_offsets[] = {kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, + kNpos, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}; + CheckAdjustedOffsets("http://www.google.com/", "en", + kFormatUrlOmitHTTP, UnescapeRule::NORMAL, omit_http_cases, + arraysize(omit_http_cases), omit_http_offsets); - const AdjustOffsetCase omit_http_start_with_ftp[] = { + const AdjustOffsetCase omit_http_start_with_ftp_cases[] = { {0, 0}, {3, 3}, {8, 8}, }; - for (size_t i = 0; i < ARRAYSIZE_UNSAFE(omit_http_start_with_ftp); ++i) { - size_t offset = omit_http_start_with_ftp[i].input_offset; - FormatUrl(GURL("http://ftp.google.com"), "en", - kFormatUrlOmitHTTP, UnescapeRule::NORMAL, NULL, NULL, &offset); - EXPECT_EQ(omit_http_start_with_ftp[i].output_offset, offset); - } - - url_size = 23; - offsets.clear(); - for (size_t i = 0; i < url_size; ++i) - offsets.push_back(i); - FormatUrlWithOffsets(GURL("http://ftp.google.com"), "en", - kFormatUrlOmitHTTP, UnescapeRule::NORMAL, NULL, NULL, &offsets); - size_t expected_5[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, kNpos}; 
- ASSERT_EQ(url_size, arraysize(expected_5)); - for (size_t i = 0; i < url_size; ++i) - EXPECT_EQ(expected_5[i], offsets[i]); + const size_t omit_http_start_with_ftp_offsets[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21}; + CheckAdjustedOffsets("http://ftp.google.com/", "en", kFormatUrlOmitHTTP, + UnescapeRule::NORMAL, omit_http_start_with_ftp_cases, + arraysize(omit_http_start_with_ftp_cases), + omit_http_start_with_ftp_offsets); const AdjustOffsetCase omit_all_cases[] = { {12, 0}, @@ -2063,25 +1995,11 @@ TEST(NetUtilTest, FormatUrlAdjustOffset) { {0, string16::npos}, {3, string16::npos}, }; - for (size_t i = 0; i < ARRAYSIZE_UNSAFE(omit_all_cases); ++i) { - size_t offset = omit_all_cases[i].input_offset; - FormatUrl(GURL("http://user@foo.com/"), "en", kFormatUrlOmitAll, - UnescapeRule::NORMAL, NULL, NULL, &offset); - EXPECT_EQ(omit_all_cases[i].output_offset, offset); - } - - url_size = 21; - offsets.clear(); - for (size_t i = 0; i < url_size; ++i) - offsets.push_back(i); - FormatUrlWithOffsets(GURL("http://user@foo.com/"), "en", kFormatUrlOmitAll, - UnescapeRule::NORMAL, NULL, NULL, &offsets); - size_t expected_6[] = {kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, - kNpos, kNpos, kNpos, kNpos, 0, 1, 2, 3, 4, 5, 6, 7, - kNpos}; - ASSERT_EQ(url_size, arraysize(expected_6)); - for (size_t i = 0; i < url_size; ++i) - EXPECT_EQ(expected_6[i], offsets[i]); + const size_t omit_all_offsets[] = {kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, + kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, 1, 2, 3, 4, 5, 6, kNpos}; + CheckAdjustedOffsets("http://user@foo.com/", "en", kFormatUrlOmitAll, + UnescapeRule::NORMAL, omit_all_cases, + arraysize(omit_all_cases), omit_all_offsets); } TEST(NetUtilTest, SimplifyUrlForRequest) { @@ -2367,20 +2285,4 @@ TEST(NetUtilTest, GetNetworkList) { } } -TEST(NetUtilTest, AdjustComponentOffset) { - std::vector<size_t> old_offsets; - for (size_t i = 0; i < 10; ++i) - old_offsets.push_back(i); - 
std::vector<size_t> new_offsets; - std::transform(old_offsets.begin(), - old_offsets.end(), - std::back_inserter(new_offsets), - ClampComponentOffset(5)); - size_t expected_1[] = {kNpos, kNpos, kNpos, kNpos, kNpos, 5, 6, 7, 8, 9}; - EXPECT_EQ(new_offsets.size(), arraysize(expected_1)); - EXPECT_EQ(new_offsets.size(), old_offsets.size()); - for (size_t i = 0; i < arraysize(expected_1); ++i) - EXPECT_EQ(expected_1[i], new_offsets[i]); -} - } // namespace net diff --git a/net/base/registry_controlled_domain.cc b/net/base/registry_controlled_domain.cc index e8bc423..2d1d4f2 100644 --- a/net/base/registry_controlled_domain.cc +++ b/net/base/registry_controlled_domain.cc @@ -42,6 +42,7 @@ #include "base/logging.h" #include "base/memory/singleton.h" #include "base/string_util.h" +#include "base/utf_string_conversions.h" #include "googleurl/src/gurl.h" #include "googleurl/src/url_parse.h" #include "net/base/net_module.h" @@ -82,16 +83,6 @@ std::string RegistryControlledDomainService::GetDomainAndRegistry( } // static -std::string RegistryControlledDomainService::GetDomainAndRegistry( - const std::wstring& host) { - url_canon::CanonHostInfo host_info; - const std::string canon_host(CanonicalizeHost(host, &host_info)); - if (canon_host.empty() || host_info.IsIPAddress()) - return std::string(); - return GetDomainAndRegistryImpl(canon_host); -} - -// static bool RegistryControlledDomainService::SameDomainOrHost(const GURL& gurl1, const GURL& gurl2) { // See if both URLs have a known domain + registry, and those values are the @@ -143,20 +134,6 @@ size_t RegistryControlledDomainService::GetRegistryLength( } // static -size_t RegistryControlledDomainService::GetRegistryLength( - const std::wstring& host, - bool allow_unknown_registries) { - url_canon::CanonHostInfo host_info; - const std::string canon_host(CanonicalizeHost(host, &host_info)); - if (canon_host.empty()) - return std::string::npos; - if (host_info.IsIPAddress()) - return 0; - return 
GetInstance()->GetRegistryLengthImpl(canon_host, - allow_unknown_registries); -} - -// static RegistryControlledDomainService* RegistryControlledDomainService::GetInstance() { if (test_instance_) diff --git a/net/base/registry_controlled_domain.h b/net/base/registry_controlled_domain.h index 90a1b8f..57a4a06 100644 --- a/net/base/registry_controlled_domain.h +++ b/net/base/registry_controlled_domain.h @@ -155,7 +155,6 @@ class RegistryControlledDomainService { // Like the GURL version, but takes a host (which is canonicalized internally) // instead of a full GURL. static std::string GetDomainAndRegistry(const std::string& host); - static std::string GetDomainAndRegistry(const std::wstring& host); // This convenience function returns true if the two GURLs both have hosts // and one of the following is true: @@ -195,8 +194,6 @@ class RegistryControlledDomainService { // instead of a full GURL. static size_t GetRegistryLength(const std::string& host, bool allow_unknown_registries); - static size_t GetRegistryLength(const std::wstring& host, - bool allow_unknown_registries); // Returns the singleton instance, after attempting to initialize it. 
// NOTE that if the effective-TLD data resource can't be found, the instance diff --git a/net/base/registry_controlled_domain_unittest.cc b/net/base/registry_controlled_domain_unittest.cc index 6f2041f..dcaac5c 100644 --- a/net/base/registry_controlled_domain_unittest.cc +++ b/net/base/registry_controlled_domain_unittest.cc @@ -55,7 +55,7 @@ std::string GetDomainFromURL(const std::string& url) { return TestRegistryControlledDomainService::GetDomainAndRegistry(GURL(url)); } -std::string GetDomainFromHost(const std::wstring& host) { +std::string GetDomainFromHost(const std::string& host) { return TestRegistryControlledDomainService::GetDomainAndRegistry(host); } @@ -65,7 +65,7 @@ size_t GetRegistryLengthFromURL(const std::string& url, allow_unknown_registries); } -size_t GetRegistryLengthFromHost(const std::wstring& host, +size_t GetRegistryLengthFromHost(const std::string& host, bool allow_unknown_registries) { return TestRegistryControlledDomainService::GetRegistryLength(host, allow_unknown_registries); @@ -82,23 +82,23 @@ TEST_F(RegistryControlledDomainTest, TestGetDomainAndRegistry) { Perfect_Hash_Test1::FindDomain); // Test GURL version of GetDomainAndRegistry(). 
- EXPECT_EQ("baz.jp", GetDomainFromURL("http://a.baz.jp/file.html")); // 1 - EXPECT_EQ("baz.jp.", GetDomainFromURL("http://a.baz.jp./file.html")); // 1 - EXPECT_EQ("", GetDomainFromURL("http://ac.jp")); // 2 - EXPECT_EQ("", GetDomainFromURL("http://a.bar.jp")); // 3 - EXPECT_EQ("", GetDomainFromURL("http://bar.jp")); // 3 - EXPECT_EQ("", GetDomainFromURL("http://baz.bar.jp")); // 3 4 + EXPECT_EQ("baz.jp", GetDomainFromURL("http://a.baz.jp/file.html")); // 1 + EXPECT_EQ("baz.jp.", GetDomainFromURL("http://a.baz.jp./file.html")); // 1 + EXPECT_EQ("", GetDomainFromURL("http://ac.jp")); // 2 + EXPECT_EQ("", GetDomainFromURL("http://a.bar.jp")); // 3 + EXPECT_EQ("", GetDomainFromURL("http://bar.jp")); // 3 + EXPECT_EQ("", GetDomainFromURL("http://baz.bar.jp")); // 3 4 EXPECT_EQ("a.b.baz.bar.jp", GetDomainFromURL("http://a.b.baz.bar.jp")); - // 4 - EXPECT_EQ("pref.bar.jp", GetDomainFromURL("http://baz.pref.bar.jp")); // 5 + // 4 + EXPECT_EQ("pref.bar.jp", GetDomainFromURL("http://baz.pref.bar.jp")); // 5 EXPECT_EQ("b.bar.baz.com.", GetDomainFromURL("http://a.b.bar.baz.com.")); - // 6 - EXPECT_EQ("a.d.c", GetDomainFromURL("http://a.d.c")); // 7 - EXPECT_EQ("a.d.c", GetDomainFromURL("http://.a.d.c")); // 7 - EXPECT_EQ("a.d.c", GetDomainFromURL("http://..a.d.c")); // 7 - EXPECT_EQ("b.c", GetDomainFromURL("http://a.b.c")); // 7 8 - EXPECT_EQ("baz.com", GetDomainFromURL("http://baz.com")); // none - EXPECT_EQ("baz.com.", GetDomainFromURL("http://baz.com.")); // none + // 6 + EXPECT_EQ("a.d.c", GetDomainFromURL("http://a.d.c")); // 7 + EXPECT_EQ("a.d.c", GetDomainFromURL("http://.a.d.c")); // 7 + EXPECT_EQ("a.d.c", GetDomainFromURL("http://..a.d.c")); // 7 + EXPECT_EQ("b.c", GetDomainFromURL("http://a.b.c")); // 7 8 + EXPECT_EQ("baz.com", GetDomainFromURL("http://baz.com")); // none + EXPECT_EQ("baz.com.", GetDomainFromURL("http://baz.com.")); // none EXPECT_EQ("", GetDomainFromURL("")); EXPECT_EQ("", GetDomainFromURL("http://")); @@ -113,28 +113,28 @@ 
TEST_F(RegistryControlledDomainTest, TestGetDomainAndRegistry) { // Test std::wstring version of GetDomainAndRegistry(). Uses the same // underpinnings as the GURL version, so this is really more of a check of // CanonicalizeHost(). - EXPECT_EQ("baz.jp", GetDomainFromHost(L"a.baz.jp")); // 1 - EXPECT_EQ("baz.jp.", GetDomainFromHost(L"a.baz.jp.")); // 1 - EXPECT_EQ("", GetDomainFromHost(L"ac.jp")); // 2 - EXPECT_EQ("", GetDomainFromHost(L"a.bar.jp")); // 3 - EXPECT_EQ("", GetDomainFromHost(L"bar.jp")); // 3 - EXPECT_EQ("", GetDomainFromHost(L"baz.bar.jp")); // 3 4 - EXPECT_EQ("a.b.baz.bar.jp", GetDomainFromHost(L"a.b.baz.bar.jp")); // 3 4 - EXPECT_EQ("pref.bar.jp", GetDomainFromHost(L"baz.pref.bar.jp")); // 5 - EXPECT_EQ("b.bar.baz.com.", GetDomainFromHost(L"a.b.bar.baz.com.")); // 6 - EXPECT_EQ("a.d.c", GetDomainFromHost(L"a.d.c")); // 7 - EXPECT_EQ("a.d.c", GetDomainFromHost(L".a.d.c")); // 7 - EXPECT_EQ("a.d.c", GetDomainFromHost(L"..a.d.c")); // 7 - EXPECT_EQ("b.c", GetDomainFromHost(L"a.b.c")); // 7 8 - EXPECT_EQ("baz.com", GetDomainFromHost(L"baz.com")); // none - EXPECT_EQ("baz.com.", GetDomainFromHost(L"baz.com.")); // none + EXPECT_EQ("baz.jp", GetDomainFromHost("a.baz.jp")); // 1 + EXPECT_EQ("baz.jp.", GetDomainFromHost("a.baz.jp.")); // 1 + EXPECT_EQ("", GetDomainFromHost("ac.jp")); // 2 + EXPECT_EQ("", GetDomainFromHost("a.bar.jp")); // 3 + EXPECT_EQ("", GetDomainFromHost("bar.jp")); // 3 + EXPECT_EQ("", GetDomainFromHost("baz.bar.jp")); // 3 4 + EXPECT_EQ("a.b.baz.bar.jp", GetDomainFromHost("a.b.baz.bar.jp")); // 3 4 + EXPECT_EQ("pref.bar.jp", GetDomainFromHost("baz.pref.bar.jp")); // 5 + EXPECT_EQ("b.bar.baz.com.", GetDomainFromHost("a.b.bar.baz.com.")); // 6 + EXPECT_EQ("a.d.c", GetDomainFromHost("a.d.c")); // 7 + EXPECT_EQ("a.d.c", GetDomainFromHost(".a.d.c")); // 7 + EXPECT_EQ("a.d.c", GetDomainFromHost("..a.d.c")); // 7 + EXPECT_EQ("b.c", GetDomainFromHost("a.b.c")); // 7 8 + EXPECT_EQ("baz.com", GetDomainFromHost("baz.com")); // none + 
EXPECT_EQ("baz.com.", GetDomainFromHost("baz.com.")); // none - EXPECT_EQ("", GetDomainFromHost(L"")); - EXPECT_EQ("", GetDomainFromHost(L"foo.com..")); - EXPECT_EQ("", GetDomainFromHost(L"...")); - EXPECT_EQ("", GetDomainFromHost(L"192.168.0.1")); - EXPECT_EQ("", GetDomainFromHost(L"localhost.")); - EXPECT_EQ("", GetDomainFromHost(L".localhost.")); + EXPECT_EQ("", GetDomainFromHost("")); + EXPECT_EQ("", GetDomainFromHost("foo.com..")); + EXPECT_EQ("", GetDomainFromHost("...")); + EXPECT_EQ("", GetDomainFromHost("192.168.0.1")); + EXPECT_EQ("", GetDomainFromHost("localhost.")); + EXPECT_EQ("", GetDomainFromHost(".localhost.")); } TEST_F(RegistryControlledDomainTest, TestGetRegistryLength) { @@ -181,33 +181,32 @@ TEST_F(RegistryControlledDomainTest, TestGetRegistryLength) { // Test std::wstring version of GetRegistryLength(). Uses the same // underpinnings as the GURL version, so this is really more of a check of // CanonicalizeHost(). - EXPECT_EQ(2U, GetRegistryLengthFromHost(L"a.baz.jp", false)); // 1 - EXPECT_EQ(3U, GetRegistryLengthFromHost(L"a.baz.jp.", false)); // 1 - EXPECT_EQ(0U, GetRegistryLengthFromHost(L"ac.jp", false)); // 2 - EXPECT_EQ(0U, GetRegistryLengthFromHost(L"a.bar.jp", false)); // 3 - EXPECT_EQ(0U, GetRegistryLengthFromHost(L"bar.jp", false)); // 3 - EXPECT_EQ(0U, GetRegistryLengthFromHost(L"baz.bar.jp", false)); // 3 4 - EXPECT_EQ(12U, GetRegistryLengthFromHost(L"a.b.baz.bar.jp", false)); // 4 - EXPECT_EQ(6U, GetRegistryLengthFromHost(L"baz.pref.bar.jp", false)); // 5 - EXPECT_EQ(11U, GetRegistryLengthFromHost(L"a.b.bar.baz.com", false)); - // 6 - EXPECT_EQ(3U, GetRegistryLengthFromHost(L"a.d.c", false)); // 7 - EXPECT_EQ(3U, GetRegistryLengthFromHost(L".a.d.c", false)); // 7 - EXPECT_EQ(3U, GetRegistryLengthFromHost(L"..a.d.c", false)); // 7 - EXPECT_EQ(1U, GetRegistryLengthFromHost(L"a.b.c", false)); // 7 8 - EXPECT_EQ(0U, GetRegistryLengthFromHost(L"baz.com", false)); // none - EXPECT_EQ(0U, GetRegistryLengthFromHost(L"baz.com.", false)); 
// none - EXPECT_EQ(3U, GetRegistryLengthFromHost(L"baz.com", true)); // none - EXPECT_EQ(4U, GetRegistryLengthFromHost(L"baz.com.", true)); // none + EXPECT_EQ(2U, GetRegistryLengthFromHost("a.baz.jp", false)); // 1 + EXPECT_EQ(3U, GetRegistryLengthFromHost("a.baz.jp.", false)); // 1 + EXPECT_EQ(0U, GetRegistryLengthFromHost("ac.jp", false)); // 2 + EXPECT_EQ(0U, GetRegistryLengthFromHost("a.bar.jp", false)); // 3 + EXPECT_EQ(0U, GetRegistryLengthFromHost("bar.jp", false)); // 3 + EXPECT_EQ(0U, GetRegistryLengthFromHost("baz.bar.jp", false)); // 3 4 + EXPECT_EQ(12U, GetRegistryLengthFromHost("a.b.baz.bar.jp", false)); // 4 + EXPECT_EQ(6U, GetRegistryLengthFromHost("baz.pref.bar.jp", false)); // 5 + EXPECT_EQ(11U, GetRegistryLengthFromHost("a.b.bar.baz.com", false)); // 6 + EXPECT_EQ(3U, GetRegistryLengthFromHost("a.d.c", false)); // 7 + EXPECT_EQ(3U, GetRegistryLengthFromHost(".a.d.c", false)); // 7 + EXPECT_EQ(3U, GetRegistryLengthFromHost("..a.d.c", false)); // 7 + EXPECT_EQ(1U, GetRegistryLengthFromHost("a.b.c", false)); // 7 8 + EXPECT_EQ(0U, GetRegistryLengthFromHost("baz.com", false)); // none + EXPECT_EQ(0U, GetRegistryLengthFromHost("baz.com.", false)); // none + EXPECT_EQ(3U, GetRegistryLengthFromHost("baz.com", true)); // none + EXPECT_EQ(4U, GetRegistryLengthFromHost("baz.com.", true)); // none - EXPECT_EQ(std::string::npos, GetRegistryLengthFromHost(L"", false)); - EXPECT_EQ(0U, GetRegistryLengthFromHost(L"foo.com..", false)); - EXPECT_EQ(0U, GetRegistryLengthFromHost(L"..", false)); - EXPECT_EQ(0U, GetRegistryLengthFromHost(L"192.168.0.1", false)); - EXPECT_EQ(0U, GetRegistryLengthFromHost(L"localhost", false)); - EXPECT_EQ(0U, GetRegistryLengthFromHost(L"localhost", true)); - EXPECT_EQ(0U, GetRegistryLengthFromHost(L"localhost.", false)); - EXPECT_EQ(0U, GetRegistryLengthFromHost(L"localhost.", true)); + EXPECT_EQ(std::string::npos, GetRegistryLengthFromHost("", false)); + EXPECT_EQ(0U, GetRegistryLengthFromHost("foo.com..", false)); + EXPECT_EQ(0U, 
GetRegistryLengthFromHost("..", false)); + EXPECT_EQ(0U, GetRegistryLengthFromHost("192.168.0.1", false)); + EXPECT_EQ(0U, GetRegistryLengthFromHost("localhost", false)); + EXPECT_EQ(0U, GetRegistryLengthFromHost("localhost", true)); + EXPECT_EQ(0U, GetRegistryLengthFromHost("localhost.", false)); + EXPECT_EQ(0U, GetRegistryLengthFromHost("localhost.", true)); } TEST_F(RegistryControlledDomainTest, TestSameDomainOrHost) { @@ -215,29 +214,29 @@ TEST_F(RegistryControlledDomainTest, TestSameDomainOrHost) { Perfect_Hash_Test2::FindDomain); EXPECT_TRUE(CompareDomains("http://a.b.bar.jp/file.html", - "http://a.b.bar.jp/file.html")); // b.bar.jp + "http://a.b.bar.jp/file.html")); // b.bar.jp EXPECT_TRUE(CompareDomains("http://a.b.bar.jp/file.html", - "http://b.b.bar.jp/file.html")); // b.bar.jp - EXPECT_FALSE(CompareDomains("http://a.foo.jp/file.html", // foo.jp - "http://a.not.jp/file.html")); // not.jp - EXPECT_FALSE(CompareDomains("http://a.foo.jp/file.html", // foo.jp - "http://a.foo.jp./file.html")); // foo.jp. - EXPECT_FALSE(CompareDomains("http://a.com/file.html", // a.com - "http://b.com/file.html")); // b.com + "http://b.b.bar.jp/file.html")); // b.bar.jp + EXPECT_FALSE(CompareDomains("http://a.foo.jp/file.html", // foo.jp + "http://a.not.jp/file.html")); // not.jp + EXPECT_FALSE(CompareDomains("http://a.foo.jp/file.html", // foo.jp + "http://a.foo.jp./file.html")); // foo.jp. 
+ EXPECT_FALSE(CompareDomains("http://a.com/file.html", // a.com + "http://b.com/file.html")); // b.com EXPECT_TRUE(CompareDomains("http://a.x.com/file.html", - "http://b.x.com/file.html")); // x.com + "http://b.x.com/file.html")); // x.com EXPECT_TRUE(CompareDomains("http://a.x.com/file.html", - "http://.x.com/file.html")); // x.com + "http://.x.com/file.html")); // x.com EXPECT_TRUE(CompareDomains("http://a.x.com/file.html", - "http://..b.x.com/file.html")); // x.com + "http://..b.x.com/file.html")); // x.com EXPECT_TRUE(CompareDomains("http://intranet/file.html", - "http://intranet/file.html")); // intranet + "http://intranet/file.html")); // intranet EXPECT_TRUE(CompareDomains("http://127.0.0.1/file.html", - "http://127.0.0.1/file.html")); // 127.0.0.1 - EXPECT_FALSE(CompareDomains("http://192.168.0.1/file.html", // 192.168.0.1 - "http://127.0.0.1/file.html")); // 127.0.0.1 + "http://127.0.0.1/file.html")); // 127.0.0.1 + EXPECT_FALSE(CompareDomains("http://192.168.0.1/file.html", // 192.168.0.1 + "http://127.0.0.1/file.html")); // 127.0.0.1 EXPECT_FALSE(CompareDomains("file:///C:/file.html", - "file:///C:/file.html")); // no host + "file:///C:/file.html")); // no host } TEST_F(RegistryControlledDomainTest, TestDefaultData) { diff --git a/net/base/sdch_manager.cc b/net/base/sdch_manager.cc index 0c16455..6887fd0 100644 --- a/net/base/sdch_manager.cc +++ b/net/base/sdch_manager.cc @@ -106,8 +106,7 @@ bool SdchManager::Dictionary::CanSet(const std::string& domain, SdchErrorRecovery(DICTIONARY_MISSING_DOMAIN_SPECIFIER); return false; // Domain is required. } - if (RegistryControlledDomainService::GetDomainAndRegistry(domain).size() - == 0) { + if (RegistryControlledDomainService::GetDomainAndRegistry(domain).empty()) { SdchErrorRecovery(DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN); return false; // domain was a TLD. } |