diff options
Diffstat (limited to 'url/url_canon_unittest.cc')
-rw-r--r-- | url/url_canon_unittest.cc | 119 |
1 files changed, 13 insertions, 106 deletions
diff --git a/url/url_canon_unittest.cc b/url/url_canon_unittest.cc index 55f9608..9a766e3 100644 --- a/url/url_canon_unittest.cc +++ b/url/url_canon_unittest.cc @@ -4,10 +4,9 @@ #include <errno.h> +#include "base/macros.h" #include "testing/gtest/include/gtest/gtest.h" -#include "third_party/icu/source/common/unicode/ucnv.h" #include "url/url_canon.h" -#include "url/url_canon_icu.h" #include "url/url_canon_internal.h" #include "url/url_canon_stdstring.h" #include "url/url_parse.h" @@ -84,26 +83,6 @@ struct ReplaceCase { const char* expected; }; -// Wrapper around a UConverter object that managers creation and destruction. -class UConvScoper { - public: - explicit UConvScoper(const char* charset_name) { - UErrorCode err = U_ZERO_ERROR; - converter_ = ucnv_open(charset_name, &err); - } - - ~UConvScoper() { - if (converter_) - ucnv_close(converter_); - } - - // Returns the converter object, may be NULL. - UConverter* converter() const { return converter_; } - - private: - UConverter* converter_; -}; - // Magic string used in the replacements code that tells SetupReplComp to // call the clear function. const char kDeleteComp[] = "|"; @@ -244,58 +223,6 @@ TEST(URLCanonTest, UTF) { } } -TEST(URLCanonTest, ICUCharsetConverter) { - struct ICUCase { - const wchar_t* input; - const char* encoding; - const char* expected; - } icu_cases[] = { - // UTF-8. - {L"Hello, world", "utf-8", "Hello, world"}, - {L"\x4f60\x597d", "utf-8", "\xe4\xbd\xa0\xe5\xa5\xbd"}, - // Non-BMP UTF-8. - {L"!\xd800\xdf00!", "utf-8", "!\xf0\x90\x8c\x80!"}, - // Big5 - {L"\x4f60\x597d", "big5", "\xa7\x41\xa6\x6e"}, - // Unrepresentable character in the destination set. - {L"hello\x4f60\x06de\x597dworld", "big5", "hello\xa7\x41%26%231758%3B\xa6\x6eworld"}, - }; - - for (size_t i = 0; i < ARRAYSIZE(icu_cases); i++) { - UConvScoper conv(icu_cases[i].encoding); - ASSERT_TRUE(conv.converter() != NULL); - ICUCharsetConverter converter(conv.converter()); - - std::string str; - StdStringCanonOutput output(&str); - - base::string16 input_str(WStringToUTF16(icu_cases[i].input)); - int input_len = static_cast<int>(input_str.length()); - converter.ConvertFromUTF16(input_str.c_str(), input_len, &output); - output.Complete(); - - EXPECT_STREQ(icu_cases[i].expected, str.c_str()); - } - - // Test string sizes around the resize boundary for the output to make sure - // the converter resizes as needed. - const int static_size = 16; - UConvScoper conv("utf-8"); - ASSERT_TRUE(conv.converter()); - ICUCharsetConverter converter(conv.converter()); - for (int i = static_size - 2; i <= static_size + 2; i++) { - // Make a string with the appropriate length. - base::string16 input; - for (int ch = 0; ch < i; ch++) - input.push_back('a'); - - RawCanonOutput<static_size> output; - converter.ConvertFromUTF16(input.c_str(), static_cast<int>(input.length()), - &output); - EXPECT_EQ(input.length(), static_cast<size_t>(output.length())); - } -} - TEST(URLCanonTest, Scheme) { // Here, we're mostly testing that unusual characters are handled properly. // The canonicalizer doesn't do any parsing or whitespace detection. It will @@ -1198,57 +1125,38 @@ TEST(URLCanonTest, Query) { struct QueryCase { const char* input8; const wchar_t* input16; - const char* encoding; const char* expected; } query_cases[] = { - // Regular ASCII case in some different encodings. - {"foo=bar", L"foo=bar", NULL, "?foo=bar"}, - {"foo=bar", L"foo=bar", "utf-8", "?foo=bar"}, - {"foo=bar", L"foo=bar", "shift_jis", "?foo=bar"}, - {"foo=bar", L"foo=bar", "gb2312", "?foo=bar"}, + // Regular ASCII case. + {"foo=bar", L"foo=bar", "?foo=bar"}, // Allow question marks in the query without escaping - {"as?df", L"as?df", NULL, "?as?df"}, + {"as?df", L"as?df", "?as?df"}, // Always escape '#' since it would mark the ref. - {"as#df", L"as#df", NULL, "?as%23df"}, + {"as#df", L"as#df", "?as%23df"}, // Escape some questionable 8-bit characters, but never unescape. - {"\x02hello\x7f bye", L"\x02hello\x7f bye", NULL, "?%02hello%7F%20bye"}, - {"%40%41123", L"%40%41123", NULL, "?%40%41123"}, + {"\x02hello\x7f bye", L"\x02hello\x7f bye", "?%02hello%7F%20bye"}, + {"%40%41123", L"%40%41123", "?%40%41123"}, // Chinese input/output - {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", NULL, "?q=%E4%BD%A0%E5%A5%BD"}, - {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "gb2312", "?q=%C4%E3%BA%C3"}, - {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "big5", "?q=%A7A%A6n"}, - // Unencodable character in the destination character set should be - // escaped. The escape sequence unescapes to be the entity name: - // "?q=你" - {"q=Chinese\xef\xbc\xa7", L"q=Chinese\xff27", "iso-8859-1", "?q=Chinese%26%2365319%3B"}, + {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "?q=%E4%BD%A0%E5%A5%BD"}, // Invalid UTF-8/16 input should be replaced with invalid characters. - {"q=\xed\xed", L"q=\xd800\xd800", NULL, "?q=%EF%BF%BD%EF%BF%BD"}, + {"q=\xed\xed", L"q=\xd800\xd800", "?q=%EF%BF%BD%EF%BF%BD"}, // Don't allow < or > because sometimes they are used for XSS if the // URL is echoed in content. Firefox does this, IE doesn't. - {"q=<asdf>", L"q=<asdf>", NULL, "?q=%3Casdf%3E"}, + {"q=<asdf>", L"q=<asdf>", "?q=%3Casdf%3E"}, // Escape double quotemarks in the query. - {"q=\"asdf\"", L"q=\"asdf\"", NULL, "?q=%22asdf%22"}, + {"q=\"asdf\"", L"q=\"asdf\"", "?q=%22asdf%22"}, }; for (size_t i = 0; i < ARRAYSIZE(query_cases); i++) { Component out_comp; - UConvScoper conv(query_cases[i].encoding); - ASSERT_TRUE(!query_cases[i].encoding || conv.converter()); - ICUCharsetConverter converter(conv.converter()); - - // Map NULL to a NULL converter pointer. - ICUCharsetConverter* conv_pointer = &converter; - if (!query_cases[i].encoding) - conv_pointer = NULL; - if (query_cases[i].input8) { int len = static_cast<int>(strlen(query_cases[i].input8)); Component in_comp(0, len); std::string out_str; StdStringCanonOutput output(&out_str); - CanonicalizeQuery(query_cases[i].input8, in_comp, conv_pointer, &output, + CanonicalizeQuery(query_cases[i].input8, in_comp, NULL, &output, &out_comp); output.Complete(); @@ -1262,8 +1170,7 @@ TEST(URLCanonTest, Query) { std::string out_str; StdStringCanonOutput output(&out_str); - CanonicalizeQuery(input16.c_str(), in_comp, conv_pointer, &output, - &out_comp); + CanonicalizeQuery(input16.c_str(), in_comp, NULL, &output, &out_comp); output.Complete(); EXPECT_EQ(query_cases[i].expected, out_str); |