// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include #include #include "net/base/escape.h" #include "base/basictypes.h" #include "base/i18n/icu_string_conversions.h" #include "base/string_util.h" #include "base/stringprintf.h" #include "base/utf_string_conversions.h" #include "testing/gtest/include/gtest/gtest.h" namespace net { namespace { const size_t kNpos = string16::npos; struct EscapeCase { const char* input; const char* output; }; struct UnescapeURLCase { const wchar_t* input; UnescapeRule::Type rules; const wchar_t* output; }; struct UnescapeURLCaseASCII { const char* input; UnescapeRule::Type rules; const char* output; }; struct UnescapeAndDecodeCase { const char* input; // The expected output when run through UnescapeURL. const char* url_unescaped; // The expected output when run through UnescapeQuery. const char* query_unescaped; // The expected output when run through UnescapeAndDecodeURLComponent. const wchar_t* decoded; }; struct AdjustOffsetCase { const char* input; size_t input_offset; size_t output_offset; }; struct EscapeForHTMLCase { const char* input; const char* expected_output; }; TEST(EscapeTest, EscapeTextForFormSubmission) { const EscapeCase escape_cases[] = { {"foo", "foo"}, {"foo bar", "foo+bar"}, {"foo++", "foo%2B%2B"} }; for (size_t i = 0; i < arraysize(escape_cases); ++i) { EscapeCase value = escape_cases[i]; EXPECT_EQ(UTF8ToUTF16(value.output), EscapeQueryParamValueUTF8(UTF8ToUTF16(value.input), true)); } const EscapeCase escape_cases_no_plus[] = { {"foo", "foo"}, {"foo bar", "foo%20bar"}, {"foo++", "foo%2B%2B"} }; for (size_t i = 0; i < arraysize(escape_cases_no_plus); ++i) { EscapeCase value = escape_cases_no_plus[i]; EXPECT_EQ(ASCIIToUTF16(value.output), EscapeQueryParamValueUTF8(ASCIIToUTF16(value.input), false)); } // Test all the values in we're supposed to be escaping. const std::string no_escape( "abcdefghijklmnopqrstuvwxyz" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "0123456789" "!'()*-._~"); for (int i = 0; i < 256; ++i) { std::string in; in.push_back(i); std::string out = EscapeQueryParamValue(in, true); if (0 == i) { EXPECT_EQ(out, std::string("%00")); } else if (32 == i) { // Spaces are plus escaped like web forms. EXPECT_EQ(out, std::string("+")); } else if (no_escape.find(in) == std::string::npos) { // Check %hex escaping std::string expected = base::StringPrintf("%%%02X", i); EXPECT_EQ(expected, out); } else { // No change for things in the no_escape list. EXPECT_EQ(out, in); } } // Check to see if EscapeQueryParamValueUTF8 is the same as // EscapeQueryParamValue(..., kCodepageUTF8,) string16 test_str; test_str.reserve(5000); for (int i = 1; i < 5000; ++i) { test_str.push_back(i); } string16 utf16; EXPECT_TRUE(EscapeQueryParamValue(test_str, base::kCodepageUTF8, true, &utf16)); EXPECT_EQ(utf16, EscapeQueryParamValueUTF8(test_str, true)); EXPECT_TRUE(EscapeQueryParamValue(test_str, base::kCodepageUTF8, false, &utf16)); EXPECT_EQ(utf16, EscapeQueryParamValueUTF8(test_str, false)); } TEST(EscapeTest, EscapePath) { ASSERT_EQ( // Most of the character space we care about, un-escaped EscapePath( "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;" "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ" "[\\]^_`abcdefghijklmnopqrstuvwxyz" "{|}~\x7f\x80\xff"), // Escaped "%02%0A%1D%20!%22%23$%25&'()*+,-./0123456789%3A;" "%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ" "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz" "%7B%7C%7D~%7F%80%FF"); } TEST(EscapeTest, EscapeUrlEncodedData) { ASSERT_EQ( // Most of the character space we care about, un-escaped EscapeUrlEncodedData( "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;" "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ" "[\\]^_`abcdefghijklmnopqrstuvwxyz" "{|}~\x7f\x80\xff", true), // Escaped "%02%0A%1D+!%22%23%24%25%26%27()*%2B,-./0123456789:%3B" "%3C%3D%3E%3F%40ABCDEFGHIJKLMNOPQRSTUVWXYZ" "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz" "%7B%7C%7D~%7F%80%FF"); } TEST(EscapeTest, EscapeUrlEncodedDataSpace) { ASSERT_EQ(EscapeUrlEncodedData("a b", true), "a+b"); ASSERT_EQ(EscapeUrlEncodedData("a b", false), "a%20b"); } TEST(EscapeTest, UnescapeURLComponentASCII) { const UnescapeURLCaseASCII unescape_cases[] = { {"", UnescapeRule::NORMAL, ""}, {"%2", UnescapeRule::NORMAL, "%2"}, {"%%%%%%", UnescapeRule::NORMAL, "%%%%%%"}, {"Don't escape anything", UnescapeRule::NORMAL, "Don't escape anything"}, {"Invalid %escape %2", UnescapeRule::NORMAL, "Invalid %escape %2"}, {"Some%20random text %25%2dOK", UnescapeRule::NONE, "Some%20random text %25%2dOK"}, {"Some%20random text %25%2dOK", UnescapeRule::NORMAL, "Some%20random text %25-OK"}, {"Some%20random text %25%2dOK", UnescapeRule::SPACES, "Some random text %25-OK"}, {"Some%20random text %25%2dOK", UnescapeRule::URL_SPECIAL_CHARS, "Some%20random text %-OK"}, {"Some%20random text %25%2dOK", UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, "Some random text %-OK"}, {"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, "\xA0\xB1\xC2\xD3\xE4\xF5"}, {"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, "\xAa\xBb\xCc\xDd\xEe\xFf"}, // Certain URL-sensitive characters should not be unescaped unless asked. {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::SPACES, "Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"}, {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::URL_SPECIAL_CHARS, "Hello%20%13%10world ## ?? == && %% ++"}, // We can neither escape nor unescape '@' since some websites expect it to // be preserved as either '@' or "%40". // See http://b/996720 and http://crbug.com/23933 . {"me@my%40example", UnescapeRule::NORMAL, "me@my%40example"}, // Control characters. {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS, "%01%02%03%04%05%06%07%08%09 %"}, {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::CONTROL_CHARS, "\x01\x02\x03\x04\x05\x06\x07\x08\x09 %25"}, {"Hello%20%13%10%02", UnescapeRule::SPACES, "Hello %13%10%02"}, {"Hello%20%13%10%02", UnescapeRule::CONTROL_CHARS, "Hello%20\x13\x10\x02"}, }; for (size_t i = 0; i < arraysize(unescape_cases); i++) { std::string str(unescape_cases[i].input); EXPECT_EQ(std::string(unescape_cases[i].output), UnescapeURLComponent(str, unescape_cases[i].rules)); } // Test the NULL character unescaping (which wouldn't work above since those // are just char pointers). std::string input("Null"); input.push_back(0); // Also have a NULL in the input. input.append("%00%39Test"); // When we're unescaping NULLs std::string expected("Null"); expected.push_back(0); expected.push_back(0); expected.append("9Test"); EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::CONTROL_CHARS)); // When we're not unescaping NULLs. expected = "Null"; expected.push_back(0); expected.append("%009Test"); EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL)); } TEST(EscapeTest, UnescapeURLComponent) { const UnescapeURLCase unescape_cases[] = { {L"", UnescapeRule::NORMAL, L""}, {L"%2", UnescapeRule::NORMAL, L"%2"}, {L"%%%%%%", UnescapeRule::NORMAL, L"%%%%%%"}, {L"Don't escape anything", UnescapeRule::NORMAL, L"Don't escape anything"}, {L"Invalid %escape %2", UnescapeRule::NORMAL, L"Invalid %escape %2"}, {L"Some%20random text %25%2dOK", UnescapeRule::NONE, L"Some%20random text %25%2dOK"}, {L"Some%20random text %25%2dOK", UnescapeRule::NORMAL, L"Some%20random text %25-OK"}, {L"Some%20random text %25%2dOK", UnescapeRule::SPACES, L"Some random text %25-OK"}, {L"Some%20random text %25%2dOK", UnescapeRule::URL_SPECIAL_CHARS, L"Some%20random text %-OK"}, {L"Some%20random text %25%2dOK", UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, L"Some random text %-OK"}, {L"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, L"\xA0\xB1\xC2\xD3\xE4\xF5"}, {L"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, L"\xAa\xBb\xCc\xDd\xEe\xFf"}, // Certain URL-sensitive characters should not be unescaped unless asked. {L"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::SPACES, L"Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"}, {L"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::URL_SPECIAL_CHARS, L"Hello%20%13%10world ## ?? == && %% ++"}, // We can neither escape nor unescape '@' since some websites expect it to // be preserved as either '@' or "%40". // See http://b/996720 and http://crbug.com/23933 . {L"me@my%40example", UnescapeRule::NORMAL, L"me@my%40example"}, // Control characters. {L"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS, L"%01%02%03%04%05%06%07%08%09 %"}, {L"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::CONTROL_CHARS, L"\x01\x02\x03\x04\x05\x06\x07\x08\x09 %25"}, {L"Hello%20%13%10%02", UnescapeRule::SPACES, L"Hello %13%10%02"}, {L"Hello%20%13%10%02", UnescapeRule::CONTROL_CHARS, L"Hello%20\x13\x10\x02"}, {L"Hello\x9824\x9827", UnescapeRule::CONTROL_CHARS, L"Hello\x9824\x9827"}, }; for (size_t i = 0; i < arraysize(unescape_cases); i++) { string16 str(WideToUTF16(unescape_cases[i].input)); EXPECT_EQ(WideToUTF16(unescape_cases[i].output), UnescapeURLComponent(str, unescape_cases[i].rules)); } // Test the NULL character unescaping (which wouldn't work above since those // are just char pointers). string16 input(WideToUTF16(L"Null")); input.push_back(0); // Also have a NULL in the input. input.append(WideToUTF16(L"%00%39Test")); // When we're unescaping NULLs string16 expected(WideToUTF16(L"Null")); expected.push_back(0); expected.push_back(0); expected.append(ASCIIToUTF16("9Test")); EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::CONTROL_CHARS)); // When we're not unescaping NULLs. expected = WideToUTF16(L"Null"); expected.push_back(0); expected.append(WideToUTF16(L"%009Test")); EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL)); } TEST(EscapeTest, UnescapeAndDecodeUTF8URLComponent) { const UnescapeAndDecodeCase unescape_cases[] = { { "%", "%", "%", L"%"}, { "+", "+", " ", L"+"}, { "%2+", "%2+", "%2 ", L"%2+"}, { "+%%%+%%%", "+%%%+%%%", " %%% %%%", L"+%%%+%%%"}, { "Don't escape anything", "Don't escape anything", "Don't escape anything", L"Don't escape anything"}, { "+Invalid %escape %2+", "+Invalid %escape %2+", " Invalid %escape %2 ", L"+Invalid %escape %2+"}, { "Some random text %25%2dOK", "Some random text %25-OK", "Some random text %25-OK", L"Some random text %25-OK"}, { "%01%02%03%04%05%06%07%08%09", "%01%02%03%04%05%06%07%08%09", "%01%02%03%04%05%06%07%08%09", L"%01%02%03%04%05%06%07%08%09"}, { "%E4%BD%A0+%E5%A5%BD", "\xE4\xBD\xA0+\xE5\xA5\xBD", "\xE4\xBD\xA0 \xE5\xA5\xBD", L"\x4f60+\x597d"}, { "%ED%ED", // Invalid UTF-8. "\xED\xED", "\xED\xED", L"%ED%ED"}, // Invalid UTF-8 -> kept unescaped. }; for (size_t i = 0; i < arraysize(unescape_cases); i++) { std::string unescaped = UnescapeURLComponent(unescape_cases[i].input, UnescapeRule::NORMAL); EXPECT_EQ(std::string(unescape_cases[i].url_unescaped), unescaped); unescaped = UnescapeURLComponent(unescape_cases[i].input, UnescapeRule::REPLACE_PLUS_WITH_SPACE); EXPECT_EQ(std::string(unescape_cases[i].query_unescaped), unescaped); // TODO: Need to test unescape_spaces and unescape_percent. string16 decoded = UnescapeAndDecodeUTF8URLComponent( unescape_cases[i].input, UnescapeRule::NORMAL, NULL); EXPECT_EQ(WideToUTF16(unescape_cases[i].decoded), decoded); } } TEST(EscapeTest, AdjustOffset) { const AdjustOffsetCase adjust_cases[] = { {"", 0, std::string::npos}, {"test", 0, 0}, {"test", 2, 2}, {"test", 4, std::string::npos}, {"test", std::string::npos, std::string::npos}, {"%2dtest", 6, 4}, {"%2dtest", 2, std::string::npos}, {"test%2d", 2, 2}, {"%E4%BD%A0+%E5%A5%BD", 9, 1}, {"%E4%BD%A0+%E5%A5%BD", 6, std::string::npos}, {"%ED%B0%80+%E5%A5%BD", 6, 6}, }; for (size_t i = 0; i < arraysize(adjust_cases); i++) { size_t offset = adjust_cases[i].input_offset; UnescapeAndDecodeUTF8URLComponent(adjust_cases[i].input, UnescapeRule::NORMAL, &offset); EXPECT_EQ(adjust_cases[i].output_offset, offset); } } TEST(EscapeTest, EscapeForHTML) { const EscapeForHTMLCase tests[] = { { "hello", "hello" }, { "", "<hello>" }, { "don\'t mess with me", "don't mess with me" }, }; for (size_t i = 0; i < arraysize(tests); ++i) { std::string result = EscapeForHTML(std::string(tests[i].input)); EXPECT_EQ(std::string(tests[i].expected_output), result); } } TEST(EscapeTest, UnescapeForHTML) { const EscapeForHTMLCase tests[] = { { "", "" }, { "<hello>", "" }, { "don't mess with me", "don\'t mess with me" }, { "<>&"'", "<>&\"'" }, { "& lt; & ; &; '", "& lt; & ; &; '" }, { "&", "&" }, { """, "\"" }, { "'", "'" }, { "<", "<" }, { ">", ">" }, { "& &", "& &" }, }; for (size_t i = 0; i < arraysize(tests); ++i) { string16 result = UnescapeForHTML(ASCIIToUTF16(tests[i].input)); EXPECT_EQ(ASCIIToUTF16(tests[i].expected_output), result); } } TEST(EscapeTest, AdjustEncodingOffset) { // Imagine we have strings as shown in the following cases where the // %XX's represent encoded characters // 1: abc%ECdef ==> abcXdef std::vector offsets; for (size_t t = 0; t < 9; ++t) offsets.push_back(t); internal::AdjustEncodingOffset::Adjustments adjustments; adjustments.push_back(3); std::for_each(offsets.begin(), offsets.end(), internal::AdjustEncodingOffset(adjustments)); size_t expected_1[] = {0, 1, 2, 3, kNpos, kNpos, 4, 5, 6}; EXPECT_EQ(offsets.size(), arraysize(expected_1)); for (size_t i = 0; i < arraysize(expected_1); ++i) EXPECT_EQ(expected_1[i], offsets[i]); // 2: %ECabc%EC%ECdef%EC ==> XabcXXdefX offsets.clear(); for (size_t t = 0; t < 18; ++t) offsets.push_back(t); adjustments.clear(); adjustments.push_back(0); adjustments.push_back(6); adjustments.push_back(9); adjustments.push_back(15); std::for_each(offsets.begin(), offsets.end(), internal::AdjustEncodingOffset(adjustments)); size_t expected_2[] = {0, kNpos, kNpos, 1, 2, 3, 4, kNpos, kNpos, 5, kNpos, kNpos, 6, 7, 8, 9, kNpos, kNpos}; EXPECT_EQ(offsets.size(), arraysize(expected_2)); for (size_t i = 0; i < arraysize(expected_2); ++i) EXPECT_EQ(expected_2[i], offsets[i]); } } // namespace } // namespace net