diff options
author | jshin@chromium.org <jshin@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-11-03 23:25:55 +0000 |
---|---|---|
committer | jshin@chromium.org <jshin@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-11-03 23:25:55 +0000 |
commit | c09fb1c79c0a3e76dbb6091e4b718fd9bb197395 (patch) | |
tree | aab648921cd1817792614596ba4e6f94c9c3d7e0 /base/i18n/icu_string_conversions_unittest.cc | |
parent | 0912579b25f74d5b66c8adc0d3d8a7f805141e89 (diff) | |
download | chromium_src-c09fb1c79c0a3e76dbb6091e4b718fd9bb197395.zip chromium_src-c09fb1c79c0a3e76dbb6091e4b718fd9bb197395.tar.gz chromium_src-c09fb1c79c0a3e76dbb6091e4b718fd9bb197395.tar.bz2 |
Add support for the extended header parameter syntax in Content-Disposition header (RFC 5987).
The implementation is not generic; it is only used for the 'filename' parameter.
The CL is originally by James Simonsen, and I reviewed it at http://codereview.chromium.org/4254001/show
I added a check for ASCII-ness of the RFC 5987 extended header and a few tests to NetUti*.GetFileNameFromCD
(net_unittests) and I*.Conver*Norm* (base_unittests). I also replaced the '\uxxxx' notation with
the corresponding UTF-8 byte sequences because Visual Studio does not understand it yet.
BUG=57830
TEST="net_unittests --gtest_filter=NetU*.GetFil*", "base_unittests --gtest_filter=I*.Conver*Norm*" and tests
at http://greenbytes.de/tech/tc2231/
Original CL / Review: By James Simonsen; at http://codereview.chromium.org/4254001/show
Review URL: http://codereview.chromium.org/4435001
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@64987 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base/i18n/icu_string_conversions_unittest.cc')
-rw-r--r-- | base/i18n/icu_string_conversions_unittest.cc | 30 |
1 files changed, 30 insertions, 0 deletions
diff --git a/base/i18n/icu_string_conversions_unittest.cc b/base/i18n/icu_string_conversions_unittest.cc index 2083fa9..40b0fed 100644 --- a/base/i18n/icu_string_conversions_unittest.cc +++ b/base/i18n/icu_string_conversions_unittest.cc @@ -11,6 +11,7 @@ #include "base/basictypes.h" #include "base/i18n/icu_string_conversions.h" #include "base/logging.h" +#include "base/string_piece.h" #include "base/utf_string_conversions.h" #include "testing/gtest/include/gtest/gtest.h" @@ -325,4 +326,33 @@ TEST(ICUStringConversionsTest, ConvertBetweenCodepageAndUTF16) { } } +static const struct { + const char* encoded; + const char* codepage_name; + bool expected_success; + const char* expected_value; +} kConvertAndNormalizeCases[] = { + {"foo-\xe4.html", "iso-8859-1", true, "foo-\xc3\xa4.html"}, + {"foo-\xe4.html", "iso-8859-7", true, "foo-\xce\xb4.html"}, + {"foo-\xe4.html", "foo-bar", false, ""}, + {"foo-\xff.html", "ascii", false, ""}, + {"foo.html", "ascii", true, "foo.html"}, + {"foo-a\xcc\x88.html", "utf-8", true, "foo-\xc3\xa4.html"}, + {"\x95\x32\x82\x36\xD2\xBB", "gb18030", true, "\xF0\xA0\x80\x80\xE4\xB8\x80"}, + {"\xA7\x41\xA6\x6E", "big5", true, "\xE4\xBD\xA0\xE5\xA5\xBD"}, + // Windows-1258 does have a combining character at xD2 (which is U+0309). + // The sequence of (U+00E2, U+0309) is also encoded as U+1EA9. + {"foo\xE2\xD2", "windows-1258", true, "foo\xE1\xBA\xA9"}, +}; +TEST(ICUStringConversionsTest, ConvertToUtf8AndNormalize) { + std::string result; + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kConvertAndNormalizeCases); ++i) { + bool success = ConvertToUtf8AndNormalize( + kConvertAndNormalizeCases[i].encoded, + kConvertAndNormalizeCases[i].codepage_name, &result); + EXPECT_EQ(kConvertAndNormalizeCases[i].expected_success, success); + EXPECT_EQ(kConvertAndNormalizeCases[i].expected_value, result); + } +} + } // namespace base |