diff options
author | hbono@chromium.org <hbono@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-02-26 10:21:49 +0000 |
---|---|---|
committer | hbono@chromium.org <hbono@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-02-26 10:21:49 +0000 |
commit | b5df94ed369ac59a76e81fb8a13552cac449509f (patch) | |
tree | 90aa349879e8fe6ad8ee550aaea8556b8d3410b4 /base | |
parent | 3ced98f4dad7b83de0d7b5f3fd9452c556799822 (diff) | |
download | chromium_src-b5df94ed369ac59a76e81fb8a13552cac449509f.zip chromium_src-b5df94ed369ac59a76e81fb8a13552cac449509f.tar.gz chromium_src-b5df94ed369ac59a76e81fb8a13552cac449509f.tar.bz2 |
Reverting 10456.
Review URL: http://codereview.chromium.org/28180
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@10458 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base')
-rw-r--r-- | base/string_util.cc | 26 | ||||
-rw-r--r-- | base/string_util.h | 19 | ||||
-rw-r--r-- | base/string_util_unittest.cc | 47 |
3 files changed, 7 insertions, 85 deletions
diff --git a/base/string_util.cc b/base/string_util.cc index 55be9df..4ba8c4b 100644 --- a/base/string_util.cc +++ b/base/string_util.cc @@ -337,6 +337,8 @@ const char kWhitespaceASCII[] = { 0x0C, 0x0D, 0x20, // Space + '\x85', // <control-0085> + '\xa0', // No-Break Space 0 }; const char* const kCodepageUTF8 = "UTF-8"; @@ -391,32 +393,10 @@ TrimPositions TrimWhitespace(const std::wstring& input, return TrimStringT(input, kWhitespaceWide, positions, output); } -TrimPositions TrimWhitespaceASCII(const std::string& input, - TrimPositions positions, - std::string* output) { - return TrimStringT(input, kWhitespaceASCII, positions, output); -} - -TrimPositions TrimWhitespaceUTF8(const std::string& input, - TrimPositions positions, - std::string* output) { - // This implementation is not so fast since it converts the text encoding - // twice. Please feel free to file a bug if this function hurts the - // performance of Chrome. - DCHECK(IsStringUTF8(input)); - std::wstring input_wide = UTF8ToWide(input); - std::wstring output_wide; - TrimPositions result = TrimWhitespace(input_wide, positions, &output_wide); - *output = WideToUTF8(output_wide); - return result; -} - -// This function is only for backward-compatibility. -// To be removed when all callers are updated. TrimPositions TrimWhitespace(const std::string& input, TrimPositions positions, std::string* output) { - return TrimWhitespaceASCII(input, positions, output); + return TrimStringT(input, kWhitespaceASCII, positions, output); } std::wstring CollapseWhitespace(const std::wstring& text, diff --git a/base/string_util.h b/base/string_util.h index 64e724e..500a114 100644 --- a/base/string_util.h +++ b/base/string_util.h @@ -128,13 +128,9 @@ bool TrimString(const std::string& input, std::string* output); // Trims any whitespace from either end of the input string. Returns where -// whitespace was found. -// The non-wide version has two functions: -// * TrimWhitespaceASCII() -// This function is for ASCII strings and only looks for ASCII whitespace; -// * TrimWhitespaceUTF8() -// This function is for UTF-8 strings and looks for Unicode whitespace. -// Please choose the best one according to your usage. +// whitespace was found. The non-wide version of this function only looks for +// ASCII whitespace; UTF-8 code-points are not searched for (use the wide +// version instead). // NOTE: Safe to use the same variable for both input and output. enum TrimPositions { TRIM_NONE = 0, @@ -145,15 +141,6 @@ enum TrimPositions { TrimPositions TrimWhitespace(const std::wstring& input, TrimPositions positions, std::wstring* output); -TrimPositions TrimWhitespaceASCII(const std::string& input, - TrimPositions positions, - std::string* output); -TrimPositions TrimWhitespaceUTF8(const std::string& input, - TrimPositions positions, - std::string* output); - -// Deprecated. This function is only for backward compatibility and calls -// TrimWhitespaceASCII(). TrimPositions TrimWhitespace(const std::string& input, TrimPositions positions, std::string* output); diff --git a/base/string_util_unittest.cc b/base/string_util_unittest.cc index c5fdb2c..2b7634f 100644 --- a/base/string_util_unittest.cc +++ b/base/string_util_unittest.cc @@ -49,6 +49,7 @@ static const struct trim_case_ascii { {" ", TRIM_TRAILING, "", TRIM_TRAILING}, {" ", TRIM_ALL, "", TRIM_ALL}, {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL}, + {"\x85Test String\xa0\x20", TRIM_ALL, "Test String", TRIM_ALL}, }; TEST(StringUtilTest, TrimWhitespace) { @@ -79,52 +80,6 @@ TEST(StringUtilTest, TrimWhitespace) { } } -static const struct trim_case_utf8 { - const char* input; - const TrimPositions positions; - const char* output; - const TrimPositions return_value; -} trim_cases_utf8[] = { - // UTF-8 strings that start (and end) with Unicode space characters - // (including zero-width spaces). - {"\xE2\x80\x80Test String\xE2\x80\x81", TRIM_ALL, "Test String", TRIM_ALL}, - {"\xE2\x80\x82Test String\xE2\x80\x83", TRIM_ALL, "Test String", TRIM_ALL}, - {"\xE2\x80\x84Test String\xE2\x80\x85", TRIM_ALL, "Test String", TRIM_ALL}, - {"\xE2\x80\x86Test String\xE2\x80\x87", TRIM_ALL, "Test String", TRIM_ALL}, - {"\xE2\x80\x88Test String\xE2\x80\x8A", TRIM_ALL, "Test String", TRIM_ALL}, - {"\xE3\x80\x80Test String\xE3\x80\x80", TRIM_ALL, "Test String", TRIM_ALL}, - // UTF-8 strings that end with 0x85 (NEL in ISO-8859). - {"\xD0\x85", TRIM_TRAILING, "\xD0\x85", TRIM_NONE}, - {"\xD9\x85", TRIM_TRAILING, "\xD9\x85", TRIM_NONE}, - {"\xEC\x97\x85", TRIM_TRAILING, "\xEC\x97\x85", TRIM_NONE}, - {"\xF0\x90\x80\x85", TRIM_TRAILING, "\xF0\x90\x80\x85", TRIM_NONE}, - // UTF-8 strings that end with 0xA0 (non-break space in ISO-8859-1). - {"\xD0\xA0", TRIM_TRAILING, "\xD0\xA0", TRIM_NONE}, - {"\xD9\xA0", TRIM_TRAILING, "\xD9\xA0", TRIM_NONE}, - {"\xEC\x97\xA0", TRIM_TRAILING, "\xEC\x97\xA0", TRIM_NONE}, - {"\xF0\x90\x80\xA0", TRIM_TRAILING, "\xF0\x90\x80\xA0", TRIM_NONE}, -}; - -TEST(StringUtilTest, TrimWhitespaceUTF8) { - std::string output_ascii; - for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) { - const trim_case_ascii& value = trim_cases_ascii[i]; - EXPECT_EQ(value.return_value, - TrimWhitespaceASCII(value.input, value.positions, &output_ascii)); - EXPECT_EQ(value.output, output_ascii); - } - - // Test that TrimWhiteSpaceUTF8() can remove Unicode space characters and - // prevent from removing UTF-8 characters that end with an ISO-8859 NEL. - std::string output_utf8; - for (size_t i = 0; i < arraysize(trim_cases_utf8); ++i) { - const trim_case_utf8& value = trim_cases_utf8[i]; - EXPECT_EQ(value.return_value, - TrimWhitespaceUTF8(value.input, value.positions, &output_utf8)); - EXPECT_EQ(value.output, output_utf8); - } -} - static const struct collapse_case { const wchar_t* input; const bool trim; |