Fixes Issue 7377: Regression: Omnibox trims URL ending with 0x85

To fix this issue, this change adds a new function, TrimWhitespaceUTF8(), which trims space characters (including non-printable characters and broken UTF-8 characters) from either end of a UTF-8 string. Comments are welcome, since I'm not sure this implementation is correct. (It may trim too aggressively.) BUG=7377 Review URL: http://codereview.chromium.org/20219 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@10456 0039d316-1c4b-4281-b951-d872f2087c98
author: hbono@chromium.org <hbono@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-02-26 10:02:52 +0000
committer: hbono@chromium.org <hbono@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-02-26 10:02:52 +0000
commit: 0676a96af598e97e3b6016cd3675e2f424dc8407 (patch)
tree: 5ff6d221bdbbc1bba12a5b41f160fd68e2ef453a /base/string_util_unittest.cc
parent: 503683f23ea6fe3eb728f2d09f81f2603ffc7d6f (diff)
download: chromium_src-0676a96af598e97e3b6016cd3675e2f424dc8407.zip
chromium_src-0676a96af598e97e3b6016cd3675e2f424dc8407.tar.gz
chromium_src-0676a96af598e97e3b6016cd3675e2f424dc8407.tar.bz2
1 files changed, 46 insertions, 1 deletions
diff --git a/base/string_util_unittest.cc b/base/string_util_unittest.cc
index 2b7634f..c5fdb2c 100644
--- a/base/string_util_unittest.cc
+++ b/base/string_util_unittest.cc
@@ -49,7 +49,6 @@ static const struct trim_case_ascii {
   {"  ", TRIM_TRAILING, "", TRIM_TRAILING},
   {"  ", TRIM_ALL, "", TRIM_ALL},
   {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
-  {"\x85Test String\xa0\x20", TRIM_ALL, "Test String", TRIM_ALL},
 };
 
 TEST(StringUtilTest, TrimWhitespace) {
@@ -80,6 +79,52 @@ TEST(StringUtilTest, TrimWhitespace) {
   }
 }
 
+static const struct trim_case_utf8 {
+  const char* input;
+  const TrimPositions positions;
+  const char* output;
+  const TrimPositions return_value;
+} trim_cases_utf8[] = {
+  // UTF-8 strings that start (and end) with Unicode space characters
+  // (including zero-width spaces).
+  {"\xE2\x80\x80Test String\xE2\x80\x81", TRIM_ALL, "Test String", TRIM_ALL},
+  {"\xE2\x80\x82Test String\xE2\x80\x83", TRIM_ALL, "Test String", TRIM_ALL},
+  {"\xE2\x80\x84Test String\xE2\x80\x85", TRIM_ALL, "Test String", TRIM_ALL},
+  {"\xE2\x80\x86Test String\xE2\x80\x87", TRIM_ALL, "Test String", TRIM_ALL},
+  {"\xE2\x80\x88Test String\xE2\x80\x8A", TRIM_ALL, "Test String", TRIM_ALL},
+  {"\xE3\x80\x80Test String\xE3\x80\x80", TRIM_ALL, "Test String", TRIM_ALL},
+  // UTF-8 strings that end with 0x85 (NEL in ISO-8859).
+  {"\xD0\x85", TRIM_TRAILING, "\xD0\x85", TRIM_NONE},
+  {"\xD9\x85", TRIM_TRAILING, "\xD9\x85", TRIM_NONE},
+  {"\xEC\x97\x85", TRIM_TRAILING, "\xEC\x97\x85", TRIM_NONE},
+  {"\xF0\x90\x80\x85", TRIM_TRAILING, "\xF0\x90\x80\x85", TRIM_NONE},
+  // UTF-8 strings that end with 0xA0 (non-break space in ISO-8859-1).
+  {"\xD0\xA0", TRIM_TRAILING, "\xD0\xA0", TRIM_NONE},
+  {"\xD9\xA0", TRIM_TRAILING, "\xD9\xA0", TRIM_NONE},
+  {"\xEC\x97\xA0", TRIM_TRAILING, "\xEC\x97\xA0", TRIM_NONE},
+  {"\xF0\x90\x80\xA0", TRIM_TRAILING, "\xF0\x90\x80\xA0", TRIM_NONE},
+};
+
+TEST(StringUtilTest, TrimWhitespaceUTF8) {
+  std::string output_ascii;
+  for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) {
+    const trim_case_ascii& value = trim_cases_ascii[i];
+    EXPECT_EQ(value.return_value,
+              TrimWhitespaceASCII(value.input, value.positions, &output_ascii));
+    EXPECT_EQ(value.output, output_ascii);
+  }
+
+  // Test that TrimWhiteSpaceUTF8() can remove Unicode space characters and
+  // prevent from removing UTF-8 characters that end with an ISO-8859 NEL.
+  std::string output_utf8;
+  for (size_t i = 0; i < arraysize(trim_cases_utf8); ++i) {
+    const trim_case_utf8& value = trim_cases_utf8[i];
+    EXPECT_EQ(value.return_value,
+              TrimWhitespaceUTF8(value.input, value.positions, &output_utf8));
+    EXPECT_EQ(value.output, output_utf8);
+  }
+}
+
 static const struct collapse_case {
   const wchar_t* input;
   const bool trim;
author	hbono@chromium.org <hbono@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2009-02-26 10:02:52 +0000
committer	hbono@chromium.org <hbono@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2009-02-26 10:02:52 +0000
commit	0676a96af598e97e3b6016cd3675e2f424dc8407 (patch)
tree	5ff6d221bdbbc1bba12a5b41f160fd68e2ef453a /base/string_util_unittest.cc
parent	503683f23ea6fe3eb728f2d09f81f2603ffc7d6f (diff)
download	chromium_src-0676a96af598e97e3b6016cd3675e2f424dc8407.zip chromium_src-0676a96af598e97e3b6016cd3675e2f424dc8407.tar.gz chromium_src-0676a96af598e97e3b6016cd3675e2f424dc8407.tar.bz2