summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- base/string_util.cc 26
-rw-r--r-- base/string_util.h 19
-rw-r--r-- base/string_util_unittest.cc 47
-rw-r--r-- build/googleurl.xcodeproj/project.pbxproj 2
-rw-r--r-- chrome/browser/net/url_fixer_upper.cc 2
-rw-r--r-- chrome/browser/net/url_fixer_upper_unittest.cc 20
6 files changed, 8 insertions, 108 deletions
diff --git a/base/string_util.cc b/base/string_util.cc
index 55be9df..4ba8c4b 100644
--- a/base/string_util.cc
+++ b/base/string_util.cc
@@ -337,6 +337,8 @@ const char kWhitespaceASCII[] = {
0x0C,
0x0D,
0x20, // Space
+ '\x85', // <control-0085>
+ '\xa0', // No-Break Space
0
};
const char* const kCodepageUTF8 = "UTF-8";
@@ -391,32 +393,10 @@ TrimPositions TrimWhitespace(const std::wstring& input,
return TrimStringT(input, kWhitespaceWide, positions, output);
}
-TrimPositions TrimWhitespaceASCII(const std::string& input,
- TrimPositions positions,
- std::string* output) {
- return TrimStringT(input, kWhitespaceASCII, positions, output);
-}
-
-TrimPositions TrimWhitespaceUTF8(const std::string& input,
- TrimPositions positions,
- std::string* output) {
- // This implementation is not so fast since it converts the text encoding
- // twice. Please feel free to file a bug if this function hurts the
- // performance of Chrome.
- DCHECK(IsStringUTF8(input));
- std::wstring input_wide = UTF8ToWide(input);
- std::wstring output_wide;
- TrimPositions result = TrimWhitespace(input_wide, positions, &output_wide);
- *output = WideToUTF8(output_wide);
- return result;
-}
-
-// This function is only for backward-compatibility.
-// To be removed when all callers are updated.
TrimPositions TrimWhitespace(const std::string& input,
TrimPositions positions,
std::string* output) {
- return TrimWhitespaceASCII(input, positions, output);
+ return TrimStringT(input, kWhitespaceASCII, positions, output);
}
std::wstring CollapseWhitespace(const std::wstring& text,
diff --git a/base/string_util.h b/base/string_util.h
index 64e724e..500a114 100644
--- a/base/string_util.h
+++ b/base/string_util.h
@@ -128,13 +128,9 @@ bool TrimString(const std::string& input,
std::string* output);
// Trims any whitespace from either end of the input string. Returns where
-// whitespace was found.
-// The non-wide version has two functions:
-// * TrimWhitespaceASCII()
-// This function is for ASCII strings and only looks for ASCII whitespace;
-// * TrimWhitespaceUTF8()
-// This function is for UTF-8 strings and looks for Unicode whitespace.
-// Please choose the best one according to your usage.
+// whitespace was found. The non-wide version of this function only looks for
+// ASCII whitespace; UTF-8 code-points are not searched for (use the wide
+// version instead).
// NOTE: Safe to use the same variable for both input and output.
enum TrimPositions {
TRIM_NONE = 0,
@@ -145,15 +141,6 @@ enum TrimPositions {
TrimPositions TrimWhitespace(const std::wstring& input,
TrimPositions positions,
std::wstring* output);
-TrimPositions TrimWhitespaceASCII(const std::string& input,
- TrimPositions positions,
- std::string* output);
-TrimPositions TrimWhitespaceUTF8(const std::string& input,
- TrimPositions positions,
- std::string* output);
-
-// Deprecated. This function is only for backward compatibility and calls
-// TrimWhitespaceASCII().
TrimPositions TrimWhitespace(const std::string& input,
TrimPositions positions,
std::string* output);
diff --git a/base/string_util_unittest.cc b/base/string_util_unittest.cc
index c5fdb2c..2b7634f 100644
--- a/base/string_util_unittest.cc
+++ b/base/string_util_unittest.cc
@@ -49,6 +49,7 @@ static const struct trim_case_ascii {
{" ", TRIM_TRAILING, "", TRIM_TRAILING},
{" ", TRIM_ALL, "", TRIM_ALL},
{"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
+ {"\x85Test String\xa0\x20", TRIM_ALL, "Test String", TRIM_ALL},
};
TEST(StringUtilTest, TrimWhitespace) {
@@ -79,52 +80,6 @@ TEST(StringUtilTest, TrimWhitespace) {
}
}
-static const struct trim_case_utf8 {
- const char* input;
- const TrimPositions positions;
- const char* output;
- const TrimPositions return_value;
-} trim_cases_utf8[] = {
- // UTF-8 strings that start (and end) with Unicode space characters
- // (including zero-width spaces).
- {"\xE2\x80\x80Test String\xE2\x80\x81", TRIM_ALL, "Test String", TRIM_ALL},
- {"\xE2\x80\x82Test String\xE2\x80\x83", TRIM_ALL, "Test String", TRIM_ALL},
- {"\xE2\x80\x84Test String\xE2\x80\x85", TRIM_ALL, "Test String", TRIM_ALL},
- {"\xE2\x80\x86Test String\xE2\x80\x87", TRIM_ALL, "Test String", TRIM_ALL},
- {"\xE2\x80\x88Test String\xE2\x80\x8A", TRIM_ALL, "Test String", TRIM_ALL},
- {"\xE3\x80\x80Test String\xE3\x80\x80", TRIM_ALL, "Test String", TRIM_ALL},
- // UTF-8 strings that end with 0x85 (NEL in ISO-8859).
- {"\xD0\x85", TRIM_TRAILING, "\xD0\x85", TRIM_NONE},
- {"\xD9\x85", TRIM_TRAILING, "\xD9\x85", TRIM_NONE},
- {"\xEC\x97\x85", TRIM_TRAILING, "\xEC\x97\x85", TRIM_NONE},
- {"\xF0\x90\x80\x85", TRIM_TRAILING, "\xF0\x90\x80\x85", TRIM_NONE},
- // UTF-8 strings that end with 0xA0 (non-break space in ISO-8859-1).
- {"\xD0\xA0", TRIM_TRAILING, "\xD0\xA0", TRIM_NONE},
- {"\xD9\xA0", TRIM_TRAILING, "\xD9\xA0", TRIM_NONE},
- {"\xEC\x97\xA0", TRIM_TRAILING, "\xEC\x97\xA0", TRIM_NONE},
- {"\xF0\x90\x80\xA0", TRIM_TRAILING, "\xF0\x90\x80\xA0", TRIM_NONE},
-};
-
-TEST(StringUtilTest, TrimWhitespaceUTF8) {
- std::string output_ascii;
- for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) {
- const trim_case_ascii& value = trim_cases_ascii[i];
- EXPECT_EQ(value.return_value,
- TrimWhitespaceASCII(value.input, value.positions, &output_ascii));
- EXPECT_EQ(value.output, output_ascii);
- }
-
- // Test that TrimWhiteSpaceUTF8() can remove Unicode space characters and
- // prevent from removing UTF-8 characters that end with an ISO-8859 NEL.
- std::string output_utf8;
- for (size_t i = 0; i < arraysize(trim_cases_utf8); ++i) {
- const trim_case_utf8& value = trim_cases_utf8[i];
- EXPECT_EQ(value.return_value,
- TrimWhitespaceUTF8(value.input, value.positions, &output_utf8));
- EXPECT_EQ(value.output, output_utf8);
- }
-}
-
static const struct collapse_case {
const wchar_t* input;
const bool trim;
diff --git a/build/googleurl.xcodeproj/project.pbxproj b/build/googleurl.xcodeproj/project.pbxproj
index 1db641c..38f074f 100644
--- a/build/googleurl.xcodeproj/project.pbxproj
+++ b/build/googleurl.xcodeproj/project.pbxproj
@@ -60,7 +60,6 @@
7BA019240E5A2BD700044150 /* libgtest.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 7BA019210E5A2BCB00044150 /* libgtest.a */; };
7BA019640E5A2C2B00044150 /* libbase.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 7BA019440E5A2BFC00044150 /* libbase.a */; };
7BA019700E5A2C4700044150 /* libicuuc.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 7BA0195F0E5A2C1200044150 /* libicuuc.a */; };
- 793B6B0D0F4D140000C68483 /* libicui18n.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 7BA0195B0E5A2C1200044150 /* libicui18n.a */; };
7BA019740E5A2C5C00044150 /* libicudata.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 7BA019570E5A2C1200044150 /* libicudata.a */; };
/* End PBXBuildFile section */
@@ -265,7 +264,6 @@
7BA018EF0E5A2B3300044150 /* libgoogleurl.a in Frameworks */,
7BA019240E5A2BD700044150 /* libgtest.a in Frameworks */,
7BA019740E5A2C5C00044150 /* libicudata.a in Frameworks */,
- 793B6B0D0F4D140000C68483 /* libicui18n.a in Frameworks */,
7BA019700E5A2C4700044150 /* libicuuc.a in Frameworks */,
);
runOnlyForDeploymentPostprocessing = 0;
diff --git a/chrome/browser/net/url_fixer_upper.cc b/chrome/browser/net/url_fixer_upper.cc
index 08d60d2..121807b 100644
--- a/chrome/browser/net/url_fixer_upper.cc
+++ b/chrome/browser/net/url_fixer_upper.cc
@@ -360,7 +360,7 @@ string URLFixerUpper::SegmentURL(const string& text,
string URLFixerUpper::FixupURL(const string& text,
const string& desired_tld) {
string trimmed;
- TrimWhitespaceUTF8(text, TRIM_ALL, &trimmed);
+ TrimWhitespace(text, TRIM_ALL, &trimmed);
if (trimmed.empty())
return string(); // Nothing here.
diff --git a/chrome/browser/net/url_fixer_upper_unittest.cc b/chrome/browser/net/url_fixer_upper_unittest.cc
index ef26b5e..1e6dbc7 100644
--- a/chrome/browser/net/url_fixer_upper_unittest.cc
+++ b/chrome/browser/net/url_fixer_upper_unittest.cc
@@ -177,26 +177,6 @@ struct fixup_case {
{"ftpblah.google.com", "", "http://ftpblah.google.com/"},
{"ftp", "", "http://ftp/"},
{"google.ftp.com", "", "http://google.ftp.com/"},
- // URLs which end with an ISO-8859 next-line (0x85).
- { "http://google.com/search?q=\xd0\x85", "",
- "http://google.com/search?q=\xd0\x85"
- },
- { "http://google.com/search?q=\xec\x97\x85", "",
- "http://google.com/search?q=\xec\x97\x85"
- },
- { "http://google.com/search?q=\xf0\x90\x80\x85", "",
- "http://google.com/search?q=\xf0\x90\x80\x85"
- },
- // URLs which end with a non-break space (0xA0).
- { "http://google.com/search?q=\xd0\xa0", "",
- "http://google.com/search?q=\xd0\xa0"
- },
- { "http://google.com/search?q=\xec\x97\xa0", "",
- "http://google.com/search?q=\xec\x97\xa0"
- },
- { "http://google.com/search?q=\xf0\x90\x80\xa0", "",
- "http://google.com/search?q=\xf0\x90\x80\xa0"
- },
};
TEST(URLFixerUpperTest, FixupURL) {