summaryrefslogtreecommitdiffstats
path: root/ui
diff options
context:
space:
mode:
authorasvitkine@chromium.org <asvitkine@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-12-15 22:09:20 +0000
committerasvitkine@chromium.org <asvitkine@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-12-15 22:09:20 +0000
commitda06e3089a9649187c7c93e115376c9b23c96cb7 (patch)
tree696eac9ae9561add157c724197966dbc74aeedaf /ui
parent862deef098a8f161cb9926363bd4786f83fdbdc9 (diff)
downloadchromium_src-da06e3089a9649187c7c93e115376c9b23c96cb7.zip
chromium_src-da06e3089a9649187c7c93e115376c9b23c96cb7.tar.gz
chromium_src-da06e3089a9649187c7c93e115376c9b23c96cb7.tar.bz2
Fix |ElideText()| to handle UTF16 surrogate pairs correctly.
BUG=107703 TEST=New unit test in text_elider_unittest.cc. Review URL: http://codereview.chromium.org/8910018 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@114705 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'ui')
-rw-r--r--ui/base/text/text_elider.cc96
-rw-r--r--ui/base/text/text_elider_unittest.cc45
2 files changed, 115 insertions, 26 deletions
diff --git a/ui/base/text/text_elider.cc b/ui/base/text/text_elider.cc
index 13ee279..08435a2 100644
--- a/ui/base/text/text_elider.cc
+++ b/ui/base/text/text_elider.cc
@@ -32,26 +32,66 @@ const char16 kForwardSlash = '/';
namespace {
-// Cuts |text| to be |length| characters long. If |cut_in_middle| is true, the
-// middle of the string is removed to leave equal-length pieces from the
-// beginning and end of the string; otherwise, the end of the string is removed
-// and only the beginning remains. If |insert_ellipsis| is true, then an
-// ellipsis character will by inserted at the cut point.
-string16 CutString(const string16& text,
- size_t length,
- bool cut_in_middle,
- bool insert_ellipsis) {
- // TODO(tony): This is wrong, it might split the string in the middle of a
- // surrogate pair.
- const string16 kInsert = insert_ellipsis ? UTF8ToUTF16(kEllipsis) :
- string16();
- if (!cut_in_middle)
- return text.substr(0, length) + kInsert;
- // We put the extra character, if any, before the cut.
- const size_t half_length = length / 2;
- return text.substr(0, length - half_length) + kInsert +
- text.substr(text.length() - half_length, half_length);
-}
+// Helper class to split + elide text, while respecting UTF16 surrogate pairs.
+class StringSlicer {
+ public:
+  StringSlicer(const string16& text,
+               const string16& ellipsis,
+               bool elide_in_middle)
+      : text_(text),
+        ellipsis_(ellipsis),
+        elide_in_middle_(elide_in_middle) {
+  }
+
+  // Cuts |text_| to at most |length| UTF-16 code units (ellipsis excluded). If
+  // |elide_in_middle_| is true, the middle of the string is removed, leaving
+  // pieces from the beginning and end; otherwise the end is removed and only
+  // the beginning remains. Cuts are moved so no surrogate pair is split. If
+  // |insert_ellipsis| is true, |ellipsis_| is inserted at the cut point.
+  string16 CutString(size_t length, bool insert_ellipsis) {
+    const string16 kInsert = insert_ellipsis ? ellipsis_ : string16();
+
+    if (!elide_in_middle_)
+      return text_.substr(0, FindValidBoundaryBefore(length)) + kInsert;
+
+    // We put the extra character, if any, before the cut.
+    size_t half_length = length / 2;
+    size_t prefix_length = FindValidBoundaryBefore(length - half_length);
+    // Snapping the suffix start forward to a boundary can only shorten the
+    // suffix, so take everything from there to the end of |text_|.
+    size_t suffix_start = FindValidBoundaryAfter(text_.length() - half_length);
+    return text_.substr(0, prefix_length) + kInsert +
+        text_.substr(suffix_start);
+  }
+
+ private:
+  // Returns a valid cut boundary at or before |index|.
+  size_t FindValidBoundaryBefore(size_t index) {
+    DCHECK_LE(index, text_.length());
+    if (index != text_.length())
+      U16_SET_CP_START(text_.data(), 0, index);
+    return index;
+  }
+
+  // Returns a valid cut boundary at or after |index|.
+  size_t FindValidBoundaryAfter(size_t index) {
+    DCHECK_LE(index, text_.length());
+    if (index != text_.length())
+      U16_SET_CP_LIMIT(text_.data(), 0, index, text_.length());
+    return index;
+  }
+
+  // The text to be sliced. Held by reference, so it must outlive this object.
+  const string16& text_;
+
+  // Ellipsis string to use. Also held by reference.
+  const string16& ellipsis_;
+
+  // If true, the middle of the string will be elided.
+  bool elide_in_middle_;
+
+  DISALLOW_COPY_AND_ASSIGN(StringSlicer);
+};
// Build a path from the first |num_components| elements in |path_elements|.
// Prepends |path_prefix|, appends |filename|, inserts ellipsis if appropriate.
@@ -350,10 +390,14 @@ string16 ElideText(const string16& text,
if (text.empty())
return text;
+ const string16 kEllipsisUTF16 = UTF8ToUTF16(kEllipsis);
+
int current_text_pixel_width = font.GetStringWidth(text);
bool elide_in_middle = (elide_behavior == ui::ELIDE_IN_MIDDLE);
bool insert_ellipsis = (elide_behavior != ui::TRUNCATE_AT_END);
+ StringSlicer slicer(text, kEllipsisUTF16, elide_in_middle);
+
// Pango will return 0 width for absurdly long strings. Cut the string in
// half and try again.
// This is caused by an int overflow in Pango (specifically, in
@@ -363,14 +407,14 @@ string16 ElideText(const string16& text,
// (eliding way too much from a ridiculous string is probably still
// ridiculous), but we should check other widths for bogus values as well.
if (current_text_pixel_width <= 0 && !text.empty()) {
- return ElideText(CutString(text, text.length() / 2, elide_in_middle, false),
- font, available_pixel_width, elide_behavior);
+ string16 cut = slicer.CutString(text.length() / 2, false);
+ return ElideText(cut, font, available_pixel_width, elide_behavior);
}
if (current_text_pixel_width <= available_pixel_width)
return text;
- if (font.GetStringWidth(UTF8ToUTF16(kEllipsis)) > available_pixel_width)
+ if (font.GetStringWidth(kEllipsisUTF16) > available_pixel_width)
return string16();
// Use binary search to compute the elided text.
@@ -380,12 +424,12 @@ string16 ElideText(const string16& text,
for (guess = (lo + hi) / 2; lo <= hi; guess = (lo + hi) / 2) {
// We check the length of the whole desired string at once to ensure we
// handle kerning/ligatures/etc. correctly.
- string16 cut = CutString(text, guess, elide_in_middle, insert_ellipsis);
+ string16 cut = slicer.CutString(guess, insert_ellipsis);
int guess_length = font.GetStringWidth(cut);
// Check again that we didn't hit a Pango width overflow. If so, cut the
// current string in half and start over.
if (guess_length <= 0) {
- return ElideText(CutString(text, guess / 2, elide_in_middle, false),
+ return ElideText(slicer.CutString(guess / 2, false),
font, available_pixel_width, elide_behavior);
}
if (guess_length > available_pixel_width)
@@ -394,7 +438,7 @@ string16 ElideText(const string16& text,
lo = guess + 1;
}
- return CutString(text, guess, elide_in_middle, insert_ellipsis);
+ return slicer.CutString(guess, insert_ellipsis);
}
SortedDisplayURL::SortedDisplayURL(const GURL& url,
diff --git a/ui/base/text/text_elider_unittest.cc b/ui/base/text/text_elider_unittest.cc
index b716a00..439db7b 100644
--- a/ui/base/text/text_elider_unittest.cc
+++ b/ui/base/text/text_elider_unittest.cc
@@ -237,6 +237,51 @@ TEST(TextEliderTest, ElideTextTruncate) {
}
}
+// Checks that all occurrences of |first_char| are followed by |second_char| and
+// all occurrences of |second_char| are preceded by |first_char| in |text|.
+// The bounds checks are fatal (they return from this helper) so that an
+// unpaired code unit at either end of |text| never triggers an out-of-range read.
+static void CheckSurrogatePairs(const string16& text,
+                                char16 first_char,
+                                char16 second_char) {
+  for (size_t index = text.find(first_char); index != string16::npos;
+       index = text.find(first_char, index + 1)) {
+    ASSERT_LT(index, text.length() - 1);
+    EXPECT_EQ(second_char, text[index + 1]);
+  }
+  for (size_t index = text.find(second_char); index != string16::npos;
+       index = text.find(second_char, index + 1)) {
+    ASSERT_GT(index, 0U);
+    EXPECT_EQ(first_char, text[index - 1]);
+  }
+}
+
+TEST(TextEliderTest, ElideTextSurrogatePairs) {
+  const gfx::Font font;
+  // U+1D11E 'MUSICAL SYMBOL G CLEF' encoded as UTF-8. It lies outside the BMP,
+  // so in UTF-16 it becomes a surrogate pair of two code units.
+  const std::string kSurrogate = "\xF0\x9D\x84\x9E";
+  const string16 kTestString =
+      UTF8ToUTF16(kSurrogate + "ab" + kSurrogate + kSurrogate + "cd");
+  const char16 kLeadUnit = kTestString[0];
+  const char16 kTrailUnit = kTestString[1];
+  const int kMaxWidth = font.GetStringWidth(kTestString);
+
+  // Elide |kTestString| to every pixel width up to its full width, with each
+  // elide behavior, and check that no instance of |kSurrogate| was split.
+  for (int width = 0; width <= kMaxWidth; ++width) {
+    const string16 truncated =
+        ui::ElideText(kTestString, font, width, ui::TRUNCATE_AT_END);
+    CheckSurrogatePairs(truncated, kLeadUnit, kTrailUnit);
+    const string16 elided_at_end =
+        ui::ElideText(kTestString, font, width, ui::ELIDE_AT_END);
+    CheckSurrogatePairs(elided_at_end, kLeadUnit, kTrailUnit);
+    const string16 elided_in_middle =
+        ui::ElideText(kTestString, font, width, ui::ELIDE_IN_MIDDLE);
+    CheckSurrogatePairs(elided_in_middle, kLeadUnit, kTrailUnit);
+  }
+}
+
TEST(TextEliderTest, ElideTextLongStrings) {
const string16 kEllipsisStr = UTF8ToUTF16(kEllipsis);
string16 data_scheme(UTF8ToUTF16("data:text/plain,"));