diff options
author | asvitkine@chromium.org <asvitkine@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-12-15 22:09:20 +0000 |
---|---|---|
committer | asvitkine@chromium.org <asvitkine@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-12-15 22:09:20 +0000 |
commit | da06e3089a9649187c7c93e115376c9b23c96cb7 (patch) | |
tree | 696eac9ae9561add157c724197966dbc74aeedaf /ui | |
parent | 862deef098a8f161cb9926363bd4786f83fdbdc9 (diff) | |
download | chromium_src-da06e3089a9649187c7c93e115376c9b23c96cb7.zip chromium_src-da06e3089a9649187c7c93e115376c9b23c96cb7.tar.gz chromium_src-da06e3089a9649187c7c93e115376c9b23c96cb7.tar.bz2 |
Fix |ElideText()| to handle UTF16 surrogate pairs correctly.
BUG=107703
TEST=New unit test in text_elider_unittest.cc.
Review URL: http://codereview.chromium.org/8910018
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@114705 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'ui')
-rw-r--r-- | ui/base/text/text_elider.cc | 96 | ||||
-rw-r--r-- | ui/base/text/text_elider_unittest.cc | 45 |
2 files changed, 115 insertions, 26 deletions
diff --git a/ui/base/text/text_elider.cc b/ui/base/text/text_elider.cc index 13ee279..08435a2 100644 --- a/ui/base/text/text_elider.cc +++ b/ui/base/text/text_elider.cc @@ -32,26 +32,66 @@ const char16 kForwardSlash = '/'; namespace { -// Cuts |text| to be |length| characters long. If |cut_in_middle| is true, the -// middle of the string is removed to leave equal-length pieces from the -// beginning and end of the string; otherwise, the end of the string is removed -// and only the beginning remains. If |insert_ellipsis| is true, then an -// ellipsis character will by inserted at the cut point. -string16 CutString(const string16& text, - size_t length, - bool cut_in_middle, - bool insert_ellipsis) { - // TODO(tony): This is wrong, it might split the string in the middle of a - // surrogate pair. - const string16 kInsert = insert_ellipsis ? UTF8ToUTF16(kEllipsis) : - string16(); - if (!cut_in_middle) - return text.substr(0, length) + kInsert; - // We put the extra character, if any, before the cut. - const size_t half_length = length / 2; - return text.substr(0, length - half_length) + kInsert + - text.substr(text.length() - half_length, half_length); -} +// Helper class to split + elide text, while respecting UTF16 surrogate pairs. +class StringSlicer { + public: + StringSlicer(const string16& text, + const string16& ellipsis, + bool elide_in_middle) + : text_(text), + ellipsis_(ellipsis), + elide_in_middle_(elide_in_middle) { + } + + // Cuts |text_| to be |length| characters long. If |elide_in_middle_| is true, + // the middle of the string is removed to leave equal-length pieces from the + // beginning and end of the string; otherwise, the end of the string is + // removed and only the beginning remains. If |insert_ellipsis| is true, + // then an ellipsis character will be inserted at the cut point. + string16 CutString(size_t length, bool insert_ellipsis) { + const string16 kInsert = insert_ellipsis ? 
ellipsis_ : string16(); + + if (!elide_in_middle_) + return text_.substr(0, FindValidBoundaryBefore(length)) + kInsert; + + // We put the extra character, if any, before the cut. + size_t half_length = length / 2; + size_t prefix_length = FindValidBoundaryBefore(length - half_length); + size_t suffix_start_guess = text_.length() - half_length; + size_t suffix_start = FindValidBoundaryAfter(suffix_start_guess); + size_t suffix_length = half_length - (suffix_start_guess - suffix_start); + return text_.substr(0, prefix_length) + kInsert + + text_.substr(suffix_start, suffix_length); + } + + private: + // Returns a valid cut boundary at or before |index|. + size_t FindValidBoundaryBefore(size_t index) { + DCHECK_LE(index, text_.length()); + if (index != text_.length()) + U16_SET_CP_START(text_.data(), 0, index); + return index; + } + + // Returns a valid cut boundary at or after |index|. + size_t FindValidBoundaryAfter(size_t index) { + DCHECK_LE(index, text_.length()); + if (index != text_.length()) + U16_SET_CP_LIMIT(text_.data(), 0, index, text_.length()); + return index; + } + + // The text to be sliced. + const string16& text_; + + // Ellipsis string to use. + const string16& ellipsis_; + + // If true, the middle of the string will be elided. + bool elide_in_middle_; + + DISALLOW_COPY_AND_ASSIGN(StringSlicer); +}; // Build a path from the first |num_components| elements in |path_elements|. // Prepends |path_prefix|, appends |filename|, inserts ellipsis if appropriate. @@ -350,10 +390,14 @@ string16 ElideText(const string16& text, if (text.empty()) return text; + const string16 kEllipsisUTF16 = UTF8ToUTF16(kEllipsis); + int current_text_pixel_width = font.GetStringWidth(text); bool elide_in_middle = (elide_behavior == ui::ELIDE_IN_MIDDLE); bool insert_ellipsis = (elide_behavior != ui::TRUNCATE_AT_END); + StringSlicer slicer(text, kEllipsisUTF16, elide_in_middle); + // Pango will return 0 width for absurdly long strings. Cut the string in // half and try again. 
// This is caused by an int overflow in Pango (specifically, in @@ -363,14 +407,14 @@ string16 ElideText(const string16& text, // (eliding way too much from a ridiculous string is probably still // ridiculous), but we should check other widths for bogus values as well. if (current_text_pixel_width <= 0 && !text.empty()) { - return ElideText(CutString(text, text.length() / 2, elide_in_middle, false), - font, available_pixel_width, elide_behavior); + string16 cut = slicer.CutString(text.length() / 2, false); + return ElideText(cut, font, available_pixel_width, elide_behavior); } if (current_text_pixel_width <= available_pixel_width) return text; - if (font.GetStringWidth(UTF8ToUTF16(kEllipsis)) > available_pixel_width) + if (font.GetStringWidth(kEllipsisUTF16) > available_pixel_width) return string16(); // Use binary search to compute the elided text. @@ -380,12 +424,12 @@ string16 ElideText(const string16& text, for (guess = (lo + hi) / 2; lo <= hi; guess = (lo + hi) / 2) { // We check the length of the whole desired string at once to ensure we // handle kerning/ligatures/etc. correctly. - string16 cut = CutString(text, guess, elide_in_middle, insert_ellipsis); + string16 cut = slicer.CutString(guess, insert_ellipsis); int guess_length = font.GetStringWidth(cut); // Check again that we didn't hit a Pango width overflow. If so, cut the // current string in half and start over. 
if (guess_length <= 0) { - return ElideText(CutString(text, guess / 2, elide_in_middle, false), + return ElideText(slicer.CutString(guess / 2, false), font, available_pixel_width, elide_behavior); } if (guess_length > available_pixel_width) @@ -394,7 +438,7 @@ string16 ElideText(const string16& text, lo = guess + 1; } - return CutString(text, guess, elide_in_middle, insert_ellipsis); + return slicer.CutString(guess, insert_ellipsis); } SortedDisplayURL::SortedDisplayURL(const GURL& url, diff --git a/ui/base/text/text_elider_unittest.cc b/ui/base/text/text_elider_unittest.cc index b716a00..439db7b 100644 --- a/ui/base/text/text_elider_unittest.cc +++ b/ui/base/text/text_elider_unittest.cc @@ -237,6 +237,51 @@ TEST(TextEliderTest, ElideTextTruncate) { } } +// Checks that all occurrences of |first_char| are followed by |second_char| and +// all occurrences of |second_char| are preceded by |first_char| in |text|. +static void CheckSurrogatePairs(const string16& text, + char16 first_char, + char16 second_char) { + size_t index = text.find_first_of(first_char); + while (index != string16::npos) { + EXPECT_LT(index, text.length() - 1); + EXPECT_EQ(second_char, text[index + 1]); + index = text.find_first_of(first_char, index + 1); + } + index = text.find_first_of(second_char); + while (index != string16::npos) { + EXPECT_GT(index, 0U); + EXPECT_EQ(first_char, text[index - 1]); + index = text.find_first_of(second_char, index + 1); + } +} + +TEST(TextEliderTest, ElideTextSurrogatePairs) { + const gfx::Font font; + // The below is 'MUSICAL SYMBOL G CLEF', which is represented in UTF-16 as + // two characters forming a surrogate pair 0x0001D11E. 
+ const std::string kSurrogate = "\xF0\x9D\x84\x9E"; + const string16 kTestString = + UTF8ToUTF16(kSurrogate + "ab" + kSurrogate + kSurrogate + "cd"); + const int kTestStringWidth = font.GetStringWidth(kTestString); + const char16 kSurrogateFirstChar = kTestString[0]; + const char16 kSurrogateSecondChar = kTestString[1]; + string16 result; + + // Elide |kTestString| to all possible widths and check that no instance of + // |kSurrogate| was split in two. + for (int width = 0; width <= kTestStringWidth; width++) { + result = ui::ElideText(kTestString, font, width, ui::TRUNCATE_AT_END); + CheckSurrogatePairs(result, kSurrogateFirstChar, kSurrogateSecondChar); + + result = ui::ElideText(kTestString, font, width, ui::ELIDE_AT_END); + CheckSurrogatePairs(result, kSurrogateFirstChar, kSurrogateSecondChar); + + result = ui::ElideText(kTestString, font, width, ui::ELIDE_IN_MIDDLE); + CheckSurrogatePairs(result, kSurrogateFirstChar, kSurrogateSecondChar); + } +} + TEST(TextEliderTest, ElideTextLongStrings) { const string16 kEllipsisStr = UTF8ToUTF16(kEllipsis); string16 data_scheme(UTF8ToUTF16("data:text/plain,")); |