diff options
author | pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-09-11 00:42:28 +0000 |
---|---|---|
committer | pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-09-11 00:42:28 +0000 |
commit | cf7ca8aba2960aa5ba1b7accda08ba045a60c98d (patch) | |
tree | 51e5f8dfd75a5efa9eb46c03fa2b59f073c2a43a /base/strings | |
parent | b8982bf7ec898420ced6999746dbc20a06fa0aff (diff) | |
download | chromium_src-cf7ca8aba2960aa5ba1b7accda08ba045a60c98d.zip chromium_src-cf7ca8aba2960aa5ba1b7accda08ba045a60c98d.tar.gz chromium_src-cf7ca8aba2960aa5ba1b7accda08ba045a60c98d.tar.bz2 |
Switch the offset conversion routines from an "offsets point at characters"
worldview to an "offsets point between characters" worldview.
This more closely aligns with how the omnibox autocomplete code (which is what
this was originally written for) expects things to behave.
Direct fallout from this change:
* An input offset of 0 will always map to an output offset of 0.
* An input offset of (length of string) will always map to the length of the
output string, instead of npos.
* It's possible for multiple distinct input offsets to map to a single non-npos
output offset, e.g. if they point to the start and end of a collapsed
sequence.
* Input offsets pointing into the middle of a completely-removed sequence might
not be set to npos if they fall on the boundaries of a subsequence processed
by the transformer. For example, when running FormatUrlWithOffsets() on
"http://user:pass@domain.com/" and directing it to omit both the scheme and
username/password, an input offset of "7" that points in between the scheme
and the username/password will be transformed to an output offset of 0
instead of npos.
Indirect fallout:
* A caller like SearchProvider::NavigationToMatch() will now mark certain
matches as "allowed to be default" that it didn't before. Specifically, if
the user's input string ends at the same point as the desired
|fill_into_edit|, the autocomplete offset will be calculated as (length of
string) instead of npos, and thus the match will be thought of as "inlinable"
and thus "allowed to be default".
BUG=284781
TEST=none
R=msw@chromium.org, willchan@chromium.org
Review URL: https://codereview.chromium.org/23619016
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@222426 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base/strings')
-rw-r--r-- | base/strings/utf_offset_string_conversions.cc | 4 | ||||
-rw-r--r-- | base/strings/utf_offset_string_conversions.h | 20 | ||||
-rw-r--r-- | base/strings/utf_offset_string_conversions_unittest.cc | 42 |
3 files changed, 39 insertions, 27 deletions
diff --git a/base/strings/utf_offset_string_conversions.cc b/base/strings/utf_offset_string_conversions.cc index bb402e4..339bd5e 100644 --- a/base/strings/utf_offset_string_conversions.cc +++ b/base/strings/utf_offset_string_conversions.cc @@ -148,10 +148,6 @@ void OffsetAdjuster::AdjustOffset(std::vector<size_t>::iterator offset) { size_t adjustment = 0; for (std::vector<Adjustment>::const_iterator i = adjustments_.begin(); i != adjustments_.end(); ++i) { - if (*offset == i->original_offset && i->output_length == 0) { - *offset = string16::npos; - return; - } if (*offset <= i->original_offset) break; if (*offset < (i->original_offset + i->original_length)) { diff --git a/base/strings/utf_offset_string_conversions.h b/base/strings/utf_offset_string_conversions.h index 1b615f4..bdb7c11 100644 --- a/base/strings/utf_offset_string_conversions.h +++ b/base/strings/utf_offset_string_conversions.h @@ -15,11 +15,15 @@ namespace base { // Like the conversions in utf_string_conversions.h, but also takes one or more -// offsets (|offset[s]_for_adjustment|) into the source strings, each offset -// will be adjusted to point at the same logical place in the result strings. -// If this isn't possible because an offset points past the end of the source -// strings or into the middle of a multibyte sequence, the offending offset will -// be set to string16::npos. |offset[s]_for_adjustment| may be NULL. +// |offset[s]_for_adjustment| representing insertion/selection points between +// characters: if |src| is "abcd", then 0 is before 'a', 2 is between 'b' and +// 'c', and 4 is at the end of the string. Valid input offsets range from 0 to +// |src_len|. On exit, each offset will have been modified to point at the same +// logical position in the output string. If an offset cannot be successfully +// adjusted (e.g. because it points into the middle of a multibyte sequence), it +// will be set to string16::npos. +// +// |offset[s]_for_adjustment| may be NULL. 
BASE_EXPORT bool UTF8ToUTF16AndAdjustOffset(const char* src, size_t src_len, string16* output, @@ -44,14 +48,16 @@ BASE_EXPORT std::string UTF16ToUTF8AndAdjustOffsets( std::vector<size_t>* offsets_for_adjustment); // Limiting function callable by std::for_each which will replace any value -// which is equal to or greater than |limit| with npos. +// which is greater than |limit| with npos. Typically this is called with a +// string length to clamp offsets into the string to [0, length] (as opposed to +// [0, length); see comments above). template <typename T> struct LimitOffset { explicit LimitOffset(size_t limit) : limit_(limit) {} void operator()(size_t& offset) { - if (offset >= limit_) + if (offset > limit_) offset = T::npos; } diff --git a/base/strings/utf_offset_string_conversions_unittest.cc b/base/strings/utf_offset_string_conversions_unittest.cc index 5545c0d..7626e4c 100644 --- a/base/strings/utf_offset_string_conversions_unittest.cc +++ b/base/strings/utf_offset_string_conversions_unittest.cc @@ -23,13 +23,16 @@ TEST(UTFOffsetStringConversionsTest, AdjustOffset) { size_t input_offset; size_t output_offset; } utf8_to_utf16_cases[] = { - {"", 0, kNpos}, + {"", 0, 0}, + {"", kNpos, kNpos}, {"\xe4\xbd\xa0\xe5\xa5\xbd", 1, kNpos}, {"\xe4\xbd\xa0\xe5\xa5\xbd", 3, 1}, {"\xed\xb0\x80z", 3, 1}, {"A\xF0\x90\x8C\x80z", 1, 1}, {"A\xF0\x90\x8C\x80z", 2, kNpos}, {"A\xF0\x90\x8C\x80z", 5, 3}, + {"A\xF0\x90\x8C\x80z", 6, 4}, + {"A\xF0\x90\x8C\x80z", kNpos, kNpos}, }; for (size_t i = 0; i < ARRAYSIZE_UNSAFE(utf8_to_utf16_cases); ++i) { size_t offset = utf8_to_utf16_cases[i].input_offset; @@ -42,18 +45,22 @@ TEST(UTFOffsetStringConversionsTest, AdjustOffset) { size_t input_offset; size_t output_offset; } utf16_to_utf8_cases[] = { - {{}, 0, kNpos}, + {{}, 0, 0}, // Converted to 3-byte utf-8 sequences - {{0x5909, 0x63DB}, 2, kNpos}, + {{0x5909, 0x63DB}, 3, kNpos}, + {{0x5909, 0x63DB}, 2, 6}, {{0x5909, 0x63DB}, 1, 3}, + {{0x5909, 0x63DB}, 0, 0}, // Converted to 2-byte utf-8 
sequences {{'A', 0x00bc, 0x00be, 'z'}, 1, 1}, {{'A', 0x00bc, 0x00be, 'z'}, 2, 3}, {{'A', 0x00bc, 0x00be, 'z'}, 3, 5}, + {{'A', 0x00bc, 0x00be, 'z'}, 4, 6}, // Surrogate pair {{'A', 0xd800, 0xdf00, 'z'}, 1, 1}, {{'A', 0xd800, 0xdf00, 'z'}, 2, kNpos}, {{'A', 0xd800, 0xdf00, 'z'}, 3, 5}, + {{'A', 0xd800, 0xdf00, 'z'}, 4, 6}, }; for (size_t i = 0; i < ARRAYSIZE_UNSAFE(utf16_to_utf8_cases); ++i) { size_t offset = utf16_to_utf8_cases[i].input_offset; @@ -73,10 +80,10 @@ TEST(UTFOffsetStringConversionsTest, LimitOffsets) { size_t unlimited_count = 0; for (std::vector<size_t>::iterator ti = size_ts.begin(); ti != size_ts.end(); ++ti) { - if (*ti < kLimit && *ti != kNpos) + if (*ti != kNpos) ++unlimited_count; } - EXPECT_EQ(10U, unlimited_count); + EXPECT_EQ(11U, unlimited_count); // Reverse the values in the vector and try again. size_ts.clear(); @@ -87,10 +94,10 @@ TEST(UTFOffsetStringConversionsTest, LimitOffsets) { unlimited_count = 0; for (std::vector<size_t>::iterator ti = size_ts.begin(); ti != size_ts.end(); ++ti) { - if (*ti < kLimit && *ti != kNpos) + if (*ti != kNpos) ++unlimited_count; } - EXPECT_EQ(10U, unlimited_count); + EXPECT_EQ(11U, unlimited_count); } TEST(UTFOffsetStringConversionsTest, AdjustOffsets) { @@ -99,13 +106,13 @@ TEST(UTFOffsetStringConversionsTest, AdjustOffsets) { // 1: abcXXXdef ==> abcXdef { std::vector<size_t> offsets; - for (size_t t = 0; t < 9; ++t) + for (size_t t = 0; t <= 9; ++t) offsets.push_back(t); { OffsetAdjuster offset_adjuster(&offsets); offset_adjuster.Add(OffsetAdjuster::Adjustment(3, 3, 1)); } - size_t expected_1[] = {0, 1, 2, 3, kNpos, kNpos, 4, 5, 6}; + size_t expected_1[] = {0, 1, 2, 3, kNpos, kNpos, 4, 5, 6, 7}; EXPECT_EQ(offsets.size(), arraysize(expected_1)); for (size_t i = 0; i < arraysize(expected_1); ++i) EXPECT_EQ(expected_1[i], offsets[i]); @@ -114,7 +121,7 @@ TEST(UTFOffsetStringConversionsTest, AdjustOffsets) { // 2: XXXaXXXXbcXXXXXXXdefXXX ==> XaXXbcXXXXdefX { std::vector<size_t> offsets; - for (size_t t = 0; 
t < 23; ++t) + for (size_t t = 0; t <= 23; ++t) offsets.push_back(t); { OffsetAdjuster offset_adjuster(&offsets); @@ -123,9 +130,10 @@ TEST(UTFOffsetStringConversionsTest, AdjustOffsets) { offset_adjuster.Add(OffsetAdjuster::Adjustment(10, 7, 4)); offset_adjuster.Add(OffsetAdjuster::Adjustment(20, 3, 1)); } - size_t expected_2[] = {0, kNpos, kNpos, 1, 2, kNpos, kNpos, kNpos, 4, 5, 6, - kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 10, 11, 12, - 13, kNpos, kNpos}; + size_t expected_2[] = { + 0, kNpos, kNpos, 1, 2, kNpos, kNpos, kNpos, 4, 5, 6, kNpos, kNpos, kNpos, + kNpos, kNpos, kNpos, 10, 11, 12, 13, kNpos, kNpos, 14 + }; EXPECT_EQ(offsets.size(), arraysize(expected_2)); for (size_t i = 0; i < arraysize(expected_2); ++i) EXPECT_EQ(expected_2[i], offsets[i]); @@ -134,7 +142,7 @@ TEST(UTFOffsetStringConversionsTest, AdjustOffsets) { // 3: XXXaXXXXbcdXXXeXX ==> aXXXXbcdXXXe { std::vector<size_t> offsets; - for (size_t t = 0; t < 17; ++t) + for (size_t t = 0; t <= 17; ++t) offsets.push_back(t); { OffsetAdjuster offset_adjuster(&offsets); @@ -143,8 +151,10 @@ TEST(UTFOffsetStringConversionsTest, AdjustOffsets) { offset_adjuster.Add(OffsetAdjuster::Adjustment(11, 3, 3)); offset_adjuster.Add(OffsetAdjuster::Adjustment(15, 2, 0)); } - size_t expected_3[] = {kNpos, kNpos, kNpos, 0, 1, kNpos, kNpos, kNpos, 5, 6, - 7, 8, kNpos, kNpos, 11, kNpos, kNpos}; + size_t expected_3[] = { + 0, kNpos, kNpos, 0, 1, kNpos, kNpos, kNpos, 5, 6, 7, 8, kNpos, kNpos, 11, + 12, kNpos, 12 + }; EXPECT_EQ(offsets.size(), arraysize(expected_3)); for (size_t i = 0; i < arraysize(expected_3); ++i) EXPECT_EQ(expected_3[i], offsets[i]); |