Switch the offset conversion routines from an "offsets point at characters" worldview to an "offsets point between characters" worldview.

This more closely aligns with how the omnibox autocomplete code (which is what this was originally written for) expects things to behave.

Direct fallout from this change:
* An input offset of 0 will always map to an output offset of 0.
* An input offset of (length of string) will always map to the length of the output string, instead of npos.
* It's possible for multiple unique input offsets to map to a single non-npos output offset, if they e.g. point to the start and end of a collapsed sequence.
* Input offsets pointing into the middle of a completely-removed sequence may not be set to npos if they fall on the boundaries of a subsequence processed by the transformer. For example, when running FormatUrlWithOffsets() on "http://user:pass@domain.com/" and directing it to omit both the scheme and username/password, an input offset of "7" that points in between the scheme and the username/password will be transformed to an output offset of 0 instead of npos.

Indirect fallout:
* A caller like SearchProvider::NavigationToMatch() will now mark certain matches as "allowed to be default" that it didn't before. Specifically, if the user's input string ends at the same point as the desired |fill_into_edit|, the autocomplete offset will be calculated as (length of string) instead of npos, and thus the match will be thought of as "inlinable" and thus "allowed to be default".

BUG=284781
TEST=none
R=msw@chromium.org, willchan@chromium.org

Review URL: https://codereview.chromium.org/23619016

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@222426 0039d316-1c4b-4281-b951-d872f2087c98
author: pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2013-09-11 00:42:28 +0000
committer: pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2013-09-11 00:42:28 +0000
commit: cf7ca8aba2960aa5ba1b7accda08ba045a60c98d (patch)
tree: 51e5f8dfd75a5efa9eb46c03fa2b59f073c2a43a /base/strings
parent: b8982bf7ec898420ced6999746dbc20a06fa0aff (diff)
download: chromium_src-cf7ca8aba2960aa5ba1b7accda08ba045a60c98d.zip
chromium_src-cf7ca8aba2960aa5ba1b7accda08ba045a60c98d.tar.gz
chromium_src-cf7ca8aba2960aa5ba1b7accda08ba045a60c98d.tar.bz2
3 files changed, 39 insertions, 27 deletions
diff --git a/base/strings/utf_offset_string_conversions.cc b/base/strings/utf_offset_string_conversions.cc
index bb402e4..339bd5e 100644
--- a/base/strings/utf_offset_string_conversions.cc
+++ b/base/strings/utf_offset_string_conversions.cc
@@ -148,10 +148,6 @@ void OffsetAdjuster::AdjustOffset(std::vector<size_t>::iterator offset) {
   size_t adjustment = 0;
   for (std::vector<Adjustment>::const_iterator i = adjustments_.begin();
        i != adjustments_.end(); ++i) {
-    if (*offset == i->original_offset && i->output_length == 0) {
-      *offset = string16::npos;
-      return;
-    }
     if (*offset <= i->original_offset)
       break;
     if (*offset < (i->original_offset + i->original_length)) {
diff --git a/base/strings/utf_offset_string_conversions.h b/base/strings/utf_offset_string_conversions.h
index 1b615f4..bdb7c11 100644
--- a/base/strings/utf_offset_string_conversions.h
+++ b/base/strings/utf_offset_string_conversions.h
@@ -15,11 +15,15 @@
 namespace base {
 
 // Like the conversions in utf_string_conversions.h, but also takes one or more
-// offsets (|offset[s]_for_adjustment|) into the source strings, each offset
-// will be adjusted to point at the same logical place in the result strings.
-// If this isn't possible because an offset points past the end of the source
-// strings or into the middle of a multibyte sequence, the offending offset will
-// be set to string16::npos. |offset[s]_for_adjustment| may be NULL.
+// |offset[s]_for_adjustment| representing insertion/selection points between
+// characters: if |src| is "abcd", then 0 is before 'a', 2 is between 'b' and
+// 'c', and 4 is at the end of the string.  Valid input offsets range from 0 to
+// |src_len|.  On exit, each offset will have been modified to point at the same
+// logical position in the output string.  If an offset cannot be successfully
+// adjusted (e.g. because it points into the middle of a multibyte sequence), it
+// will be set to string16::npos.
+//
+// |offset[s]_for_adjustment| may be NULL.
 BASE_EXPORT bool UTF8ToUTF16AndAdjustOffset(const char* src,
                                             size_t src_len,
                                             string16* output,
@@ -44,14 +48,16 @@ BASE_EXPORT std::string UTF16ToUTF8AndAdjustOffsets(
     std::vector<size_t>* offsets_for_adjustment);
 
 // Limiting function callable by std::for_each which will replace any value
-// which is equal to or greater than |limit| with npos.
+// which is greater than |limit| with npos.  Typically this is called with a
+// string length to clamp offsets into the string to [0, length] (as opposed to
+// [0, length); see comments above).
 template <typename T>
 struct LimitOffset {
   explicit LimitOffset(size_t limit)
     : limit_(limit) {}
 
   void operator()(size_t& offset) {
-    if (offset >= limit_)
+    if (offset > limit_)
       offset = T::npos;
   }
 
diff --git a/base/strings/utf_offset_string_conversions_unittest.cc b/base/strings/utf_offset_string_conversions_unittest.cc
index 5545c0d..7626e4c 100644
--- a/base/strings/utf_offset_string_conversions_unittest.cc
+++ b/base/strings/utf_offset_string_conversions_unittest.cc
@@ -23,13 +23,16 @@ TEST(UTFOffsetStringConversionsTest, AdjustOffset) {
     size_t input_offset;
     size_t output_offset;
   } utf8_to_utf16_cases[] = {
-    {"", 0, kNpos},
+    {"", 0, 0},
+    {"", kNpos, kNpos},
     {"\xe4\xbd\xa0\xe5\xa5\xbd", 1, kNpos},
     {"\xe4\xbd\xa0\xe5\xa5\xbd", 3, 1},
     {"\xed\xb0\x80z", 3, 1},
     {"A\xF0\x90\x8C\x80z", 1, 1},
     {"A\xF0\x90\x8C\x80z", 2, kNpos},
     {"A\xF0\x90\x8C\x80z", 5, 3},
+    {"A\xF0\x90\x8C\x80z", 6, 4},
+    {"A\xF0\x90\x8C\x80z", kNpos, kNpos},
   };
   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(utf8_to_utf16_cases); ++i) {
     size_t offset = utf8_to_utf16_cases[i].input_offset;
@@ -42,18 +45,22 @@ TEST(UTFOffsetStringConversionsTest, AdjustOffset) {
     size_t input_offset;
     size_t output_offset;
   } utf16_to_utf8_cases[] = {
-      {{}, 0, kNpos},
+      {{}, 0, 0},
       // Converted to 3-byte utf-8 sequences
-      {{0x5909, 0x63DB}, 2, kNpos},
+      {{0x5909, 0x63DB}, 3, kNpos},
+      {{0x5909, 0x63DB}, 2, 6},
       {{0x5909, 0x63DB}, 1, 3},
+      {{0x5909, 0x63DB}, 0, 0},
       // Converted to 2-byte utf-8 sequences
       {{'A', 0x00bc, 0x00be, 'z'}, 1, 1},
       {{'A', 0x00bc, 0x00be, 'z'}, 2, 3},
       {{'A', 0x00bc, 0x00be, 'z'}, 3, 5},
+      {{'A', 0x00bc, 0x00be, 'z'}, 4, 6},
       // Surrogate pair
       {{'A', 0xd800, 0xdf00, 'z'}, 1, 1},
       {{'A', 0xd800, 0xdf00, 'z'}, 2, kNpos},
       {{'A', 0xd800, 0xdf00, 'z'}, 3, 5},
+      {{'A', 0xd800, 0xdf00, 'z'}, 4, 6},
   };
   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(utf16_to_utf8_cases); ++i) {
     size_t offset = utf16_to_utf8_cases[i].input_offset;
@@ -73,10 +80,10 @@ TEST(UTFOffsetStringConversionsTest, LimitOffsets) {
   size_t unlimited_count = 0;
   for (std::vector<size_t>::iterator ti = size_ts.begin(); ti != size_ts.end();
        ++ti) {
-    if (*ti < kLimit && *ti != kNpos)
+    if (*ti != kNpos)
       ++unlimited_count;
   }
-  EXPECT_EQ(10U, unlimited_count);
+  EXPECT_EQ(11U, unlimited_count);
 
   // Reverse the values in the vector and try again.
   size_ts.clear();
@@ -87,10 +94,10 @@ TEST(UTFOffsetStringConversionsTest, LimitOffsets) {
   unlimited_count = 0;
   for (std::vector<size_t>::iterator ti = size_ts.begin(); ti != size_ts.end();
        ++ti) {
-    if (*ti < kLimit && *ti != kNpos)
+    if (*ti != kNpos)
       ++unlimited_count;
   }
-  EXPECT_EQ(10U, unlimited_count);
+  EXPECT_EQ(11U, unlimited_count);
 }
 
 TEST(UTFOffsetStringConversionsTest, AdjustOffsets) {
@@ -99,13 +106,13 @@ TEST(UTFOffsetStringConversionsTest, AdjustOffsets) {
   // 1: abcXXXdef ==> abcXdef
   {
     std::vector<size_t> offsets;
-    for (size_t t = 0; t < 9; ++t)
+    for (size_t t = 0; t <= 9; ++t)
       offsets.push_back(t);
     {
       OffsetAdjuster offset_adjuster(&offsets);
       offset_adjuster.Add(OffsetAdjuster::Adjustment(3, 3, 1));
     }
-    size_t expected_1[] = {0, 1, 2, 3, kNpos, kNpos, 4, 5, 6};
+    size_t expected_1[] = {0, 1, 2, 3, kNpos, kNpos, 4, 5, 6, 7};
     EXPECT_EQ(offsets.size(), arraysize(expected_1));
     for (size_t i = 0; i < arraysize(expected_1); ++i)
       EXPECT_EQ(expected_1[i], offsets[i]);
@@ -114,7 +121,7 @@ TEST(UTFOffsetStringConversionsTest, AdjustOffsets) {
   // 2: XXXaXXXXbcXXXXXXXdefXXX ==> XaXXbcXXXXdefX
   {
     std::vector<size_t> offsets;
-    for (size_t t = 0; t < 23; ++t)
+    for (size_t t = 0; t <= 23; ++t)
       offsets.push_back(t);
     {
       OffsetAdjuster offset_adjuster(&offsets);
@@ -123,9 +130,10 @@ TEST(UTFOffsetStringConversionsTest, AdjustOffsets) {
       offset_adjuster.Add(OffsetAdjuster::Adjustment(10, 7, 4));
       offset_adjuster.Add(OffsetAdjuster::Adjustment(20, 3, 1));
     }
-    size_t expected_2[] = {0, kNpos, kNpos, 1, 2, kNpos, kNpos, kNpos, 4, 5, 6,
-                           kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 10, 11, 12,
-                           13, kNpos, kNpos};
+    size_t expected_2[] = {
+      0, kNpos, kNpos, 1, 2, kNpos, kNpos, kNpos, 4, 5, 6, kNpos, kNpos, kNpos,
+      kNpos, kNpos, kNpos, 10, 11, 12, 13, kNpos, kNpos, 14
+    };
     EXPECT_EQ(offsets.size(), arraysize(expected_2));
     for (size_t i = 0; i < arraysize(expected_2); ++i)
       EXPECT_EQ(expected_2[i], offsets[i]);
@@ -134,7 +142,7 @@ TEST(UTFOffsetStringConversionsTest, AdjustOffsets) {
   // 3: XXXaXXXXbcdXXXeXX ==> aXXXXbcdXXXe
   {
     std::vector<size_t> offsets;
-    for (size_t t = 0; t < 17; ++t)
+    for (size_t t = 0; t <= 17; ++t)
       offsets.push_back(t);
     {
       OffsetAdjuster offset_adjuster(&offsets);
@@ -143,8 +151,10 @@ TEST(UTFOffsetStringConversionsTest, AdjustOffsets) {
       offset_adjuster.Add(OffsetAdjuster::Adjustment(11, 3, 3));
       offset_adjuster.Add(OffsetAdjuster::Adjustment(15, 2, 0));
     }
-    size_t expected_3[] = {kNpos, kNpos, kNpos, 0, 1, kNpos, kNpos, kNpos, 5, 6,
-                           7, 8, kNpos, kNpos, 11, kNpos, kNpos};
+    size_t expected_3[] = {
+      0, kNpos, kNpos, 0, 1, kNpos, kNpos, kNpos, 5, 6, 7, 8, kNpos, kNpos, 11,
+      12, kNpos, 12
+    };
     EXPECT_EQ(offsets.size(), arraysize(expected_3));
     for (size_t i = 0; i < arraysize(expected_3); ++i)
       EXPECT_EQ(expected_3[i], offsets[i]);
author	pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2013-09-11 00:42:28 +0000
committer	pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2013-09-11 00:42:28 +0000
commit	cf7ca8aba2960aa5ba1b7accda08ba045a60c98d (patch)
tree	51e5f8dfd75a5efa9eb46c03fa2b59f073c2a43a /base/strings
parent	b8982bf7ec898420ced6999746dbc20a06fa0aff (diff)
download	chromium_src-cf7ca8aba2960aa5ba1b7accda08ba045a60c98d.zip chromium_src-cf7ca8aba2960aa5ba1b7accda08ba045a60c98d.tar.gz chromium_src-cf7ca8aba2960aa5ba1b7accda08ba045a60c98d.tar.bz2