Revert 81343 - Add multiple-offset versions of the various URL reformatting functions. Fixed a couple of erroneous unit tests of offsets into username/password. Note: This does not complete the work required for 78153 -- tis but the first 2/3rds. BUG=78153 TEST=Many unit tests updated and added. Review URL: http://codereview.chromium.org/6822038

TBR=mrossetti@chromium.org Review URL: http://codereview.chromium.org/6833011 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@81348 0039d316-1c4b-4281-b951-d872f2087c98
author: mrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-04-13 01:11:50 +0000
committer: mrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-04-13 01:11:50 +0000
commit: f7b09de5cca6923a03f20be3616830a0d3fafa88 (patch)
tree: 8fbfe4c736468da5a4b5a7203414a32add49f9a1 /base
parent: 4069944681ad77bcc80b5299aff0820a667b447e (diff)
download: chromium_src-f7b09de5cca6923a03f20be3616830a0d3fafa88.zip
chromium_src-f7b09de5cca6923a03f20be3616830a0d3fafa88.tar.gz
chromium_src-f7b09de5cca6923a03f20be3616830a0d3fafa88.tar.bz2
3 files changed, 33 insertions, 306 deletions
diff --git a/base/utf_offset_string_conversions.cc b/base/utf_offset_string_conversions.cc
index f091cb4..4c47ef8 100644
--- a/base/utf_offset_string_conversions.cc
+++ b/base/utf_offset_string_conversions.cc
@@ -1,12 +1,9 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
 #include "base/utf_offset_string_conversions.h"
 
-#include <algorithm>
-
-#include "base/scoped_ptr.h"
 #include "base/string_piece.h"
 #include "base/utf_string_conversion_utils.h"
 
@@ -24,16 +21,13 @@ template<typename SRC_CHAR>
 bool ConvertUnicode(const SRC_CHAR* src,
                     size_t src_len,
                     std::wstring* output,
-                    std::vector<size_t>* offsets_for_adjustment) {
-  if (offsets_for_adjustment) {
-    std::for_each(offsets_for_adjustment->begin(),
-                  offsets_for_adjustment->end(),
-                  LimitOffset<std::wstring>(src_len));
-  }
+                    size_t* offset_for_adjustment) {
+  size_t output_offset =
+      (offset_for_adjustment && *offset_for_adjustment < src_len) ?
+          *offset_for_adjustment : std::wstring::npos;
 
   // ICU requires 32-bit numbers.
   bool success = true;
-  AdjustOffset::Adjustments adjustments;
   int32 src_len32 = static_cast<int32>(src_len);
   for (int32 i = 0; i < src_len32; i++) {
     uint32 code_point;
@@ -45,23 +39,21 @@ bool ConvertUnicode(const SRC_CHAR* src,
       chars_written = WriteUnicodeCharacter(0xFFFD, output);
       success = false;
     }
-    if (offsets_for_adjustment) {
+    if ((output_offset != std::wstring::npos) &&
+        (*offset_for_adjustment > original_i)) {
       // NOTE: ReadUnicodeCharacter() adjusts |i| to point _at_ the last
       // character read, not after it (so that incrementing it in the loop
       // increment will place it at the right location), so we need to account
       // for that in determining the amount that was read.
-      adjustments.push_back(AdjustOffset::Adjustment(
-          original_i, i - original_i + 1, chars_written));
+      if (*offset_for_adjustment <= static_cast<size_t>(i))
+        output_offset = std::wstring::npos;
+      else
+        output_offset += chars_written - (i - original_i + 1);
     }
   }
 
-  // Make offset adjustment.
-  if (offsets_for_adjustment && !adjustments.empty()) {
-    std::for_each(offsets_for_adjustment->begin(),
-                  offsets_for_adjustment->end(),
-                  AdjustOffset(adjustments));
-  }
-
+  if (offset_for_adjustment)
+    *offset_for_adjustment = output_offset;
   return success;
 }
 
@@ -71,44 +63,16 @@ bool UTF8ToWideAndAdjustOffset(const char* src,
                                size_t src_len,
                                std::wstring* output,
                                size_t* offset_for_adjustment) {
-  std::vector<size_t> offsets;
-  if (offset_for_adjustment)
-    offsets.push_back(*offset_for_adjustment);
-  PrepareForUTF16Or32Output(src, src_len, output);
-  bool ret = ConvertUnicode(src, src_len, output, &offsets);
-  if (offset_for_adjustment)
-    *offset_for_adjustment = offsets[0];
-  return ret;
-}
-
-bool UTF8ToWideAndAdjustOffsets(const char* src,
-                                size_t src_len,
-                                std::wstring* output,
-                                std::vector<size_t>* offsets_for_adjustment) {
   PrepareForUTF16Or32Output(src, src_len, output);
-  return ConvertUnicode(src, src_len, output, offsets_for_adjustment);
+  return ConvertUnicode(src, src_len, output, offset_for_adjustment);
 }
 
 std::wstring UTF8ToWideAndAdjustOffset(const base::StringPiece& utf8,
                                        size_t* offset_for_adjustment) {
-  std::vector<size_t> offsets;
-  if (offset_for_adjustment)
-    offsets.push_back(*offset_for_adjustment);
-  std::wstring result;
-  UTF8ToWideAndAdjustOffsets(utf8.data(), utf8.length(), &result,
-                             &offsets);
-  if (offset_for_adjustment)
-    *offset_for_adjustment = offsets[0];
-  return result;
-}
-
-std::wstring UTF8ToWideAndAdjustOffsets(const base::StringPiece& utf8,
-                                        std::vector<size_t>*
-                                            offsets_for_adjustment) {
-  std::wstring result;
-  UTF8ToWideAndAdjustOffsets(utf8.data(), utf8.length(), &result,
-                             offsets_for_adjustment);
-  return result;
+  std::wstring ret;
+  UTF8ToWideAndAdjustOffset(utf8.data(), utf8.length(), &ret,
+                            offset_for_adjustment);
+  return ret;
 }
 
 // UTF-16 <-> Wide -------------------------------------------------------------
@@ -126,19 +90,6 @@ bool UTF16ToWideAndAdjustOffset(const char16* src,
   return true;
 }
 
-bool UTF16ToWideAndAdjustOffsets(const char16* src,
-                                 size_t src_len,
-                                 std::wstring* output,
-                                 std::vector<size_t>* offsets_for_adjustment) {
-  output->assign(src, src_len);
-  if (offsets_for_adjustment) {
-    std::for_each(offsets_for_adjustment->begin(),
-                  offsets_for_adjustment->end(),
-                  LimitOffset<std::wstring>(src_len));
-  }
-  return true;
-}
-
 std::wstring UTF16ToWideAndAdjustOffset(const string16& utf16,
                                         size_t* offset_for_adjustment) {
   if (offset_for_adjustment && (*offset_for_adjustment >= utf16.length()))
@@ -146,109 +97,25 @@ std::wstring UTF16ToWideAndAdjustOffset(const string16& utf16,
   return utf16;
 }
 
-std::wstring UTF16ToWideAndAdjustOffsets(
-    const string16& utf16,
-    std::vector<size_t>* offsets_for_adjustment) {
-  if (offsets_for_adjustment) {
-    std::for_each(offsets_for_adjustment->begin(),
-                  offsets_for_adjustment->end(),
-                  LimitOffset<std::wstring>(utf16.length()));
-  }
-  return utf16;
-}
-
 #elif defined(WCHAR_T_IS_UTF32)
 
 bool UTF16ToWideAndAdjustOffset(const char16* src,
                                 size_t src_len,
                                 std::wstring* output,
                                 size_t* offset_for_adjustment) {
-  std::vector<size_t> offsets;
-  if (offset_for_adjustment)
-    offsets.push_back(*offset_for_adjustment);
-  output->clear();
-  // Assume that normally we won't have any non-BMP characters so the counts
-  // will be the same.
-  output->reserve(src_len);
-  bool ret = ConvertUnicode(src, src_len, output, &offsets);
-  if (offset_for_adjustment)
-    *offset_for_adjustment = offsets[0];
-  return ret;
-}
-
-bool UTF16ToWideAndAdjustOffsets(const char16* src,
-                                 size_t src_len,
-                                 std::wstring* output,
-                                 std::vector<size_t>* offsets_for_adjustment) {
   output->clear();
   // Assume that normally we won't have any non-BMP characters so the counts
   // will be the same.
   output->reserve(src_len);
-  return ConvertUnicode(src, src_len, output, offsets_for_adjustment);
+  return ConvertUnicode(src, src_len, output, offset_for_adjustment);
 }
 
 std::wstring UTF16ToWideAndAdjustOffset(const string16& utf16,
                                         size_t* offset_for_adjustment) {
-  std::vector<size_t> offsets;
-  if (offset_for_adjustment)
-    offsets.push_back(*offset_for_adjustment);
-  std::wstring result;
-  UTF16ToWideAndAdjustOffsets(utf16.data(), utf16.length(), &result,
-                              &offsets);
-  if (offset_for_adjustment)
-    *offset_for_adjustment = offsets[0];
-  return result;
-}
-
-std::wstring UTF16ToWideAndAdjustOffsets(
-    const string16& utf16,
-    std::vector<size_t>* offsets_for_adjustment) {
-  std::wstring result;
-  UTF16ToWideAndAdjustOffsets(utf16.data(), utf16.length(), &result,
-                              offsets_for_adjustment);
-  return result;
+  std::wstring ret;
+  UTF16ToWideAndAdjustOffset(utf16.data(), utf16.length(), &ret,
+                             offset_for_adjustment);
+  return ret;
 }
 
 #endif  // defined(WCHAR_T_IS_UTF32)
-
-template <typename T>
-LimitOffset<T>::LimitOffset(size_t limit)
-  : limit_(limit) {}
-
-template <typename T>
-void LimitOffset<T>::operator()(size_t& offset) {
-  if (offset >= limit_)
-    offset = T::npos;
-}
-
-AdjustOffset::Adjustment::Adjustment(size_t location,
-                                     size_t old_length,
-                                     size_t new_length)
-  : location(location),
-    old_length(old_length),
-    new_length(new_length) {}
-
-AdjustOffset::AdjustOffset(const Adjustments& adjustments)
-    : adjustments_(adjustments) {}
-
-void AdjustOffset::operator()(size_t& offset) {
-  if (offset == std::wstring::npos)
-    return;
-  size_t adjustment = 0;
-  for (Adjustments::const_iterator i = adjustments_.begin();
-       i != adjustments_.end(); ++i) {
-    size_t location = i->location;
-    if (offset == location && i->new_length == 0) {
-      offset = std::wstring::npos;
-      return;
-    }
-    if (offset <= location)
-      break;
-    if (offset < (location + i->old_length)) {
-      offset = std::wstring::npos;
-      return;
-    }
-    adjustment += (i->old_length - i->new_length);
-  }
-  offset -= adjustment;
-}
diff --git a/base/utf_offset_string_conversions.h b/base/utf_offset_string_conversions.h
index 19b312a..13df1b4 100644
--- a/base/utf_offset_string_conversions.h
+++ b/base/utf_offset_string_conversions.h
@@ -7,7 +7,6 @@
 #pragma once
 
 #include <string>
-#include <vector>
 
 #include "base/base_api.h"
 #include "base/string16.h"
@@ -16,78 +15,23 @@ namespace base {
 class StringPiece;
 }
 
-// Like the conversions in utf_string_conversions.h, but also takes one or more
-// offsets (|offset[s]_for_adjustment|) into the source strings, each offset
-// will be adjusted to point at the same logical place in the result strings.
-// If this isn't possible because an offset points past the end of the source
-// strings or into the middle of a multibyte sequence, the offending offset will
-// be set to std::wstring::npos. |offset[s]_for_adjustment| may be NULL.
+// Like the conversions in utf_string_conversions.h, but also take offsets into
+// the source strings, which will be adjusted to point at the same logical place
+// in the result strings.  If this isn't possible because the offsets point past
+// the end of the source strings or into the middle of multibyte sequences, they
+// will be set to std::wstring::npos.  |offset_for_adjustment| may be NULL.
 BASE_API bool UTF8ToWideAndAdjustOffset(const char* src,
                                         size_t src_len,
                                         std::wstring* output,
                                         size_t* offset_for_adjustment);
-BASE_API bool UTF8ToWideAndAdjustOffsets(
-    const char* src,
-    size_t src_len,
-    std::wstring* output,
-    std::vector<size_t>* offsets_for_adjustment);
-
 BASE_API std::wstring UTF8ToWideAndAdjustOffset(const base::StringPiece& utf8,
                                                 size_t* offset_for_adjustment);
-BASE_API std::wstring UTF8ToWideAndAdjustOffsets(
-    const base::StringPiece& utf8,
-    std::vector<size_t>* offsets_for_adjustment);
 
 BASE_API bool UTF16ToWideAndAdjustOffset(const char16* src,
                                          size_t src_len,
                                          std::wstring* output,
                                          size_t* offset_for_adjustment);
-BASE_API bool UTF16ToWideAndAdjustOffsets(
-    const char16* src,
-    size_t src_len,
-    std::wstring* output,
-    std::vector<size_t>* offsets_for_adjustment);
-
 BASE_API std::wstring UTF16ToWideAndAdjustOffset(const string16& utf16,
                                                  size_t* offset_for_adjustment);
-BASE_API std::wstring UTF16ToWideAndAdjustOffsets(
-    const string16& utf16,
-    std::vector<size_t>* offsets_for_adjustment);
-
-// Limiting function callable by std::for_each which will replace any value
-// which is equal to or greater than |limit| with npos.
-template <typename T>
-struct LimitOffset {
-  explicit LimitOffset(size_t limit);
-  void operator()(size_t& offset);
-
-  size_t limit_;
-};
-
-// Adjustment function called by std::transform which will adjust any offset
-// that occurs after one or more modified substrings. To use, create any
-// number of AdjustOffset::Adjustments, drop them into a vector, then call
-// std::transform with the transform function being something similar to
-// AdjustOffset(adjustments). Each Adjustment gives the original |location|
-// of the encoded section and the |old_length| and |new_length| of the section
-// before and after decoding.
-struct AdjustOffset {
-  // Helper structure which indicates where an encoded character occurred
-  // and how long that encoding was.
-  struct Adjustment {
-    Adjustment(size_t location, size_t old_length, size_t new_length);
-
-    size_t location;
-    size_t old_length;
-    size_t new_length;
-  };
-
-  typedef std::vector<Adjustment> Adjustments;
-
-  explicit AdjustOffset(const Adjustments& adjustments);
-  void operator()(size_t& offset);
-
-  const Adjustments& adjustments_;
-};
 
 #endif  // BASE_UTF_OFFSET_STRING_CONVERSIONS_H_
diff --git a/base/utf_offset_string_conversions_unittest.cc b/base/utf_offset_string_conversions_unittest.cc
index b731b9e..4f13ab3 100644
--- a/base/utf_offset_string_conversions_unittest.cc
+++ b/base/utf_offset_string_conversions_unittest.cc
@@ -1,9 +1,7 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
-#include <algorithm>
-
 #include "base/logging.h"
 #include "base/string_piece.h"
 #include "base/utf_offset_string_conversions.h"
@@ -13,8 +11,6 @@ namespace base {
 
 namespace {
 
-static const size_t kNpos = std::wstring::npos;
-
 // Given a null-terminated string of wchar_t with each wchar_t representing
 // a UTF-16 code unit, returns a string16 made up of wchar_t's in the input.
 // Each wchar_t should be <= 0xFFFF and a non-BMP character (> U+FFFF)
@@ -44,12 +40,12 @@ TEST(UTFOffsetStringConversionsTest, AdjustOffset) {
     size_t input_offset;
     size_t output_offset;
   } utf8_to_wide_cases[] = {
-    {"", 0, kNpos},
-    {"\xe4\xbd\xa0\xe5\xa5\xbd", 1, kNpos},
+    {"", 0, std::wstring::npos},
+    {"\xe4\xbd\xa0\xe5\xa5\xbd", 1, std::wstring::npos},
     {"\xe4\xbd\xa0\xe5\xa5\xbd", 3, 1},
     {"\xed\xb0\x80z", 3, 1},
     {"A\xF0\x90\x8C\x80z", 1, 1},
-    {"A\xF0\x90\x8C\x80z", 2, kNpos},
+    {"A\xF0\x90\x8C\x80z", 2, std::wstring::npos},
 #if defined(WCHAR_T_IS_UTF16)
     {"A\xF0\x90\x8C\x80z", 5, 3},
 #elif defined(WCHAR_T_IS_UTF32)
@@ -69,7 +65,7 @@ TEST(UTFOffsetStringConversionsTest, AdjustOffset) {
     size_t output_offset;
   } utf16_to_wide_cases[] = {
     {L"\xD840\xDC00\x4E00", 0, 0},
-    {L"\xD840\xDC00\x4E00", 1, kNpos},
+    {L"\xD840\xDC00\x4E00", 1, std::wstring::npos},
     {L"\xD840\xDC00\x4E00", 2, 1},
   };
   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(utf16_to_wide_cases); ++i) {
@@ -81,84 +77,4 @@ TEST(UTFOffsetStringConversionsTest, AdjustOffset) {
 #endif
 }
 
-TEST(UTFOffsetStringConversionsTest, LimitOffsets) {
-  const size_t kLimit = 10;
-  const size_t kItems = 20;
-  std::vector<size_t> size_ts;
-  for (size_t t = 0; t < kItems; ++t)
-    size_ts.push_back(t);
-  std::for_each(size_ts.begin(), size_ts.end(),
-                LimitOffset<std::wstring>(kLimit));
-  size_t unlimited_count = 0;
-  for (std::vector<size_t>::iterator ti = size_ts.begin(); ti != size_ts.end();
-       ++ti) {
-    if (*ti < kLimit && *ti != kNpos)
-      ++unlimited_count;
-  }
-  EXPECT_EQ(10U, unlimited_count);
-
-  // Reverse the values in the vector and try again.
-  size_ts.clear();
-  for (size_t t = kItems; t > 0; --t)
-    size_ts.push_back(t - 1);
-  std::for_each(size_ts.begin(), size_ts.end(),
-                LimitOffset<std::wstring>(kLimit));
-  unlimited_count = 0;
-  for (std::vector<size_t>::iterator ti = size_ts.begin(); ti != size_ts.end();
-       ++ti) {
-    if (*ti < kLimit && *ti != kNpos)
-      ++unlimited_count;
-  }
-  EXPECT_EQ(10U, unlimited_count);
-}
-
-TEST(UTFOffsetStringConversionsTest, AdjustOffsets) {
-  // Imagine we have strings as shown in the following cases where the
-  // X's represent encoded characters.
-  // 1: abcXXXdef ==> abcXdef
-  std::vector<size_t> offsets;
-  for (size_t t = 0; t < 9; ++t)
-    offsets.push_back(t);
-  AdjustOffset::Adjustments adjustments;
-  adjustments.push_back(AdjustOffset::Adjustment(3, 3, 1));
-  std::for_each(offsets.begin(), offsets.end(), AdjustOffset(adjustments));
-  size_t expected_1[] = {0, 1, 2, 3, kNpos, kNpos, 4, 5, 6};
-  EXPECT_EQ(offsets.size(), arraysize(expected_1));
-  for (size_t i = 0; i < arraysize(expected_1); ++i)
-    EXPECT_EQ(expected_1[i], offsets[i]);
-
-  // 2: XXXaXXXXbcXXXXXXXdefXXX ==> XaXXbcXXXXdefX
-  offsets.clear();
-  for (size_t t = 0; t < 23; ++t)
-    offsets.push_back(t);
-  adjustments.clear();
-  adjustments.push_back(AdjustOffset::Adjustment(0, 3, 1));
-  adjustments.push_back(AdjustOffset::Adjustment(4, 4, 2));
-  adjustments.push_back(AdjustOffset::Adjustment(10, 7, 4));
-  adjustments.push_back(AdjustOffset::Adjustment(20, 3, 1));
-  std::for_each(offsets.begin(), offsets.end(), AdjustOffset(adjustments));
-  size_t expected_2[] = {0, kNpos, kNpos, 1, 2, kNpos, kNpos, kNpos, 4, 5, 6,
-                         kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 10, 11, 12,
-                         13, kNpos, kNpos};
-  EXPECT_EQ(offsets.size(), arraysize(expected_2));
-  for (size_t i = 0; i < arraysize(expected_2); ++i)
-    EXPECT_EQ(expected_2[i], offsets[i]);
-
-  // 3: XXXaXXXXbcdXXXeXX ==> aXXXXbcdXXXe
-  offsets.clear();
-  for (size_t t = 0; t < 17; ++t)
-    offsets.push_back(t);
-  adjustments.clear();
-  adjustments.push_back(AdjustOffset::Adjustment(0, 3, 0));
-  adjustments.push_back(AdjustOffset::Adjustment(4, 4, 4));
-  adjustments.push_back(AdjustOffset::Adjustment(11, 3, 3));
-  adjustments.push_back(AdjustOffset::Adjustment(15, 2, 0));
-  std::for_each(offsets.begin(), offsets.end(), AdjustOffset(adjustments));
-  size_t expected_3[] = {kNpos, kNpos, kNpos, 0, 1, kNpos, kNpos, kNpos, 5, 6,
-                         7, 8, kNpos, kNpos, 11, kNpos, kNpos};
-  EXPECT_EQ(offsets.size(), arraysize(expected_3));
-  for (size_t i = 0; i < arraysize(expected_3); ++i)
-    EXPECT_EQ(expected_3[i], offsets[i]);
-}
-
 }  // namaspace base
author	mrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2011-04-13 01:11:50 +0000
committer	mrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2011-04-13 01:11:50 +0000
commit	f7b09de5cca6923a03f20be3616830a0d3fafa88 (patch)
tree	8fbfe4c736468da5a4b5a7203414a32add49f9a1 /base
parent	4069944681ad77bcc80b5299aff0820a667b447e (diff)
download	chromium_src-f7b09de5cca6923a03f20be3616830a0d3fafa88.zip chromium_src-f7b09de5cca6923a03f20be3616830a0d3fafa88.tar.gz chromium_src-f7b09de5cca6923a03f20be3616830a0d3fafa88.tar.bz2