Add multiple-offset versions of the various URL reformatting functions. Fix a couple of erroneous unit tests that checked offsets into the username/password.

Note: This does not complete the work required for bug 78153 -- it is only the first two-thirds.

BUG=78153
TEST=Many unit tests updated and added.
Review URL: http://codereview.chromium.org/6822038
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@81343 0039d316-1c4b-4281-b951-d872f2087c98
author: mrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-04-13 00:45:39 +0000
committer: mrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-04-13 00:45:39 +0000
commit: a47f8eadd67f75d3b663fdcc898caabb335bad0b (patch)
tree: fdf872770d4cd58ee753f219475850490a008f6d /base/utf_offset_string_conversions.cc
parent: 2e0e8253a232fa499d22e47753c5bbadaebd69e7 (diff)
download: chromium_src-a47f8eadd67f75d3b663fdcc898caabb335bad0b.zip
chromium_src-a47f8eadd67f75d3b663fdcc898caabb335bad0b.tar.gz
chromium_src-a47f8eadd67f75d3b663fdcc898caabb335bad0b.tar.bz2
1 files changed, 156 insertions, 23 deletions
diff --git a/base/utf_offset_string_conversions.cc b/base/utf_offset_string_conversions.cc
index 4c47ef8..f091cb4 100644
--- a/base/utf_offset_string_conversions.cc
+++ b/base/utf_offset_string_conversions.cc
@@ -1,9 +1,12 @@
-// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
 #include "base/utf_offset_string_conversions.h"
 
+#include <algorithm>
+
+#include "base/scoped_ptr.h"
 #include "base/string_piece.h"
 #include "base/utf_string_conversion_utils.h"
 
@@ -21,13 +24,16 @@ template<typename SRC_CHAR>
 bool ConvertUnicode(const SRC_CHAR* src,
                     size_t src_len,
                     std::wstring* output,
-                    size_t* offset_for_adjustment) {
-  size_t output_offset =
-      (offset_for_adjustment && *offset_for_adjustment < src_len) ?
-          *offset_for_adjustment : std::wstring::npos;
+                    std::vector<size_t>* offsets_for_adjustment) {
+  if (offsets_for_adjustment) {
+    std::for_each(offsets_for_adjustment->begin(),
+                  offsets_for_adjustment->end(),
+                  LimitOffset<std::wstring>(src_len));
+  }
 
   // ICU requires 32-bit numbers.
   bool success = true;
+  AdjustOffset::Adjustments adjustments;
   int32 src_len32 = static_cast<int32>(src_len);
   for (int32 i = 0; i < src_len32; i++) {
     uint32 code_point;
@@ -39,21 +45,23 @@ bool ConvertUnicode(const SRC_CHAR* src,
       chars_written = WriteUnicodeCharacter(0xFFFD, output);
       success = false;
     }
-    if ((output_offset != std::wstring::npos) &&
-        (*offset_for_adjustment > original_i)) {
+    if (offsets_for_adjustment) {
       // NOTE: ReadUnicodeCharacter() adjusts |i| to point _at_ the last
       // character read, not after it (so that incrementing it in the loop
       // increment will place it at the right location), so we need to account
       // for that in determining the amount that was read.
-      if (*offset_for_adjustment <= static_cast<size_t>(i))
-        output_offset = std::wstring::npos;
-      else
-        output_offset += chars_written - (i - original_i + 1);
+      adjustments.push_back(AdjustOffset::Adjustment(
+          original_i, i - original_i + 1, chars_written));
     }
   }
 
-  if (offset_for_adjustment)
-    *offset_for_adjustment = output_offset;
+  // Make offset adjustment.
+  if (offsets_for_adjustment && !adjustments.empty()) {
+    std::for_each(offsets_for_adjustment->begin(),
+                  offsets_for_adjustment->end(),
+                  AdjustOffset(adjustments));
+  }
+
   return success;
 }
 
@@ -63,16 +71,44 @@ bool UTF8ToWideAndAdjustOffset(const char* src,
                                size_t src_len,
                                std::wstring* output,
                                size_t* offset_for_adjustment) {
+  std::vector<size_t> offsets;
+  if (offset_for_adjustment)
+    offsets.push_back(*offset_for_adjustment);
+  PrepareForUTF16Or32Output(src, src_len, output);
+  bool ret = ConvertUnicode(src, src_len, output, &offsets);
+  if (offset_for_adjustment)
+    *offset_for_adjustment = offsets[0];
+  return ret;
+}
+
+bool UTF8ToWideAndAdjustOffsets(const char* src,
+                                size_t src_len,
+                                std::wstring* output,
+                                std::vector<size_t>* offsets_for_adjustment) {
   PrepareForUTF16Or32Output(src, src_len, output);
-  return ConvertUnicode(src, src_len, output, offset_for_adjustment);
+  return ConvertUnicode(src, src_len, output, offsets_for_adjustment);
 }
 
 std::wstring UTF8ToWideAndAdjustOffset(const base::StringPiece& utf8,
                                        size_t* offset_for_adjustment) {
-  std::wstring ret;
-  UTF8ToWideAndAdjustOffset(utf8.data(), utf8.length(), &ret,
-                            offset_for_adjustment);
-  return ret;
+  std::vector<size_t> offsets;
+  if (offset_for_adjustment)
+    offsets.push_back(*offset_for_adjustment);
+  std::wstring result;
+  UTF8ToWideAndAdjustOffsets(utf8.data(), utf8.length(), &result,
+                             &offsets);
+  if (offset_for_adjustment)
+    *offset_for_adjustment = offsets[0];
+  return result;
+}
+
+std::wstring UTF8ToWideAndAdjustOffsets(const base::StringPiece& utf8,
+                                        std::vector<size_t>*
+                                            offsets_for_adjustment) {
+  std::wstring result;
+  UTF8ToWideAndAdjustOffsets(utf8.data(), utf8.length(), &result,
+                             offsets_for_adjustment);
+  return result;
 }
 
 // UTF-16 <-> Wide -------------------------------------------------------------
@@ -90,6 +126,19 @@ bool UTF16ToWideAndAdjustOffset(const char16* src,
   return true;
 }
 
+bool UTF16ToWideAndAdjustOffsets(const char16* src,
+                                 size_t src_len,
+                                 std::wstring* output,
+                                 std::vector<size_t>* offsets_for_adjustment) {
+  output->assign(src, src_len);
+  if (offsets_for_adjustment) {
+    std::for_each(offsets_for_adjustment->begin(),
+                  offsets_for_adjustment->end(),
+                  LimitOffset<std::wstring>(src_len));
+  }
+  return true;
+}
+
 std::wstring UTF16ToWideAndAdjustOffset(const string16& utf16,
                                         size_t* offset_for_adjustment) {
   if (offset_for_adjustment && (*offset_for_adjustment >= utf16.length()))
@@ -97,25 +146,109 @@ std::wstring UTF16ToWideAndAdjustOffset(const string16& utf16,
   return utf16;
 }
 
+std::wstring UTF16ToWideAndAdjustOffsets(
+    const string16& utf16,
+    std::vector<size_t>* offsets_for_adjustment) {
+  if (offsets_for_adjustment) {
+    std::for_each(offsets_for_adjustment->begin(),
+                  offsets_for_adjustment->end(),
+                  LimitOffset<std::wstring>(utf16.length()));
+  }
+  return utf16;
+}
+
 #elif defined(WCHAR_T_IS_UTF32)
 
 bool UTF16ToWideAndAdjustOffset(const char16* src,
                                 size_t src_len,
                                 std::wstring* output,
                                 size_t* offset_for_adjustment) {
+  std::vector<size_t> offsets;
+  if (offset_for_adjustment)
+    offsets.push_back(*offset_for_adjustment);
+  output->clear();
+  // Assume that normally we won't have any non-BMP characters so the counts
+  // will be the same.
+  output->reserve(src_len);
+  bool ret = ConvertUnicode(src, src_len, output, &offsets);
+  if (offset_for_adjustment)
+    *offset_for_adjustment = offsets[0];
+  return ret;
+}
+
+bool UTF16ToWideAndAdjustOffsets(const char16* src,
+                                 size_t src_len,
+                                 std::wstring* output,
+                                 std::vector<size_t>* offsets_for_adjustment) {
   output->clear();
   // Assume that normally we won't have any non-BMP characters so the counts
   // will be the same.
   output->reserve(src_len);
-  return ConvertUnicode(src, src_len, output, offset_for_adjustment);
+  return ConvertUnicode(src, src_len, output, offsets_for_adjustment);
 }
 
 std::wstring UTF16ToWideAndAdjustOffset(const string16& utf16,
                                         size_t* offset_for_adjustment) {
-  std::wstring ret;
-  UTF16ToWideAndAdjustOffset(utf16.data(), utf16.length(), &ret,
-                             offset_for_adjustment);
-  return ret;
+  std::vector<size_t> offsets;
+  if (offset_for_adjustment)
+    offsets.push_back(*offset_for_adjustment);
+  std::wstring result;
+  UTF16ToWideAndAdjustOffsets(utf16.data(), utf16.length(), &result,
+                              &offsets);
+  if (offset_for_adjustment)
+    *offset_for_adjustment = offsets[0];
+  return result;
+}
+
+std::wstring UTF16ToWideAndAdjustOffsets(
+    const string16& utf16,
+    std::vector<size_t>* offsets_for_adjustment) {
+  std::wstring result;
+  UTF16ToWideAndAdjustOffsets(utf16.data(), utf16.length(), &result,
+                              offsets_for_adjustment);
+  return result;
 }
 
 #endif  // defined(WCHAR_T_IS_UTF32)
+
+template <typename T>
+LimitOffset<T>::LimitOffset(size_t limit)
+  : limit_(limit) {}
+
+template <typename T>
+void LimitOffset<T>::operator()(size_t& offset) {
+  if (offset >= limit_)
+    offset = T::npos;
+}
+
+AdjustOffset::Adjustment::Adjustment(size_t location,
+                                     size_t old_length,
+                                     size_t new_length)
+  : location(location),
+    old_length(old_length),
+    new_length(new_length) {}
+
+AdjustOffset::AdjustOffset(const Adjustments& adjustments)
+    : adjustments_(adjustments) {}
+
+void AdjustOffset::operator()(size_t& offset) {
+  if (offset == std::wstring::npos)
+    return;
+  size_t adjustment = 0;
+  for (Adjustments::const_iterator i = adjustments_.begin();
+       i != adjustments_.end(); ++i) {
+    size_t location = i->location;
+    if (offset == location && i->new_length == 0) {
+      offset = std::wstring::npos;
+      return;
+    }
+    if (offset <= location)
+      break;
+    if (offset < (location + i->old_length)) {
+      offset = std::wstring::npos;
+      return;
+    }
+    adjustment += (i->old_length - i->new_length);
+  }
+  offset -= adjustment;
+}
author	mrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2011-04-13 00:45:39 +0000
committer	mrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2011-04-13 00:45:39 +0000
commit	a47f8eadd67f75d3b663fdcc898caabb335bad0b (patch)
tree	fdf872770d4cd58ee753f219475850490a008f6d /base/utf_offset_string_conversions.cc
parent	2e0e8253a232fa499d22e47753c5bbadaebd69e7 (diff)
download	chromium_src-a47f8eadd67f75d3b663fdcc898caabb335bad0b.zip chromium_src-a47f8eadd67f75d3b663fdcc898caabb335bad0b.tar.gz chromium_src-a47f8eadd67f75d3b663fdcc898caabb335bad0b.tar.bz2