diff options
author | mrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-04-13 00:45:39 +0000 |
---|---|---|
committer | mrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-04-13 00:45:39 +0000 |
commit | a47f8eadd67f75d3b663fdcc898caabb335bad0b (patch) | |
tree | fdf872770d4cd58ee753f219475850490a008f6d /base/utf_offset_string_conversions.cc | |
parent | 2e0e8253a232fa499d22e47753c5bbadaebd69e7 (diff) | |
download | chromium_src-a47f8eadd67f75d3b663fdcc898caabb335bad0b.zip chromium_src-a47f8eadd67f75d3b663fdcc898caabb335bad0b.tar.gz chromium_src-a47f8eadd67f75d3b663fdcc898caabb335bad0b.tar.bz2 |
Add multiple-offset versions of the various URL reformatting functions. Fix a couple of erroneous unit tests of offsets into the username/password.
Note: This does not complete the work required for bug 78153 -- it is only the first two-thirds.
BUG=78153
TEST=Many unit tests updated and added.
Review URL: http://codereview.chromium.org/6822038
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@81343 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base/utf_offset_string_conversions.cc')
-rw-r--r-- | base/utf_offset_string_conversions.cc | 179 |
1 files changed, 156 insertions, 23 deletions
diff --git a/base/utf_offset_string_conversions.cc b/base/utf_offset_string_conversions.cc index 4c47ef8..f091cb4 100644 --- a/base/utf_offset_string_conversions.cc +++ b/base/utf_offset_string_conversions.cc @@ -1,9 +1,12 @@ -// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "base/utf_offset_string_conversions.h" +#include <algorithm> + +#include "base/scoped_ptr.h" #include "base/string_piece.h" #include "base/utf_string_conversion_utils.h" @@ -21,13 +24,16 @@ template<typename SRC_CHAR> bool ConvertUnicode(const SRC_CHAR* src, size_t src_len, std::wstring* output, - size_t* offset_for_adjustment) { - size_t output_offset = - (offset_for_adjustment && *offset_for_adjustment < src_len) ? - *offset_for_adjustment : std::wstring::npos; + std::vector<size_t>* offsets_for_adjustment) { + if (offsets_for_adjustment) { + std::for_each(offsets_for_adjustment->begin(), + offsets_for_adjustment->end(), + LimitOffset<std::wstring>(src_len)); + } // ICU requires 32-bit numbers. bool success = true; + AdjustOffset::Adjustments adjustments; int32 src_len32 = static_cast<int32>(src_len); for (int32 i = 0; i < src_len32; i++) { uint32 code_point; @@ -39,21 +45,23 @@ bool ConvertUnicode(const SRC_CHAR* src, chars_written = WriteUnicodeCharacter(0xFFFD, output); success = false; } - if ((output_offset != std::wstring::npos) && - (*offset_for_adjustment > original_i)) { + if (offsets_for_adjustment) { // NOTE: ReadUnicodeCharacter() adjusts |i| to point _at_ the last // character read, not after it (so that incrementing it in the loop // increment will place it at the right location), so we need to account // for that in determining the amount that was read. 
- if (*offset_for_adjustment <= static_cast<size_t>(i)) - output_offset = std::wstring::npos; - else - output_offset += chars_written - (i - original_i + 1); + adjustments.push_back(AdjustOffset::Adjustment( + original_i, i - original_i + 1, chars_written)); } } - if (offset_for_adjustment) - *offset_for_adjustment = output_offset; + // Make offset adjustment. + if (offsets_for_adjustment && !adjustments.empty()) { + std::for_each(offsets_for_adjustment->begin(), + offsets_for_adjustment->end(), + AdjustOffset(adjustments)); + } + return success; } @@ -63,16 +71,44 @@ bool UTF8ToWideAndAdjustOffset(const char* src, size_t src_len, std::wstring* output, size_t* offset_for_adjustment) { + std::vector<size_t> offsets; + if (offset_for_adjustment) + offsets.push_back(*offset_for_adjustment); + PrepareForUTF16Or32Output(src, src_len, output); + bool ret = ConvertUnicode(src, src_len, output, &offsets); + if (offset_for_adjustment) + *offset_for_adjustment = offsets[0]; + return ret; +} + +bool UTF8ToWideAndAdjustOffsets(const char* src, + size_t src_len, + std::wstring* output, + std::vector<size_t>* offsets_for_adjustment) { PrepareForUTF16Or32Output(src, src_len, output); - return ConvertUnicode(src, src_len, output, offset_for_adjustment); + return ConvertUnicode(src, src_len, output, offsets_for_adjustment); } std::wstring UTF8ToWideAndAdjustOffset(const base::StringPiece& utf8, size_t* offset_for_adjustment) { - std::wstring ret; - UTF8ToWideAndAdjustOffset(utf8.data(), utf8.length(), &ret, - offset_for_adjustment); - return ret; + std::vector<size_t> offsets; + if (offset_for_adjustment) + offsets.push_back(*offset_for_adjustment); + std::wstring result; + UTF8ToWideAndAdjustOffsets(utf8.data(), utf8.length(), &result, + &offsets); + if (offset_for_adjustment) + *offset_for_adjustment = offsets[0]; + return result; +} + +std::wstring UTF8ToWideAndAdjustOffsets(const base::StringPiece& utf8, + std::vector<size_t>* + offsets_for_adjustment) { + std::wstring result; 
+ UTF8ToWideAndAdjustOffsets(utf8.data(), utf8.length(), &result, + offsets_for_adjustment); + return result; } // UTF-16 <-> Wide ------------------------------------------------------------- @@ -90,6 +126,19 @@ bool UTF16ToWideAndAdjustOffset(const char16* src, return true; } +bool UTF16ToWideAndAdjustOffsets(const char16* src, + size_t src_len, + std::wstring* output, + std::vector<size_t>* offsets_for_adjustment) { + output->assign(src, src_len); + if (offsets_for_adjustment) { + std::for_each(offsets_for_adjustment->begin(), + offsets_for_adjustment->end(), + LimitOffset<std::wstring>(src_len)); + } + return true; +} + std::wstring UTF16ToWideAndAdjustOffset(const string16& utf16, size_t* offset_for_adjustment) { if (offset_for_adjustment && (*offset_for_adjustment >= utf16.length())) @@ -97,25 +146,109 @@ std::wstring UTF16ToWideAndAdjustOffset(const string16& utf16, return utf16; } +std::wstring UTF16ToWideAndAdjustOffsets( + const string16& utf16, + std::vector<size_t>* offsets_for_adjustment) { + if (offsets_for_adjustment) { + std::for_each(offsets_for_adjustment->begin(), + offsets_for_adjustment->end(), + LimitOffset<std::wstring>(utf16.length())); + } + return utf16; +} + #elif defined(WCHAR_T_IS_UTF32) bool UTF16ToWideAndAdjustOffset(const char16* src, size_t src_len, std::wstring* output, size_t* offset_for_adjustment) { + std::vector<size_t> offsets; + if (offset_for_adjustment) + offsets.push_back(*offset_for_adjustment); + output->clear(); + // Assume that normally we won't have any non-BMP characters so the counts + // will be the same. 
+ output->reserve(src_len); + bool ret = ConvertUnicode(src, src_len, output, &offsets); + if (offset_for_adjustment) + *offset_for_adjustment = offsets[0]; + return ret; +} + +bool UTF16ToWideAndAdjustOffsets(const char16* src, + size_t src_len, + std::wstring* output, + std::vector<size_t>* offsets_for_adjustment) { output->clear(); // Assume that normally we won't have any non-BMP characters so the counts // will be the same. output->reserve(src_len); - return ConvertUnicode(src, src_len, output, offset_for_adjustment); + return ConvertUnicode(src, src_len, output, offsets_for_adjustment); } std::wstring UTF16ToWideAndAdjustOffset(const string16& utf16, size_t* offset_for_adjustment) { - std::wstring ret; - UTF16ToWideAndAdjustOffset(utf16.data(), utf16.length(), &ret, - offset_for_adjustment); - return ret; + std::vector<size_t> offsets; + if (offset_for_adjustment) + offsets.push_back(*offset_for_adjustment); + std::wstring result; + UTF16ToWideAndAdjustOffsets(utf16.data(), utf16.length(), &result, + &offsets); + if (offset_for_adjustment) + *offset_for_adjustment = offsets[0]; + return result; +} + +std::wstring UTF16ToWideAndAdjustOffsets( + const string16& utf16, + std::vector<size_t>* offsets_for_adjustment) { + std::wstring result; + UTF16ToWideAndAdjustOffsets(utf16.data(), utf16.length(), &result, + offsets_for_adjustment); + return result; } #endif // defined(WCHAR_T_IS_UTF32) + +template <typename T> +LimitOffset<T>::LimitOffset(size_t limit) + : limit_(limit) {} + +template <typename T> +void LimitOffset<T>::operator()(size_t& offset) { + if (offset >= limit_) + offset = T::npos; +} + +AdjustOffset::Adjustment::Adjustment(size_t location, + size_t old_length, + size_t new_length) + : location(location), + old_length(old_length), + new_length(new_length) {} + +AdjustOffset::AdjustOffset(const Adjustments& adjustments) + : adjustments_(adjustments) {} + +void AdjustOffset::operator()(size_t& offset) { + if (offset == std::wstring::npos) + return; + 
size_t adjustment = 0; + for (Adjustments::const_iterator i = adjustments_.begin(); + i != adjustments_.end(); ++i) { + size_t location = i->location; + if (offset == location && i->new_length == 0) { + offset = std::wstring::npos; + return; + } + if (offset <= location) + break; + if (offset < (location + i->old_length)) { + offset = std::wstring::npos; + return; + } + adjustment += (i->old_length - i->new_length); + } + offset -= adjustment; +} |