// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "base/utf_offset_string_conversions.h" #include #include "base/memory/scoped_ptr.h" #include "base/string_piece.h" #include "base/utf_string_conversion_utils.h" using base::PrepareForUTF16Or32Output; using base::ReadUnicodeCharacter; using base::WriteUnicodeCharacter; // Converts the given source Unicode character type to the given destination // Unicode character type as a STL string. The given input buffer and size // determine the source, and the given output STL string will be replaced by // the result. bool ConvertUnicode(const char* src, size_t src_len, string16* output, std::vector* offsets_for_adjustment) { if (offsets_for_adjustment) { std::for_each(offsets_for_adjustment->begin(), offsets_for_adjustment->end(), LimitOffset(src_len)); } // ICU requires 32-bit numbers. bool success = true; OffsetAdjuster offset_adjuster(offsets_for_adjustment); int32 src_len32 = static_cast(src_len); for (int32 i = 0; i < src_len32; i++) { uint32 code_point; size_t original_i = i; size_t chars_written = 0; if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) { chars_written = WriteUnicodeCharacter(code_point, output); } else { chars_written = WriteUnicodeCharacter(0xFFFD, output); success = false; } if (offsets_for_adjustment) { // NOTE: ReadUnicodeCharacter() adjusts |i| to point _at_ the last // character read, not after it (so that incrementing it in the loop // increment will place it at the right location), so we need to account // for that in determining the amount that was read. offset_adjuster.Add(OffsetAdjuster::Adjustment(original_i, i - original_i + 1, chars_written)); } } return success; } bool UTF8ToUTF16AndAdjustOffset(const char* src, size_t src_len, string16* output, size_t* offset_for_adjustment) { std::vector offsets; if (offset_for_adjustment) offsets.push_back(*offset_for_adjustment); PrepareForUTF16Or32Output(src, src_len, output); bool ret = ConvertUnicode(src, src_len, output, &offsets); if (offset_for_adjustment) *offset_for_adjustment = offsets[0]; return ret; } bool UTF8ToUTF16AndAdjustOffsets(const char* src, size_t src_len, string16* output, std::vector* offsets_for_adjustment) { PrepareForUTF16Or32Output(src, src_len, output); return ConvertUnicode(src, src_len, output, offsets_for_adjustment); } string16 UTF8ToUTF16AndAdjustOffset(const base::StringPiece& utf8, size_t* offset_for_adjustment) { std::vector offsets; if (offset_for_adjustment) offsets.push_back(*offset_for_adjustment); string16 result; UTF8ToUTF16AndAdjustOffsets(utf8.data(), utf8.length(), &result, &offsets); if (offset_for_adjustment) *offset_for_adjustment = offsets[0]; return result; } string16 UTF8ToUTF16AndAdjustOffsets( const base::StringPiece& utf8, std::vector* offsets_for_adjustment) { string16 result; UTF8ToUTF16AndAdjustOffsets(utf8.data(), utf8.length(), &result, offsets_for_adjustment); return result; } OffsetAdjuster::Adjustment::Adjustment(size_t original_offset, size_t original_length, size_t output_length) : original_offset(original_offset), original_length(original_length), output_length(output_length) { } OffsetAdjuster::OffsetAdjuster(std::vector* offsets_for_adjustment) : offsets_for_adjustment_(offsets_for_adjustment) { } OffsetAdjuster::~OffsetAdjuster() { if (!offsets_for_adjustment_ || adjustments_.empty()) return; for (std::vector::iterator i(offsets_for_adjustment_->begin()); i != offsets_for_adjustment_->end(); ++i) AdjustOffset(i); } void OffsetAdjuster::Add(const Adjustment& adjustment) { adjustments_.push_back(adjustment); } void OffsetAdjuster::AdjustOffset(std::vector::iterator offset) { if (*offset == string16::npos) return; size_t adjustment = 0; for (std::vector::const_iterator i = adjustments_.begin(); i != adjustments_.end(); ++i) { if (*offset == i->original_offset && i->output_length == 0) { *offset = string16::npos; return; } if (*offset <= i->original_offset) break; if (*offset < (i->original_offset + i->original_length)) { *offset = string16::npos; return; } adjustment += (i->original_length - i->output_length); } *offset -= adjustment; }