summaryrefslogtreecommitdiffstats
path: root/base/utf_offset_string_conversions_unittest.cc
blob: ff03a750533733e0544b240e37521a539381940f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <algorithm>
#include <vector>

#include "base/logging.h"
#include "base/string_piece.h"
#include "base/utf_offset_string_conversions.h"
#include "testing/gtest/include/gtest/gtest.h"

namespace base {

namespace {

// Shorthand for string16::npos. The expectations in the tests below use it
// wherever an adjusted offset is expected to have no valid position.
const size_t kNpos = string16::npos;

}  // namespace

// Checks single-offset adjustment in both conversion directions. Each case
// names an input string, an offset into it, and the offset expected after
// the conversion call (kNpos where no valid position is expected).
TEST(UTFOffsetStringConversionsTest, AdjustOffset) {
  struct UTF8ToUTF16Case {
    const char* utf8;
    size_t input_offset;
    size_t output_offset;
  } utf8_cases[] = {
    // Empty input.
    {"", 0, kNpos},
    // Two 3-byte sequences: offset 1 falls inside the first one, offset 3
    // sits on the boundary between them.
    {"\xe4\xbd\xa0\xe5\xa5\xbd", 1, kNpos},
    {"\xe4\xbd\xa0\xe5\xa5\xbd", 3, 1},
    // 3-byte encoding of a lone surrogate (U+DC00) followed by 'z'.
    {"\xed\xb0\x80z", 3, 1},
    // 'A', one 4-byte sequence, 'z': offset 2 falls inside the sequence.
    {"A\xF0\x90\x8C\x80z", 1, 1},
    {"A\xF0\x90\x8C\x80z", 2, kNpos},
    {"A\xF0\x90\x8C\x80z", 5, 3},
  };
  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(utf8_cases); ++i) {
    const UTF8ToUTF16Case& test_case = utf8_cases[i];
    // The conversion call rewrites |adjusted| in place.
    size_t adjusted = test_case.input_offset;
    UTF8ToUTF16AndAdjustOffset(test_case.utf8, &adjusted);
    EXPECT_EQ(test_case.output_offset, adjusted);
  }

  struct UTF16ToUTF8Case {
    char16 utf16[10];
    size_t input_offset;
    size_t output_offset;
  } utf16_cases[] = {
      // Empty input.
      {{}, 0, kNpos},
      // Code units that become 3-byte UTF-8 sequences.
      {{0x5909, 0x63DB}, 2, kNpos},
      {{0x5909, 0x63DB}, 1, 3},
      // Code units that become 2-byte UTF-8 sequences.
      {{'A', 0x00bc, 0x00be, 'z'}, 1, 1},
      {{'A', 0x00bc, 0x00be, 'z'}, 2, 3},
      {{'A', 0x00bc, 0x00be, 'z'}, 3, 5},
      // Surrogate pair: offset 2 falls between its two halves.
      {{'A', 0xd800, 0xdf00, 'z'}, 1, 1},
      {{'A', 0xd800, 0xdf00, 'z'}, 2, kNpos},
      {{'A', 0xd800, 0xdf00, 'z'}, 3, 5},
  };
  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(utf16_cases); ++i) {
    const UTF16ToUTF8Case& test_case = utf16_cases[i];
    size_t adjusted = test_case.input_offset;
    UTF16ToUTF8AndAdjustOffset(test_case.utf16, &adjusted);
    EXPECT_EQ(test_case.output_offset, adjusted);
  }
}

// Applies LimitOffset<string16>(kLimit) to the offsets 0..kItems-1, first in
// ascending and then in descending order, and checks that exactly ten values
// are still below kLimit (and not kNpos) afterwards in both orderings.
TEST(UTFOffsetStringConversionsTest, LimitOffsets) {
  const size_t kLimit = 10;
  const size_t kItems = 20;

  // Ascending pass: 0, 1, ..., kItems - 1.
  std::vector<size_t> offsets;
  for (size_t value = 0; value < kItems; ++value)
    offsets.push_back(value);
  std::for_each(offsets.begin(), offsets.end(), LimitOffset<string16>(kLimit));
  size_t surviving = 0;
  for (size_t i = 0; i < offsets.size(); ++i) {
    if (offsets[i] < kLimit && offsets[i] != kNpos)
      ++surviving;
  }
  EXPECT_EQ(10U, surviving);

  // Descending pass: kItems - 1, ..., 1, 0.
  offsets.clear();
  for (size_t i = 0; i < kItems; ++i)
    offsets.push_back(kItems - 1 - i);
  std::for_each(offsets.begin(), offsets.end(), LimitOffset<string16>(kLimit));
  surviving = 0;
  for (size_t i = 0; i < offsets.size(); ++i) {
    if (offsets[i] < kLimit && offsets[i] != kNpos)
      ++surviving;
  }
  EXPECT_EQ(10U, surviving);
}

// Exercises OffsetAdjuster with several adjustments applied to a vector that
// initially holds every offset of the source string (0, 1, 2, ...).
//
// Judging by the diagrams below, the Adjustment arguments read as
// (start offset in the original, length in the original, length in the
// output). In each case the adjuster lives in its own inner scope so that it
// is destroyed before the offsets are inspected.
// NOTE(review): presumably the adjustments take effect when the adjuster is
// destroyed - confirm against OffsetAdjuster.
//
// The size checks are fatal (ASSERT_EQ) so that the element-wise loops never
// index past the end of |offsets| if the sizes ever disagree.
TEST(UTFOffsetStringConversionsTest, AdjustOffsets) {
  // Imagine we have strings as shown in the following cases where the
  // X's represent encoded characters.
  // 1: abcXXXdef ==> abcXdef
  {
    std::vector<size_t> offsets;
    for (size_t t = 0; t < 9; ++t)
      offsets.push_back(t);
    {
      OffsetAdjuster offset_adjuster(&offsets);
      offset_adjuster.Add(OffsetAdjuster::Adjustment(3, 3, 1));
    }
    size_t expected_1[] = {0, 1, 2, 3, kNpos, kNpos, 4, 5, 6};
    ASSERT_EQ(arraysize(expected_1), offsets.size());
    for (size_t i = 0; i < arraysize(expected_1); ++i)
      EXPECT_EQ(expected_1[i], offsets[i]) << "offset index " << i;
  }

  // 2: XXXaXXXXbcXXXXXXXdefXXX ==> XaXXbcXXXXdefX
  {
    std::vector<size_t> offsets;
    for (size_t t = 0; t < 23; ++t)
      offsets.push_back(t);
    {
      OffsetAdjuster offset_adjuster(&offsets);
      offset_adjuster.Add(OffsetAdjuster::Adjustment(0, 3, 1));
      offset_adjuster.Add(OffsetAdjuster::Adjustment(4, 4, 2));
      offset_adjuster.Add(OffsetAdjuster::Adjustment(10, 7, 4));
      offset_adjuster.Add(OffsetAdjuster::Adjustment(20, 3, 1));
    }
    size_t expected_2[] = {0, kNpos, kNpos, 1, 2, kNpos, kNpos, kNpos, 4, 5, 6,
                           kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 10, 11, 12,
                           13, kNpos, kNpos};
    ASSERT_EQ(arraysize(expected_2), offsets.size());
    for (size_t i = 0; i < arraysize(expected_2); ++i)
      EXPECT_EQ(expected_2[i], offsets[i]) << "offset index " << i;
  }

  // 3: XXXaXXXXbcdXXXeXX ==> aXXXXbcdXXXe
  {
    std::vector<size_t> offsets;
    for (size_t t = 0; t < 17; ++t)
      offsets.push_back(t);
    {
      OffsetAdjuster offset_adjuster(&offsets);
      offset_adjuster.Add(OffsetAdjuster::Adjustment(0, 3, 0));
      offset_adjuster.Add(OffsetAdjuster::Adjustment(4, 4, 4));
      offset_adjuster.Add(OffsetAdjuster::Adjustment(11, 3, 3));
      offset_adjuster.Add(OffsetAdjuster::Adjustment(15, 2, 0));
    }
    size_t expected_3[] = {kNpos, kNpos, kNpos, 0, 1, kNpos, kNpos, kNpos, 5, 6,
                           7, 8, kNpos, kNpos, 11, kNpos, kNpos};
    ASSERT_EQ(arraysize(expected_3), offsets.size());
    for (size_t i = 0; i < arraysize(expected_3); ++i)
      EXPECT_EQ(expected_3[i], offsets[i]) << "offset index " << i;
  }
}

}  // namespace base