chrome_frame/icu_stubs.cc


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169

// Copyright (c) 2009 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/logging.h"
#include "base/string_util.h"
#include "googleurl/src/url_canon.h"

#include <windows.h>

////////////////////////////////////////////////////////////////////////////////
// Avoid dependency on string_util_icu.cc (which pulls in icu).

std::string WideToAnsiDirect(const wchar_t* wide, size_t wide_len) {
  std::string ret;
  char* write = WriteInto(&ret, wide_len + 1);
  for (size_t i = 0; i < wide_len; ++i) {
    // We can only convert characters below 0x80 directly from wide to ansi.
    DCHECK(wide[i] <= 127) << "can't convert";
    write[i] = static_cast<char>(wide[i]);
  }

  write[wide_len] = '\0';

  return ret;
}

bool WideToUTF8(const wchar_t* wide, size_t wide_len, std::string* utf8) {
  DCHECK(utf8);

  // Add a cutoff. If it's all ASCII, convert it directly
  size_t i;
  for (i = 0; i < wide_len; ++i) {
    if (wide[i] > 127)
      break;
  }

  // If we made it to the end without breaking, then it's all ANSI, so do a
  // quick convert
  if (i == wide_len) {
    *utf8 = WideToAnsiDirect(wide, wide_len);
    return true;
  }

  // Figure out how long the string is
  int size = WideCharToMultiByte(CP_UTF8, 0, wide, wide_len + 1, NULL, 0, NULL,
                                 NULL);

  if (size > 0) {
    WideCharToMultiByte(CP_UTF8, 0, wide, wide_len + 1, WriteInto(utf8, size),
                        size, NULL, NULL);
  }

  return (size > 0);
}

std::string WideToUTF8(const std::wstring& wide) {
  std::string ret;
  if (!wide.empty()) {
    // Ignore the success flag of this call, it will do the best it can for
    // invalid input, which is what we want here.
    WideToUTF8(wide.data(), wide.length(), &ret);
  }
  return ret;
}

bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) {
  DCHECK(output);

  if (src_len == 0) {
    output->clear();
    return true;
  }

  int wide_chars = MultiByteToWideChar(CP_UTF8, 0, src, src_len, NULL, 0);
  if (!wide_chars) {
    NOTREACHED();
    return false;
  }

  wide_chars++;  // make room for L'\0'
  // Note that WriteInto will fill the string with '\0', so in the case
  // where the input string is not \0 terminated, we will still be ensured
  // that the output string will be.
  if (!MultiByteToWideChar(CP_UTF8, 0, src, src_len,
                           WriteInto(output, wide_chars), wide_chars)) {
    NOTREACHED();
    output->clear();
    return false;
  }

  return true;
}

std::wstring UTF8ToWide(const base::StringPiece& utf8) {
  std::wstring ret;
  if (!utf8.empty())
    UTF8ToWide(utf8.data(), utf8.length(), &ret);
  return ret;
}

#ifdef WCHAR_T_IS_UTF16
string16 UTF8ToUTF16(const std::string& utf8) {
  std::wstring ret;
  if (!utf8.empty())
    UTF8ToWide(utf8.data(), utf8.length(), &ret);
  return ret;
}
#else
#error Need WCHAR_T_IS_UTF16
#endif

////////////////////////////////////////////////////////////////////////////////
// Replace ICU dependent functions in googleurl.
/*#define __UTF_H__
#include "third_party/icu38/public/common/unicode/utf16.h"
#define U_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800)
extern const char16 kUnicodeReplacementCharacter;*/

namespace url_canon {

bool IDNToASCII(const char16* src, int src_len, CanonOutputW* output) {
  // We should only hit this when the user attempts to navigate
  // CF to an invalid URL.
  DLOG(WARNING) << __FUNCTION__ << " not implemented";
  return false;
}

bool ReadUTFChar(const char* str, int* begin, int length,
                 unsigned* code_point_out) {
  // We should only hit this when the user attempts to navigate
  // CF to an invalid URL.
  DLOG(WARNING) << __FUNCTION__ << " not implemented";

  // TODO(tommi): consider if we can use something like
  // http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
  return false;
}

bool ReadUTFChar(const char16* str, int* begin, int length,
                 unsigned* code_point) {
/*
  if (U16_IS_SURROGATE(str[*begin])) {
    if (!U16_IS_SURROGATE_LEAD(str[*begin]) || *begin + 1 >= length ||
        !U16_IS_TRAIL(str[*begin + 1])) {
      // Invalid surrogate pair.
      *code_point = kUnicodeReplacementCharacter;
      return false;
    } else {
      // Valid surrogate pair.
      *code_point = U16_GET_SUPPLEMENTARY(str[*begin], str[*begin + 1]);
      (*begin)++;
    }
  } else {
    // Not a surrogate, just one 16-bit word.
    *code_point = str[*begin];
  }

  if (U_IS_UNICODE_CHAR(*code_point))
    return true;

  // Invalid code point.
  *code_point = kUnicodeReplacementCharacter;
  return false;*/
  CHECK(false);
  return false;
}

}  // namespace url_canon