summaryrefslogtreecommitdiffstats
path: root/ui/base/ime/character_composer.cc
blob: 18b6b013f24de04f12c6690c9d8ecc700f05f85c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "ui/base/ime/character_composer.h"

#include <X11/Xlib.h>

#include <algorithm>
#include <iterator>

#include "base/strings/utf_string_conversions.h"
#include "base/third_party/icu/icu_utf.h"
// Note for Gtk removal: gdkkeysyms.h only contains a set of
// '#define GDK_KeyName 0xNNNN' macros and does not #include any Gtk headers.
#include "third_party/gtk+/gdk/gdkkeysyms.h"
#include "ui/base/events/event_constants.h"
#include "ui/base/glib/glib_integers.h"
#include "ui/base/x/x11_util.h"

// Note for Gtk removal: gtkimcontextsimpleseqs.h does not #include any Gtk
// headers and only contains one big guint16 array |gtk_compose_seqs_compact|
// which defines the main compose table. The table has internal linkage.
// The order of header inclusion is out of order because
// gtkimcontextsimpleseqs.h depends on guint16, which is defined in
// "ui/base/glib/glib_integers.h".
#include "third_party/gtk+/gtk/gtkimcontextsimpleseqs.h"

namespace {

typedef std::vector<unsigned int> ComposeBufferType;

// An iterator class to apply std::lower_bound for composition table.
class SequenceIterator
    : public std::iterator<std::random_access_iterator_tag, const uint16*> {
 public:
  SequenceIterator() : ptr_(NULL), stride_(0) {}
  SequenceIterator(const uint16* ptr, int stride)
      : ptr_(ptr), stride_(stride) {}

  const uint16* ptr() const {return ptr_;}
  int stride() const {return stride_;}

  SequenceIterator& operator++() {
    ptr_ += stride_;
    return *this;
  }
  SequenceIterator& operator+=(int n) {
    ptr_ += stride_*n;
    return *this;
  }

  const uint16* operator*() const {return ptr_;}

 private:
  const uint16* ptr_;
  int stride_;
};

inline SequenceIterator operator+(const SequenceIterator& l, int r) {
  return SequenceIterator(l) += r;
}

inline int operator-(const SequenceIterator& l, const SequenceIterator& r) {
  const int d = l.ptr() - r.ptr();
  DCHECK(l.stride() == r.stride() && l.stride() > 0 && d%l.stride() == 0);
  return d/l.stride();
}

inline bool operator==(const SequenceIterator& l, const SequenceIterator& r) {
  DCHECK(l.stride() == r.stride());
  return l.ptr() == r.ptr();
}

inline bool operator!=(const SequenceIterator& l, const SequenceIterator& r) {
  return !(l == r);
}

// A function to compare key value.
inline int CompareSequenceValue(unsigned int l, unsigned int r) {
  return (l > r) ? 1 : ((l < r) ? -1 : 0);
}

// A template to make |CompareFunc| work like operator<.
// |CompareFunc| is required to implement a member function,
// int operator()(const ComposeBufferType& l, const uint16* r) const.
template<typename CompareFunc>
struct ComparatorAdoptor {
  bool operator()(const ComposeBufferType& l, const uint16* r) const {
    return CompareFunc()(l, r) == -1;
  }
  bool operator()(const uint16* l, const ComposeBufferType& r) const {
    return CompareFunc()(r, l) == 1;
  }
};

class ComposeChecker {
 public:
  // This class does not take the ownership of |data|, |data| should be alive
  // for the lifetime of the object.
  // |data| is a pointer to the head of an array of
  // length (|max_sequence_length| + 2)*|n_sequences|.
  // Every (|max_sequence_length| + 2) elements of |data| represent an entry.
  // First |max_sequence_length| elements of an entry is the sequecne which
  // composes the character represented by the last two elements of the entry.
  ComposeChecker(const uint16* data, int max_sequence_length, int n_sequences);
  bool CheckSequence(const ComposeBufferType& sequence,
                     uint32* composed_character) const;

 private:
  struct CompareSequence {
    int operator()(const ComposeBufferType& l, const uint16* r) const;
  };

  // This class does not take the ownership of |data_|,
  // the dtor does not delete |data_|.
  const uint16* data_;
  int max_sequence_length_;
  int n_sequences_;
  int row_stride_;

  DISALLOW_COPY_AND_ASSIGN(ComposeChecker);
};

ComposeChecker::ComposeChecker(const uint16* data,
                               int max_sequence_length,
                               int n_sequences)
    : data_(data),
      max_sequence_length_(max_sequence_length),
      n_sequences_(n_sequences),
      row_stride_(max_sequence_length + 2) {
}

bool ComposeChecker::CheckSequence(const ComposeBufferType& sequence,
                                   uint32* composed_character) const {
  const int sequence_length = sequence.size();
  if (sequence_length > max_sequence_length_)
    return false;
  // Find sequence in the table.
  const SequenceIterator begin(data_, row_stride_);
  const SequenceIterator end = begin + n_sequences_;
  const SequenceIterator found = std::lower_bound(
      begin, end, sequence, ComparatorAdoptor<CompareSequence>());
  if (found == end || CompareSequence()(sequence, *found) != 0)
    return false;

  if (sequence_length == max_sequence_length_ ||
      (*found)[sequence_length] == 0) {
    // |found| is not partially matching. It's fully matching.
    if (found + 1 == end ||
        CompareSequence()(sequence, *(found + 1)) != 0) {
      // There is no composition longer than |found| which matches to
      // |sequence|.
      const uint32 value = ((*found)[max_sequence_length_] << 16) |
          (*found)[max_sequence_length_ + 1];
      *composed_character = value;
    }
  }
  return true;
}

int ComposeChecker::CompareSequence::operator()(const ComposeBufferType& l,
                                                const uint16* r) const {
  for(size_t i = 0; i < l.size(); ++i) {
    const int compare_result = CompareSequenceValue(l[i], r[i]);
    if(compare_result)
      return compare_result;
  }
  return 0;
}


class ComposeCheckerWithCompactTable {
 public:
  // This class does not take the ownership of |data|, |data| should be alive
  // for the lifetime of the object.
  // First |index_size|*|index_stride| elements of |data| are an index table.
  // Every |index_stride| elements of an index table are an index entry.
  // If you are checking with a sequence of length N beginning with character C,
  // you have to find an index entry whose first element is C, then get the N-th
  // element of the index entry as the index.
  // The index is pointing the element of |data| where the composition table for
  // sequences of length N beginning with C is placed.

  ComposeCheckerWithCompactTable(const uint16* data,
                                 int max_sequence_length,
                                 int index_size,
                                 int index_stride);
  bool CheckSequence(const ComposeBufferType& sequence,
                     uint32* composed_character) const;

 private:
  struct CompareSequenceFront {
    int operator()(const ComposeBufferType& l, const uint16* r) const;
  };
  struct CompareSequenceSkipFront {
    int operator()(const ComposeBufferType& l, const uint16* r) const;
  };

  // This class does not take the ownership of |data_|,
  // the dtor does not delete |data_|.
  const uint16* data_;
  int max_sequence_length_;
  int index_size_;
  int index_stride_;
};

ComposeCheckerWithCompactTable::ComposeCheckerWithCompactTable(
    const uint16* data,
    int max_sequence_length,
    int index_size,
    int index_stride)
    : data_(data),
      max_sequence_length_(max_sequence_length),
      index_size_(index_size),
      index_stride_(index_stride) {
}

bool ComposeCheckerWithCompactTable::CheckSequence(
    const ComposeBufferType& sequence,
    uint32* composed_character) const {
  const int compose_length = sequence.size();
  if (compose_length > max_sequence_length_)
    return false;
  // Find corresponding index for the first keypress.
  const SequenceIterator index_begin(data_, index_stride_);
  const SequenceIterator index_end = index_begin + index_size_;
  const SequenceIterator index =
      std::lower_bound(index_begin, index_end, sequence,
                       ComparatorAdoptor<CompareSequenceFront>());
  if (index == index_end || CompareSequenceFront()(sequence, *index) != 0)
    return false;
  if (compose_length == 1)
    return true;
  // Check for composition sequences.
  for (int length = compose_length - 1; length < max_sequence_length_;
       ++length) {
    const uint16* table = data_ + (*index)[length];
    const uint16* table_next = data_ + (*index)[length + 1];
    if (table_next > table) {
      // There are composition sequences for this |length|.
      const int row_stride = length + 1;
      const int n_sequences = (table_next - table)/row_stride;
      const SequenceIterator table_begin(table, row_stride);
      const SequenceIterator table_end = table_begin + n_sequences;
      const SequenceIterator found =
          std::lower_bound(table_begin, table_end, sequence,
                           ComparatorAdoptor<CompareSequenceSkipFront>());
      if (found != table_end &&
          CompareSequenceSkipFront()(sequence, *found) == 0) {
        if (length == compose_length - 1)  // Exact match.
          *composed_character = (*found)[length];
        return true;
      }
    }
  }
  return false;
}

int ComposeCheckerWithCompactTable::CompareSequenceFront::operator()(
    const ComposeBufferType& l, const uint16* r) const {
  return CompareSequenceValue(l[0], r[0]);
}

int ComposeCheckerWithCompactTable::CompareSequenceSkipFront::operator()(
    const ComposeBufferType& l, const uint16* r) const {
  for(size_t i = 1; i < l.size(); ++i) {
    const int compare_result = CompareSequenceValue(l[i], r[i - 1]);
    if(compare_result)
      return compare_result;
  }
  return 0;
}


// Additional table.

// The difference between this and the default input method is the handling
// of C+acute - this method produces C WITH CEDILLA rather than C WITH ACUTE.
// For languages that use CCedilla and not acute, this is the preferred mapping,
// and is particularly important for pt_BR, where the us-intl keyboard is
// used extensively.

const uint16 cedilla_compose_seqs[] = {
  // LATIN_CAPITAL_LETTER_C_WITH_CEDILLA
  GDK_KEY_dead_acute, GDK_KEY_C, 0, 0, 0, 0x00C7,
  // LATIN_SMALL_LETTER_C_WITH_CEDILLA
  GDK_KEY_dead_acute, GDK_KEY_c, 0, 0, 0, 0x00E7,
  // LATIN_CAPITAL_LETTER_C_WITH_CEDILLA
  GDK_KEY_Multi_key, GDK_KEY_apostrophe, GDK_KEY_C, 0, 0, 0x00C7,
  // LATIN_SMALL_LETTER_C_WITH_CEDILLA
  GDK_KEY_Multi_key, GDK_KEY_apostrophe, GDK_KEY_c, 0, 0, 0x00E7,
  // LATIN_CAPITAL_LETTER_C_WITH_CEDILLA
  GDK_KEY_Multi_key, GDK_KEY_C, GDK_KEY_apostrophe, 0, 0, 0x00C7,
  // LATIN_SMALL_LETTER_C_WITH_CEDILLA
  GDK_KEY_Multi_key, GDK_KEY_c, GDK_KEY_apostrophe, 0, 0, 0x00E7,
};

bool KeypressShouldBeIgnored(unsigned int keyval) {
  switch(keyval) {
    case GDK_KEY_Shift_L:
    case GDK_KEY_Shift_R:
    case GDK_KEY_Control_L:
    case GDK_KEY_Control_R:
    case GDK_KEY_Caps_Lock:
    case GDK_KEY_Shift_Lock:
    case GDK_KEY_Meta_L:
    case GDK_KEY_Meta_R:
    case GDK_KEY_Alt_L:
    case GDK_KEY_Alt_R:
    case GDK_KEY_Super_L:
    case GDK_KEY_Super_R:
    case GDK_KEY_Hyper_L:
    case GDK_KEY_Hyper_R:
    case GDK_KEY_Mode_switch:
    case GDK_KEY_ISO_Level3_Shift:
      return true;
    default:
      return false;
  }
}

bool CheckCharacterComposeTable(const ComposeBufferType& sequence,
                                uint32* composed_character) {
  // Check cedilla compose table.
  const ComposeChecker kCedillaComposeChecker(
      cedilla_compose_seqs, 4, arraysize(cedilla_compose_seqs)/(4 + 2));
  if (kCedillaComposeChecker.CheckSequence(sequence, composed_character))
    return true;

  // Check main compose table.
  const ComposeCheckerWithCompactTable kMainComposeChecker(
      gtk_compose_seqs_compact, 5, 24, 6);
  if (kMainComposeChecker.CheckSequence(sequence, composed_character))
    return true;

  return false;
}

// Converts |character| to UTF16 string.
// Returns false when |character| is not a valid character.
bool UTF32CharacterToUTF16(uint32 character, string16* output) {
  output->clear();
  // Reject invalid character. (e.g. codepoint greater than 0x10ffff)
  if (!CBU_IS_UNICODE_CHAR(character))
    return false;
  if (character) {
    output->resize(CBU16_LENGTH(character));
    size_t i = 0;
    CBU16_APPEND_UNSAFE(&(*output)[0], i, character);
  }
  return true;
}

// Converts a X keycode to a X keysym with no modifiers.
KeySym XKeyCodeToXKeySym(unsigned int keycode) {
  Display* display = ui::GetXDisplay();
  if (!display)
    return NoSymbol;

  XKeyEvent x_key_event = {0};
  x_key_event.type = KeyPress;
  x_key_event.display = display;
  x_key_event.keycode = keycode;
  return ::XLookupKeysym(&x_key_event, 0);
}

// Returns an hexadecimal digit integer (0 to 15) corresponding to |keyval|.
// -1 is returned when |keyval| cannot be a hexadecimal digit.
int KeyvalToHexDigit(unsigned int keyval) {
  if (GDK_KEY_0 <= keyval && keyval <= GDK_KEY_9)
    return keyval - GDK_KEY_0;
  if (GDK_KEY_a <= keyval && keyval <= GDK_KEY_f)
    return keyval - GDK_KEY_a + 10;
  if (GDK_KEY_A <= keyval && keyval <= GDK_KEY_F)
    return keyval - GDK_KEY_A + 10;
  return -1;  // |keyval| cannot be a hexadecimal digit.
}

}  // namespace

namespace ui {

CharacterComposer::CharacterComposer() : composition_mode_(KEY_SEQUENCE_MODE) {}

CharacterComposer::~CharacterComposer() {}

void CharacterComposer::Reset() {
  compose_buffer_.clear();
  composed_character_.clear();
  preedit_string_.clear();
  composition_mode_ = KEY_SEQUENCE_MODE;
}

bool CharacterComposer::FilterKeyPress(unsigned int keyval,
                                       unsigned int keycode,
                                       int flags) {
  composed_character_.clear();
  preedit_string_.clear();

  // We don't care about modifier key presses.
  if(KeypressShouldBeIgnored(keyval))
    return false;

  // When the user presses Ctrl+Shift+U, maybe switch to HEX_MODE.
  // We don't care about other modifiers like Alt.  When CapsLock is down, we
  // do nothing because what we receive is Ctrl+Shift+u (not U).
  if (keyval == GDK_KEY_U && (flags & EF_SHIFT_DOWN) &&
      (flags & EF_CONTROL_DOWN)) {
    if (composition_mode_ == KEY_SEQUENCE_MODE && compose_buffer_.empty()) {
      // There is no ongoing composition.  Let's switch to HEX_MODE.
      composition_mode_ = HEX_MODE;
      UpdatePreeditStringHexMode();
      return true;
    }
  }

  // Filter key press in an appropriate manner.
  switch (composition_mode_) {
    case KEY_SEQUENCE_MODE:
      return FilterKeyPressSequenceMode(keyval, keycode, flags);
    case HEX_MODE:
      return FilterKeyPressHexMode(keyval, keycode, flags);
    default:
      NOTREACHED();
      return false;
  }
}

bool CharacterComposer::FilterKeyPressSequenceMode(unsigned int keyval,
                                                   unsigned int keycode,
                                                   int flags) {
  DCHECK(composition_mode_ == KEY_SEQUENCE_MODE);
  compose_buffer_.push_back(keyval);

  // Check compose table.
  uint32 composed_character_utf32 = 0;
  if (CheckCharacterComposeTable(compose_buffer_, &composed_character_utf32)) {
    // Key press is recognized as a part of composition.
    if (composed_character_utf32 != 0) {
      // We get a composed character.
      compose_buffer_.clear();
      UTF32CharacterToUTF16(composed_character_utf32, &composed_character_);
    }
    return true;
  }
  // Key press is not a part of composition.
  compose_buffer_.pop_back();  // Remove the keypress added this time.
  if (!compose_buffer_.empty()) {
    compose_buffer_.clear();
    return true;
  }
  return false;
}

bool CharacterComposer::FilterKeyPressHexMode(unsigned int keyval,
                                              unsigned int keycode,
                                              int flags) {
  DCHECK(composition_mode_ == HEX_MODE);
  const size_t kMaxHexSequenceLength = 8;
  int hex_digit = KeyvalToHexDigit(keyval);
  if (hex_digit < 0) {
    // With 101 keyboard, control + shift + 3 produces '#', but a user may
    // have intended to type '3'.  So, if a hexadecimal character was not found,
    // suppose a user is holding shift key (and possibly control key, too) and
    // try a character with modifier keys removed.
    hex_digit = KeyvalToHexDigit(XKeyCodeToXKeySym(keycode));
  }

  if (keyval == GDK_KEY_Escape) {
    // Cancel composition when ESC is pressed.
    Reset();
  } else if (keyval == GDK_KEY_Return || keyval == GDK_KEY_KP_Enter ||
             keyval == GDK_KEY_ISO_Enter ||
             keyval == GDK_KEY_space || keyval == GDK_KEY_KP_Space) {
    // Commit the composed character when Enter or space is pressed.
    CommitHex();
  } else if (keyval == GDK_KEY_BackSpace) {
    // Pop back the buffer when Backspace is pressed.
    if (!compose_buffer_.empty()) {
      compose_buffer_.pop_back();
    } else {
      // If there is no character in |compose_buffer_|, cancel composition.
      Reset();
    }
  } else if (hex_digit >= 0 &&
             compose_buffer_.size() < kMaxHexSequenceLength) {
    // Add the key to the buffer if it is a hex digit.
    compose_buffer_.push_back(hex_digit);
  }

  UpdatePreeditStringHexMode();

  return true;
}

void CharacterComposer::CommitHex() {
  DCHECK(composition_mode_ == HEX_MODE);
  uint32 composed_character_utf32 = 0;
  for (size_t i = 0; i != compose_buffer_.size(); ++i) {
    const uint32 digit = compose_buffer_[i];
    DCHECK(0 <= digit && digit < 16);
    composed_character_utf32 <<= 4;
    composed_character_utf32 |= digit;
  }
  Reset();
  UTF32CharacterToUTF16(composed_character_utf32, &composed_character_);
}

void CharacterComposer::UpdatePreeditStringHexMode() {
  if (composition_mode_ != HEX_MODE) {
    preedit_string_.clear();
    return;
  }
  std::string preedit_string_ascii("u");
  for (size_t i = 0; i != compose_buffer_.size(); ++i) {
    const int digit = compose_buffer_[i];
    DCHECK(0 <= digit && digit < 16);
    preedit_string_ascii += digit <= 9 ? ('0' + digit) : ('a' + (digit - 10));
  }
  preedit_string_ = ASCIIToUTF16(preedit_string_ascii);
}

}  // namespace ui