diff options
Diffstat (limited to 'third_party/liblouis/nacl_wrapper/liblouis_wrapper.cc')
-rw-r--r-- | third_party/liblouis/nacl_wrapper/liblouis_wrapper.cc | 204 |
1 files changed, 204 insertions, 0 deletions
diff --git a/third_party/liblouis/nacl_wrapper/liblouis_wrapper.cc b/third_party/liblouis/nacl_wrapper/liblouis_wrapper.cc new file mode 100644 index 0000000..390e192 --- /dev/null +++ b/third_party/liblouis/nacl_wrapper/liblouis_wrapper.cc @@ -0,0 +1,204 @@ +// Copyright 2013 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +#include "liblouis_wrapper.h" + +#include <cstddef> + +#include "liblouis/liblouis.h" + +namespace { + +// Decodes UTF-8 into 16-bit wide characters. +// This implementation is very permissive and may miss encoding errors. +// It ignores characters which are not in the Unicode Basic Multilingual Plane. +// TODO(jbroman): Handle more than BMP if liblouis changes to accept UTF-16. 
+static bool DecodeUtf8(const std::string& in, std::vector<widechar>* out) { + int len = in.length(); + std::vector<widechar> result; + result.reserve(len); + int i = 0; + while (i < len) { + int ch = static_cast<unsigned char>(in[i++]); + widechar cp; + if ((ch & 0x80) == 0x00) { // U+0000 - U+007F + cp = ch; + } else if ((ch & 0xe0) == 0xc0 && i < len) { // U+0080 - U+07FF + cp = (ch & 0x1f) << 6; + ch = static_cast<unsigned char>(in[i++]); + cp |= (ch & 0x3f); + } else if ((ch & 0xf0) == 0xe0 && i+1 < len) { // U+0800 - U+FFFF + cp = (ch & 0x0f) << 12; + ch = static_cast<unsigned char>(in[i++]); + cp |= (ch & 0x3f) << 6; + ch = static_cast<unsigned char>(in[i++]); + cp |= (ch & 0x3f); + } else if ((ch & 0xf8) == 0xf0 && i+2 < len) { // U+10000 - U+1FFFFF + i += 3; + continue; + } else if ((ch & 0xfc) == 0xf8 && i+3 < len) { // U+200000 - U+3FFFFFF + i += 4; + continue; + } else if ((ch & 0xfe) == 0xfc && i+4 < len) { // U+4000000 - U+7FFFFFFF + i += 5; + continue; + } else { + // Invalid first code point. + return false; + } + result.push_back(cp); + } + out->swap(result); + return true; +} + +// Encodes 16-bit wide characters into UTF-8. +// This implementation is very permissive and may miss invalid code points in +// its input. +// TODO(jbroman): Handle more than BMP if widechar ever becomes larger. 
+static bool EncodeUtf8(const std::vector<widechar>& in, std::string* out) { + std::string result; + result.reserve(in.size() * 2); + for (std::vector<widechar>::const_iterator it = in.begin(); it != in.end(); + ++it) { + unsigned int cp = *it; + if (cp <= 0x007f) { // U+0000 - U+007F + result.push_back(static_cast<char>(cp)); + } else if (cp <= 0x07ff) { // U+0080 - U+07FF + result.push_back(static_cast<char>(0xc0 | ((cp >> 6) & 0x1f))); + result.push_back(static_cast<char>(0x80 | (cp & 0x3f))); + } else if (cp <= 0xffff) { // U+0800 - U+FFFF + result.push_back(static_cast<char>(0xe0 | ((cp >> 12) & 0x0f))); + result.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3f))); + result.push_back(static_cast<char>(0x80 | (cp & 0x3f))); + } else { + // This can't happen if widechar is 16 bits wide. + // TODO(jbroman): assert this + } + } + out->swap(result); + return true; +} + +} // namespace + + +namespace liblouis_nacl { + +LibLouisWrapper::LibLouisWrapper() { + char data_path[] = "/"; // Needed because lou_setDataPath takes a char*. + lou_setDataPath(data_path); +} + +LibLouisWrapper::~LibLouisWrapper() { + lou_free(); +} + +const char* LibLouisWrapper::tables_dir() const { + return "/liblouis/tables"; +} + +bool LibLouisWrapper::CheckTable(const std::string& table_name) { + return lou_getTable(table_name.c_str()) != NULL; +} + +bool LibLouisWrapper::Translate(const TranslationParams& params, + TranslationResult* out) { + // Convert the character set of the input text. + std::vector<widechar> inbuf; + if (!DecodeUtf8(params.text, &inbuf)) { + // TODO(jbroman): log this + return false; + } + + int inlen = inbuf.size(); + int outlen = inlen * 2; // TODO(jbroman): choose this size more accurately. + std::vector<widechar> outbuf(outlen); + std::vector<int> text_to_braille(inlen); + std::vector<int> braille_to_text(outlen); + + // Compute the cursor position pointer to pass to liblouis. 
+ int out_cursor_position; + int* out_cursor_position_ptr; + if (params.cursor_position < 0) { + out_cursor_position = -1; + out_cursor_position_ptr = NULL; + } else { + out_cursor_position = params.cursor_position; + out_cursor_position_ptr = &out_cursor_position; + } + + // Invoke liblouis. + int result = lou_translate(params.table_name.c_str(), + &inbuf[0], &inlen, &outbuf[0], &outlen, + NULL /* typeform */, NULL /* spacing */, + &text_to_braille[0], &braille_to_text[0], + out_cursor_position_ptr, dotsIO /* mode */); + if (result == 0) { + // TODO(jbroman): log this + return false; + } + + // Massage the result. + std::vector<unsigned char> cells; + cells.reserve(outlen); + for (int i = 0; i < outlen; i++) { + cells.push_back(outbuf[i]); + } + braille_to_text.resize(outlen); + + // Return the translation result. + out->cells.swap(cells); + out->text_to_braille.swap(text_to_braille); + out->braille_to_text.swap(braille_to_text); + out->cursor_position = out_cursor_position; + return true; +} + +bool LibLouisWrapper::BackTranslate(const std::string& table_name, + const std::vector<unsigned char>& cells, std::string* out) { + std::vector<widechar> inbuf; + inbuf.reserve(cells.size()); + for (std::vector<unsigned char>::const_iterator it = cells.begin(); + it != cells.end(); ++it) { + // Set the high-order bit to prevent liblouis from dropping empty cells. + inbuf.push_back(*it | 0x8000); + } + int inlen = inbuf.size(); + int outlen = inlen * 2; // TODO(jbroman): choose this size more accurately. + std::vector<widechar> outbuf(outlen); + + // Invoke liblouis. + int result = lou_backTranslateString(table_name.c_str(), + &inbuf[0], &inlen, &outbuf[0], &outlen, + NULL /* typeform */, NULL /* spacing */, dotsIO /* mode */); + if (result == 0) { + // TODO(njbroman): log this + return false; + } + + // Massage the result. 
+ outbuf.resize(outlen); + std::string text; + if (!EncodeUtf8(outbuf, &text)) { + // TODO(jbroman): log this + return false; + } + + // Return the back translation result. + out->swap(text); + return true; +} + +} // namespace liblouis_nacl |