diff options
author | Elliott Hughes <enh@google.com> | 2014-04-30 22:03:12 -0700 |
---|---|---|
committer | Elliott Hughes <enh@google.com> | 2014-05-01 14:46:54 -0700 |
commit | 5a0aa3dee247a313f04252cf45608097695d5953 (patch) | |
tree | 1bbc0d1e4e60717285b17b40ab155bdfbace5e37 /libc/bionic | |
parent | 9fb53dd4dbaa7633c234d9da8417827fa3d3c32f (diff) | |
download | bionic-5a0aa3dee247a313f04252cf45608097695d5953.zip bionic-5a0aa3dee247a313f04252cf45608097695d5953.tar.gz bionic-5a0aa3dee247a313f04252cf45608097695d5953.tar.bz2 |
Switch to a working UTF-8 mb/wc implementation.
Although glibc gets by with an 8-byte mbstate_t, OpenBSD uses 12 bytes (of
the 128 bytes it reserves!).
We can actually implement UTF-8 encoding/decoding with a 0-byte mbstate_t,
which means we can make things work on LP32 too, as long as we accept the
limitation that the caller needs to present us with a complete sequence
before we'll process it.
Our behavior is fine when going from characters to bytes; we just
update the source wchar_t** to say how far through the input we got.
I'll come back and use the 4 bytes of mbstate_t we do have on LP32 to cope
with byte sequences split across multiple input buffers. The fact that we
don't support UTF-8 sequences longer than 4 bytes, plus the fact that the
first byte of a UTF-8 sequence encodes the length, means we shouldn't need
the other fields OpenBSD used (at the cost of some recomputation in cases
where a sequence is split across buffers).
This patch also makes the minimal changes necessary to setlocale(3) to
make us behave like glibc when an app requests UTF-8. (The difference
is that our "C" locale is the same as our "C.UTF-8" locale.)
Change-Id: Ied327a8c4643744b3611bf6bb005a9b389ba4c2f
Diffstat (limited to 'libc/bionic')
-rw-r--r-- | libc/bionic/locale.cpp | 28 | ||||
-rw-r--r-- | libc/bionic/wchar.cpp | 406 | ||||
-rw-r--r-- | libc/bionic/wctype.cpp | 91 |
3 files changed, 355 insertions, 170 deletions
diff --git a/libc/bionic/locale.cpp b/libc/bionic/locale.cpp index 5ab834d..3752fa4 100644 --- a/libc/bionic/locale.cpp +++ b/libc/bionic/locale.cpp @@ -75,8 +75,12 @@ static void __locale_init() { gLocale.int_n_sign_posn = CHAR_MAX; } +static bool __bionic_current_locale_is_utf8 = false; + static bool __is_supported_locale(const char* locale) { - return (strcmp(locale, "") == 0 || strcmp(locale, "C") == 0 || strcmp(locale, "POSIX") == 0); + return (strcmp(locale, "") == 0 || + strcmp(locale, "C") == 0 || strcmp(locale, "C.UTF-8") == 0 || + strcmp(locale, "POSIX") == 0); } static locale_t __new_locale() { @@ -115,26 +119,24 @@ locale_t newlocale(int category_mask, const char* locale_name, locale_t /*base*/ return __new_locale(); } -char* setlocale(int category, char const* locale_name) { +char* setlocale(int category, const char* locale_name) { // Is 'category' valid? if (category < LC_CTYPE || category > LC_IDENTIFICATION) { errno = EINVAL; return NULL; } - // Caller just wants to query the current locale? - if (locale_name == NULL) { - return const_cast<char*>("C"); - } - - // Caller wants one of the mandatory POSIX locales? - if (__is_supported_locale(locale_name)) { - return const_cast<char*>("C"); + // Caller wants to set the locale rather than just query? + if (locale_name != NULL) { + if (!__is_supported_locale(locale_name)) { + // We don't support this locale. + errno = ENOENT; + return NULL; + } + __bionic_current_locale_is_utf8 = (strstr(locale_name, "UTF-8") != NULL); } - // We don't support any other locales. - errno = ENOENT; - return NULL; + return const_cast<char*>(__bionic_current_locale_is_utf8 ? 
"C.UTF-8" : "C"); } locale_t uselocale(locale_t new_locale) { diff --git a/libc/bionic/wchar.cpp b/libc/bionic/wchar.cpp index badd256..0ec9504 100644 --- a/libc/bionic/wchar.cpp +++ b/libc/bionic/wchar.cpp @@ -1,215 +1,307 @@ -/* - * Copyright (C) 2008 The Android Open Source Project +/* $OpenBSD: citrus_utf8.c,v 1.6 2012/12/05 23:19:59 deraadt Exp $ */ + +/*- + * Copyright (c) 2002-2004 Tim J. Robbins * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: - * * Redistributions of source code must retain the above copyright + * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ -#include <ctype.h> #include <errno.h> -#include <limits.h> -#include <stdint.h> -#include <stdlib.h> #include <string.h> +#include <sys/param.h> #include <wchar.h> -/* stubs for wide-char functions */ - -int iswalnum(wint_t wc) { return isalnum(wc); } -int iswalpha(wint_t wc) { return isalpha(wc); } -int iswblank(wint_t wc) { return isblank(wc); } -int iswcntrl(wint_t wc) { return iscntrl(wc); } -int iswdigit(wint_t wc) { return isdigit(wc); } -int iswgraph(wint_t wc) { return isgraph(wc); } -int iswlower(wint_t wc) { return islower(wc); } -int iswprint(wint_t wc) { return isprint(wc); } -int iswpunct(wint_t wc) { return ispunct(wc); } -int iswspace(wint_t wc) { return isspace(wc); } -int iswupper(wint_t wc) { return isupper(wc); } -int iswxdigit(wint_t wc) { return isxdigit(wc); } - -int iswctype(wint_t wc, wctype_t char_class) { - switch (char_class) { - case WC_TYPE_ALNUM: return isalnum(wc); - case WC_TYPE_ALPHA: return isalpha(wc); - case WC_TYPE_BLANK: return isblank(wc); - case WC_TYPE_CNTRL: return iscntrl(wc); - case WC_TYPE_DIGIT: return isdigit(wc); - case WC_TYPE_GRAPH: return isgraph(wc); - case WC_TYPE_LOWER: return islower(wc); - case WC_TYPE_PRINT: return isprint(wc); - case WC_TYPE_PUNCT: return ispunct(wc); - case WC_TYPE_SPACE: return isspace(wc); - case WC_TYPE_UPPER: return isupper(wc); - case WC_TYPE_XDIGIT: return isxdigit(wc); - default: return 0; - } -} +// +// This file is basically OpenBSD's citrus_utf8.c but rewritten to not require a 12-byte mbstate_t +// so we're backwards-compatible with our LP32 ABI where mbstate_t was only 4 bytes. An additional +// advantage of this is that callers who don't supply their own mbstate_t won't be accessing shared +// state. +// +// We also implement the POSIX interface directly rather than being accessed via function pointers. 
+// + +#define ERR_ILLEGAL_SEQUENCE static_cast<size_t>(-1) +#define ERR_INCOMPLETE_SEQUENCE static_cast<size_t>(-2) -int mbsinit(const mbstate_t* /*ps*/) { +int mbsinit(const mbstate_t*) { + // We have no state, so we're always in the initial state. return 1; } -size_t mbrtowc(wchar_t* pwc, const char* s, size_t n, mbstate_t* /*ps*/) { +size_t mbrtowc(wchar_t* pwc, const char* s, size_t n, mbstate_t*) { if (s == NULL) { - return 0; + s = ""; + n = 1; + pwc = NULL; } + if (n == 0) { return 0; } - if (pwc != NULL) { - *pwc = *s; + + int ch; + if (((ch = static_cast<uint8_t>(*s)) & ~0x7f) == 0) { + // Fast path for plain ASCII characters. + if (pwc != NULL) { + *pwc = ch; + } + return (ch != '\0' ? 1 : 0); } - return (*s != 0); -} -size_t mbsnrtowcs(wchar_t* dst, const char** src, size_t n, size_t dst_size, mbstate_t* /*ps*/) { - size_t i = 0; // Number of input bytes read. - size_t o = 0; // Number of output characters written. - for (; i < n && (*src)[i] != 0; ++i) { - // TODO: UTF-8 support. - if (static_cast<uint8_t>((*src)[i]) > 0x7f) { + // Determine the number of octets that make up this character + // from the first octet, and a mask that extracts the + // interesting bits of the first octet. We already know + // the character is at least two bytes long. + int length; + int mask; + + // We also specify a lower bound for the character code to + // detect redundant, non-"shortest form" encodings. For + // example, the sequence C0 80 is _not_ a legal representation + // of the null character. This enforces a 1-to-1 mapping + // between character codes and their multibyte representations. 
+ wchar_t lower_bound; + + ch = static_cast<uint8_t>(*s); + if ((ch & 0x80) == 0) { + mask = 0x7f; + length = 1; + lower_bound = 0; + } else if ((ch & 0xe0) == 0xc0) { + mask = 0x1f; + length = 2; + lower_bound = 0x80; + } else if ((ch & 0xf0) == 0xe0) { + mask = 0x0f; + length = 3; + lower_bound = 0x800; + } else if ((ch & 0xf8) == 0xf0) { + mask = 0x07; + length = 4; + lower_bound = 0x10000; + } else { + // Malformed input; input is not UTF-8. See RFC 3629. + errno = EILSEQ; + return ERR_ILLEGAL_SEQUENCE; + } + + // Decode the octet sequence representing the character in chunks + // of 6 bits, most significant first. + wchar_t wch = static_cast<uint8_t>(*s++) & mask; + int i; + for (i = 1; i < MIN(length, n); i++) { + if ((*s & 0xc0) != 0x80) { + // Malformed input; bad characters in the middle of a character. errno = EILSEQ; - if (dst != NULL) { - *src = &(*src)[i]; - } - return static_cast<size_t>(-1); + return ERR_ILLEGAL_SEQUENCE; } - if (dst != NULL) { - if (o + 1 > dst_size) { - break; + wch <<= 6; + wch |= *s++ & 0x3f; + } + if (i < length) { + return ERR_INCOMPLETE_SEQUENCE; + } + if (wch < lower_bound) { + // Malformed input; redundant encoding. + errno = EILSEQ; + return ERR_ILLEGAL_SEQUENCE; + } + if ((wch >= 0xd800 && wch <= 0xdfff) || wch == 0xfffe || wch == 0xffff) { + // Malformed input; invalid code points. + errno = EILSEQ; + return ERR_ILLEGAL_SEQUENCE; + } + if (pwc != NULL) { + *pwc = wch; + } + return (wch == L'\0' ? 0 : length); +} + +size_t mbsnrtowcs(wchar_t* dst, const char** src, size_t nmc, size_t len, mbstate_t* ps) { + size_t i, o, r; + + if (dst == NULL) { + for (i = o = 0; i < nmc; i += r, o++) { + if (static_cast<uint8_t>((*src)[i]) < 0x80) { + // Fast path for plain ASCII characters. 
+ if ((*src)[i] == '\0') { + return o; + } + r = 1; + } else { + r = mbrtowc(NULL, *src + i, nmc - i, ps); + if (r == ERR_ILLEGAL_SEQUENCE) { + return r; + } + if (r == ERR_INCOMPLETE_SEQUENCE) { + return o; + } + if (r == 0) { + return o; + } } - dst[o++] = static_cast<wchar_t>((*src)[i]); - } else { - ++o; } + return o; } - // If we consumed all the input, terminate the output. - if (dst != NULL && o < dst_size) { - dst[o] = 0; - } - // If we were actually consuming input, record how far we got. - if (dst != NULL) { - if ((*src)[i] != 0) { - *src = &(*src)[i]; // This is where the next call should pick up. + + for (i = o = 0; i < nmc && o < len; i += r, o++) { + if (static_cast<uint8_t>((*src)[i]) < 0x80) { + // Fast path for plain ASCII characters. + dst[o] = (*src)[i]; + if ((*src)[i] == '\0') { + *src = NULL; + return o; + } + r = 1; } else { - *src = NULL; // We consumed everything. + r = mbrtowc(dst + o, *src + i, nmc - i, ps); + if (r == ERR_ILLEGAL_SEQUENCE) { + *src += i; + return r; + } + if (r == ERR_INCOMPLETE_SEQUENCE) { + *src += nmc; + return o; + } + if (r == 0) { + *src = NULL; + return o; + } } } + *src += i; return o; } -size_t mbsrtowcs(wchar_t* dst, const char** src, size_t dst_size, mbstate_t* ps) { - return mbsnrtowcs(dst, src, SIZE_MAX, dst_size, ps); -} - -wint_t towlower(wint_t wc) { - return tolower(wc); +size_t mbsrtowcs(wchar_t* dst, const char** src, size_t len, mbstate_t* ps) { + return mbsnrtowcs(dst, src, SIZE_MAX, len, ps); } -wint_t towupper(wint_t wc) { - return toupper(wc); -} +size_t wcrtomb(char* s, wchar_t wc, mbstate_t*) { + unsigned char lead; + int i, len; -int wctomb(char* s, wchar_t wc) { if (s == NULL) { - return 0; + // Reset to initial shift state (no-op). + return 1; + } + + if ((wc & ~0x7f) == 0) { + // Fast path for plain ASCII characters. + *s = wc; + return 1; } - if (wc <= 0xff) { - *s = static_cast<char>(wc); + + // Determine the number of octets needed to represent this character. 
+ // We always output the shortest sequence possible. Also specify the + // first few bits of the first octet, which contains the information + // about the sequence length. + if ((wc & ~0x7f) == 0) { + lead = 0; + len = 1; + } else if ((wc & ~0x7ff) == 0) { + lead = 0xc0; + len = 2; + } else if ((wc & ~0xffff) == 0) { + lead = 0xe0; + len = 3; + } else if ((wc & ~0x1fffff) == 0) { + lead = 0xf0; + len = 4; } else { - *s = '?'; + errno = EILSEQ; + return ERR_ILLEGAL_SEQUENCE; } - return 1; -} -size_t wcrtomb(char* s, wchar_t wc, mbstate_t* /*ps*/) { - if (s == NULL) { - char buf[MB_LEN_MAX]; - return wctomb(buf, L'\0'); + // Output the octets representing the character in chunks + // of 6 bits, least significant last. The first octet is + // a special case because it contains the sequence length + // information. + for (i = len - 1; i > 0; i--) { + s[i] = (wc & 0x3f) | 0x80; + wc >>= 6; } - return wctomb(s, wc); -} + *s = (wc & 0xff) | lead; -size_t wcsftime(wchar_t* wcs, size_t maxsize, const wchar_t* format, const struct tm* timptr) { - return strftime(reinterpret_cast<char*>(wcs), maxsize, reinterpret_cast<const char*>(format), timptr); + return len; } -size_t wcsnrtombs(char* dst, const wchar_t** src, size_t n, size_t dst_size, mbstate_t* /*ps*/) { - size_t i = 0; // Number of input characters read. - size_t o = 0; // Number of output bytes written. - for (; i < n && (*src)[i] != 0; ++i) { - // TODO: UTF-8 support. - if ((*src)[i] > 0x7f) { - errno = EILSEQ; - if (dst != NULL) { - *src = &(*src)[i]; +size_t wcsnrtombs(char* dst, const wchar_t** src, size_t nwc, size_t len, mbstate_t* ps) { + char buf[MB_LEN_MAX]; + size_t i, o, r; + if (dst == NULL) { + for (i = o = 0; i < nwc; i++, o += r) { + wchar_t wc = (*src)[i]; + if (wc < 0x80) { + // Fast path for plain ASCII characters. 
+ if (wc == 0) { + return o; + } + r = 1; + } else { + r = wcrtomb(buf, wc, ps); + if (r == ERR_ILLEGAL_SEQUENCE) { + return r; + } } - return static_cast<size_t>(-1); - } - if (dst != NULL) { - if (o + 1 > dst_size) { - break; - } - dst[o++] = static_cast<char>((*src)[i]); - } else { - ++o; } + return o; } - // If we consumed all the input, terminate the output. - if (dst != NULL && o < dst_size) { - dst[o] = 0; - } - // If we were actually consuming input, record how far we got. - if (dst != NULL) { - if ((*src)[i] != 0) { - *src = &(*src)[i]; // This is where the next call should pick up. + + for (i = o = 0; i < nwc && o < len; i++, o += r) { + wchar_t wc = (*src)[i]; + if (wc < 0x80) { + // Fast path for plain ASCII characters. + dst[o] = wc; + if (wc == 0) { + *src = NULL; + return o; + } + r = 1; + } else if (len - o >= sizeof(buf)) { + // Enough space to translate in-place. + r = wcrtomb(dst + o, wc, ps); + if (r == ERR_ILLEGAL_SEQUENCE) { + *src += i; + return r; + } } else { - *src = NULL; // We consumed everything. + // May not be enough space; use temp buffer. 
+ r = wcrtomb(buf, wc, ps); + if (r == ERR_ILLEGAL_SEQUENCE) { + *src += i; + return r; + } + if (r > len - o) { + break; + } + memcpy(dst + o, buf, r); } } + *src += i; return o; } -size_t wcsrtombs(char* dst, const wchar_t** src, size_t dst_size, mbstate_t* ps) { - return wcsnrtombs(dst, src, SIZE_MAX, dst_size, ps); -} - -wctype_t wctype(const char* property) { - static const char* const properties[WC_TYPE_MAX] = { - "<invalid>", - "alnum", "alpha", "blank", "cntrl", "digit", "graph", - "lower", "print", "punct", "space", "upper", "xdigit" - }; - for (size_t i = 0; i < WC_TYPE_MAX; ++i) { - if (!strcmp(properties[i], property)) { - return static_cast<wctype_t>(i); - } - } - return static_cast<wctype_t>(0); -} - -int wcwidth(wchar_t wc) { - return (wc > 0); +size_t wcsrtombs(char* dst, const wchar_t** src, size_t len, mbstate_t* ps) { + return wcsnrtombs(dst, src, SIZE_MAX, len, ps); } diff --git a/libc/bionic/wctype.cpp b/libc/bionic/wctype.cpp new file mode 100644 index 0000000..4fd590a --- /dev/null +++ b/libc/bionic/wctype.cpp @@ -0,0 +1,91 @@ +/* + * Copyright (C) 2008 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <ctype.h> +#include <stdlib.h> +#include <string.h> +#include <wchar.h> + +// TODO: these only work for the ASCII range; rewrite to dlsym icu4c? + +int iswalnum(wint_t wc) { return isalnum(wc); } +int iswalpha(wint_t wc) { return isalpha(wc); } +int iswblank(wint_t wc) { return isblank(wc); } +int iswcntrl(wint_t wc) { return iscntrl(wc); } +int iswdigit(wint_t wc) { return isdigit(wc); } +int iswgraph(wint_t wc) { return isgraph(wc); } +int iswlower(wint_t wc) { return islower(wc); } +int iswprint(wint_t wc) { return isprint(wc); } +int iswpunct(wint_t wc) { return ispunct(wc); } +int iswspace(wint_t wc) { return isspace(wc); } +int iswupper(wint_t wc) { return isupper(wc); } +int iswxdigit(wint_t wc) { return isxdigit(wc); } + +int iswctype(wint_t wc, wctype_t char_class) { + switch (char_class) { + case WC_TYPE_ALNUM: return isalnum(wc); + case WC_TYPE_ALPHA: return isalpha(wc); + case WC_TYPE_BLANK: return isblank(wc); + case WC_TYPE_CNTRL: return iscntrl(wc); + case WC_TYPE_DIGIT: return isdigit(wc); + case WC_TYPE_GRAPH: return isgraph(wc); + case WC_TYPE_LOWER: return islower(wc); + case WC_TYPE_PRINT: return isprint(wc); + case WC_TYPE_PUNCT: return ispunct(wc); + case WC_TYPE_SPACE: return isspace(wc); + case WC_TYPE_UPPER: return isupper(wc); + case WC_TYPE_XDIGIT: return isxdigit(wc); + default: return 0; + } +} + +wint_t towlower(wint_t wc) { return tolower(wc); } +wint_t towupper(wint_t wc) { return 
toupper(wc); } + +wctype_t wctype(const char* property) { + static const char* const properties[WC_TYPE_MAX] = { + "<invalid>", + "alnum", "alpha", "blank", "cntrl", "digit", "graph", + "lower", "print", "punct", "space", "upper", "xdigit" + }; + for (size_t i = 0; i < WC_TYPE_MAX; ++i) { + if (!strcmp(properties[i], property)) { + return static_cast<wctype_t>(i); + } + } + return static_cast<wctype_t>(0); +} + +int wcwidth(wchar_t wc) { + return (wc > 0); +} + +// TODO: implement wcsftime. +size_t wcsftime(wchar_t* wcs, size_t maxsize, const wchar_t* format, const struct tm* timptr) { + abort(); +} |