summaryrefslogtreecommitdiffstats
path: root/libs/utils/String16.cpp
diff options
context:
space:
mode:
authorKenny Root <kroot@google.com>2010-11-09 14:37:23 -0800
committerKenny Root <kroot@google.com>2010-11-12 15:53:40 -0800
commit300ba6846949f5b21c6d93d7698dbc39716cf832 (patch)
treef00aed47d06332a09aa6909a6605b0743661c981 /libs/utils/String16.cpp
parentd781089731127bd9199d47f53b170895868b8750 (diff)
downloadframeworks_base-300ba6846949f5b21c6d93d7698dbc39716cf832.zip
frameworks_base-300ba6846949f5b21c6d93d7698dbc39716cf832.tar.gz
frameworks_base-300ba6846949f5b21c6d93d7698dbc39716cf832.tar.bz2
Split UTF functions from String8/16
Split out all the UTF-8/16/32 handling code from String8/16 to its own file to allow better reuse of code. Change-Id: If9ce63920edc75472c38da4adce0d13cda9ad2f7
Diffstat (limited to 'libs/utils/String16.cpp')
-rw-r--r--libs/utils/String16.cpp253
1 files changed, 19 insertions, 234 deletions
diff --git a/libs/utils/String16.cpp b/libs/utils/String16.cpp
index eab7b2b..4ce1664 100644
--- a/libs/utils/String16.cpp
+++ b/libs/utils/String16.cpp
@@ -18,228 +18,17 @@
#include <utils/Debug.h>
#include <utils/Log.h>
+#include <utils/Unicode.h>
#include <utils/String8.h>
#include <utils/TextOutput.h>
#include <utils/threads.h>
#include <private/utils/Static.h>
-#ifdef HAVE_WINSOCK
-# undef nhtol
-# undef htonl
-# undef nhtos
-# undef htons
-
-# ifdef HAVE_LITTLE_ENDIAN
-# define ntohl(x) ( ((x) << 24) | (((x) >> 24) & 255) | (((x) << 8) & 0xff0000) | (((x) >> 8) & 0xff00) )
-# define htonl(x) ntohl(x)
-# define ntohs(x) ( (((x) << 8) & 0xff00) | (((x) >> 8) & 255) )
-# define htons(x) ntohs(x)
-# else
-# define ntohl(x) (x)
-# define htonl(x) (x)
-# define ntohs(x) (x)
-# define htons(x) (x)
-# endif
-#else
-# include <netinet/in.h>
-#endif
-
#include <memory.h>
#include <stdio.h>
#include <ctype.h>
-// ---------------------------------------------------------------------------
-
-int strcmp16(const char16_t *s1, const char16_t *s2)
-{
- char16_t ch;
- int d = 0;
-
- while ( 1 ) {
- d = (int)(ch = *s1++) - (int)*s2++;
- if ( d || !ch )
- break;
- }
-
- return d;
-}
-
-int strncmp16(const char16_t *s1, const char16_t *s2, size_t n)
-{
- char16_t ch;
- int d = 0;
-
- while ( n-- ) {
- d = (int)(ch = *s1++) - (int)*s2++;
- if ( d || !ch )
- break;
- }
-
- return d;
-}
-
-char16_t *strcpy16(char16_t *dst, const char16_t *src)
-{
- char16_t *q = dst;
- const char16_t *p = src;
- char16_t ch;
-
- do {
- *q++ = ch = *p++;
- } while ( ch );
-
- return dst;
-}
-
-size_t strlen16(const char16_t *s)
-{
- const char16_t *ss = s;
- while ( *ss )
- ss++;
- return ss-s;
-}
-
-
-char16_t *strncpy16(char16_t *dst, const char16_t *src, size_t n)
-{
- char16_t *q = dst;
- const char16_t *p = src;
- char ch;
-
- while (n) {
- n--;
- *q++ = ch = *p++;
- if ( !ch )
- break;
- }
-
- *q = 0;
-
- return dst;
-}
-
-size_t strnlen16(const char16_t *s, size_t maxlen)
-{
- const char16_t *ss = s;
-
- /* Important: the maxlen test must precede the reference through ss;
- since the byte beyond the maximum may segfault */
- while ((maxlen > 0) && *ss) {
- ss++;
- maxlen--;
- }
- return ss-s;
-}
-
-int strzcmp16(const char16_t *s1, size_t n1, const char16_t *s2, size_t n2)
-{
- const char16_t* e1 = s1+n1;
- const char16_t* e2 = s2+n2;
-
- while (s1 < e1 && s2 < e2) {
- const int d = (int)*s1++ - (int)*s2++;
- if (d) {
- return d;
- }
- }
-
- return n1 < n2
- ? (0 - (int)*s2)
- : (n1 > n2
- ? ((int)*s1 - 0)
- : 0);
-}
-
-int strzcmp16_h_n(const char16_t *s1H, size_t n1, const char16_t *s2N, size_t n2)
-{
- const char16_t* e1 = s1H+n1;
- const char16_t* e2 = s2N+n2;
-
- while (s1H < e1 && s2N < e2) {
- const char16_t c2 = ntohs(*s2N);
- const int d = (int)*s1H++ - (int)c2;
- s2N++;
- if (d) {
- return d;
- }
- }
-
- return n1 < n2
- ? (0 - (int)ntohs(*s2N))
- : (n1 > n2
- ? ((int)*s1H - 0)
- : 0);
-}
-
-static inline size_t
-utf8_char_len(uint8_t ch)
-{
- return ((0xe5000000 >> ((ch >> 3) & 0x1e)) & 3) + 1;
-}
-
-#define UTF8_SHIFT_AND_MASK(unicode, byte) (unicode)<<=6; (unicode) |= (0x3f & (byte));
-
-static inline uint32_t
-utf8_to_utf32(const uint8_t *src, size_t length)
-{
- uint32_t unicode;
-
- switch (length)
- {
- case 1:
- return src[0];
- case 2:
- unicode = src[0] & 0x1f;
- UTF8_SHIFT_AND_MASK(unicode, src[1])
- return unicode;
- case 3:
- unicode = src[0] & 0x0f;
- UTF8_SHIFT_AND_MASK(unicode, src[1])
- UTF8_SHIFT_AND_MASK(unicode, src[2])
- return unicode;
- case 4:
- unicode = src[0] & 0x07;
- UTF8_SHIFT_AND_MASK(unicode, src[1])
- UTF8_SHIFT_AND_MASK(unicode, src[2])
- UTF8_SHIFT_AND_MASK(unicode, src[3])
- return unicode;
- default:
- return 0xffff;
- }
-
- //printf("Char at %p: len=%d, utf-16=%p\n", src, length, (void*)result);
-}
-
-void
-utf8_to_utf16(const uint8_t *src, size_t srcLen,
- char16_t* dst, const size_t dstLen)
-{
- const uint8_t* const end = src + srcLen;
- const char16_t* const dstEnd = dst + dstLen;
- while (src < end && dst < dstEnd) {
- size_t len = utf8_char_len(*src);
- uint32_t codepoint = utf8_to_utf32((const uint8_t*)src, len);
-
- // Convert the UTF32 codepoint to one or more UTF16 codepoints
- if (codepoint <= 0xFFFF) {
- // Single UTF16 character
- *dst++ = (char16_t) codepoint;
- } else {
- // Multiple UTF16 characters with surrogates
- codepoint = codepoint - 0x10000;
- *dst++ = (char16_t) ((codepoint >> 10) + 0xD800);
- *dst++ = (char16_t) ((codepoint & 0x3FF) + 0xDC00);
- }
-
- src += len;
- }
- if (dst < dstEnd) {
- *dst = 0;
- }
-}
-
-// ---------------------------------------------------------------------------
namespace android {
@@ -270,37 +59,33 @@ void terminate_string16()
// ---------------------------------------------------------------------------
-static char16_t* allocFromUTF8(const char* in, size_t len)
+static char16_t* allocFromUTF8(const char* u8str, size_t u8len)
{
- if (len == 0) return getEmptyString();
-
- size_t chars = 0;
- const char* end = in+len;
- const char* p = in;
-
- while (p < end) {
- chars++;
- int utf8len = utf8_char_len(*p);
- uint32_t codepoint = utf8_to_utf32((const uint8_t*)p, utf8len);
- if (codepoint > 0xFFFF) chars++; // this will be a surrogate pair in utf16
- p += utf8len;
+ if (u8len == 0) return getEmptyString();
+
+ const uint8_t* u8cur = (const uint8_t*) u8str;
+
+ const ssize_t u16len = utf8_to_utf16_length(u8cur, u8len);
+ if (u16len < 0) {
+ return getEmptyString();
}
-
- size_t bufSize = (chars+1)*sizeof(char16_t);
- SharedBuffer* buf = SharedBuffer::alloc(bufSize);
+
+ const uint8_t* const u8end = u8cur + u8len;
+
+ SharedBuffer* buf = SharedBuffer::alloc(sizeof(char16_t)*(u16len+1));
if (buf) {
- p = in;
- char16_t* str = (char16_t*)buf->data();
-
- utf8_to_utf16((const uint8_t*)p, len, str, bufSize);
+ u8cur = (const uint8_t*) u8str;
+ char16_t* u16str = (char16_t*)buf->data();
+
+ utf8_to_utf16(u8cur, u8len, u16str);
//printf("Created UTF-16 string from UTF-8 \"%s\":", in);
//printHexData(1, str, buf->size(), 16, 1);
//printf("\n");
- return str;
+ return u16str;
}
-
+
return getEmptyString();
}