diff options
Diffstat (limited to 'third_party/harfbuzz/contrib/harfbuzz-unicode.c')
-rw-r--r-- | third_party/harfbuzz/contrib/harfbuzz-unicode.c | 264 |
1 files changed, 264 insertions, 0 deletions
diff --git a/third_party/harfbuzz/contrib/harfbuzz-unicode.c b/third_party/harfbuzz/contrib/harfbuzz-unicode.c new file mode 100644 index 0000000..9b3c43e --- /dev/null +++ b/third_party/harfbuzz/contrib/harfbuzz-unicode.c @@ -0,0 +1,264 @@ +#include <stdint.h> +#include <stdlib.h> + +#include <harfbuzz-external.h> +#include <harfbuzz-impl.h> +#include <harfbuzz-shaper.h> +#include "harfbuzz-unicode.h" + +#include "tables/script-properties.h" +#include "tables/grapheme-break-properties.h" + +uint32_t +utf16_to_code_point(const uint16_t *chars, size_t len, ssize_t *iter) { + const uint16_t v = chars[(*iter)++]; + if (HB_IsHighSurrogate(v)) { + // surrogate pair + if (*iter >= len) { + // the surrogate is incomplete. + return HB_InvalidCodePoint; + } + const uint16_t v2 = chars[(*iter)++]; + if (!HB_IsLowSurrogate(v2)) { + // invalidate surrogate pair. + return HB_InvalidCodePoint; + } + + return HB_SurrogateToUcs4(v, v2); + } + + if (HB_IsLowSurrogate(v)) { + // this isn't a valid code point + return HB_InvalidCodePoint; + } + + return v; +} + +uint32_t +utf16_to_code_point_prev(const uint16_t *chars, size_t len, ssize_t *iter) { + const uint16_t v = chars[(*iter)--]; + if (HB_IsLowSurrogate(v)) { + // surrogate pair + if (*iter < 0) { + // the surrogate is incomplete. + return HB_InvalidCodePoint; + } + const uint16_t v2 = chars[(*iter)--]; + if (!HB_IsHighSurrogate(v2)) { + // invalidate surrogate pair. + return HB_InvalidCodePoint; + } + + return HB_SurrogateToUcs4(v2, v); + } + + if (HB_IsHighSurrogate(v)) { + // this isn't a valid code point + return HB_InvalidCodePoint; + } + + return v; +} + +static int +script_property_cmp(const void *vkey, const void *vcandidate) { + const uint32_t key = (uint32_t) (intptr_t) vkey; + const struct script_property *candidate = vcandidate; + + if (key < candidate->range_start) { + return -1; + } else if (key > candidate->range_end) { + return 1; + } else { + return 0; + } +} + +HB_Script +code_point_to_script(uint32_t cp) { + const void *vprop = bsearch((void *) (intptr_t) cp, script_properties, + script_properties_count, + sizeof(struct script_property), + script_property_cmp); + if (!vprop) + return HB_Script_Common; + + return ((const struct script_property *) vprop)->script; +} + +char +hb_utf16_script_run_next(unsigned *num_code_points, HB_ScriptItem *output, + const uint16_t *chars, size_t len, ssize_t *iter) { + if (*iter == len) + return 0; + + output->pos = *iter; + const uint32_t init_cp = utf16_to_code_point(chars, len, iter); + unsigned cps = 1; + if (init_cp == HB_InvalidCodePoint) + return 0; + const HB_Script init_script = code_point_to_script(init_cp); + HB_Script current_script = init_script; + output->script = init_script; + + for (;;) { + if (*iter == len) + break; + const ssize_t prev_iter = *iter; + const uint32_t cp = utf16_to_code_point(chars, len, iter); + if (cp == HB_InvalidCodePoint) + return 0; + cps++; + const HB_Script script = code_point_to_script(cp); + + if (script != current_script) { + if (current_script == init_script == HB_Script_Inherited) { + // If we started off as inherited, we take whatever we can find. + output->script = script; + current_script = script; + continue; + } else if (script == HB_Script_Inherited) { + current_script = script; + continue; + } else { + *iter = prev_iter; + cps--; + break; + } + } + } + + if (output->script == HB_Script_Inherited) + output->script = HB_Script_Common; + + output->length = *iter - output->pos; + if (num_code_points) + *num_code_points = cps; + return 1; +} + +char +hb_utf16_script_run_prev(unsigned *num_code_points, HB_ScriptItem *output, + const uint16_t *chars, size_t len, ssize_t *iter) { + if (*iter == (size_t) -1) + return 0; + + const size_t ending_index = *iter; + const uint32_t init_cp = utf16_to_code_point_prev(chars, len, iter); + unsigned cps = 1; + if (init_cp == HB_InvalidCodePoint) + return 0; + const HB_Script init_script = code_point_to_script(init_cp); + HB_Script current_script = init_script; + output->script = init_script; + + for (;;) { + if (*iter < 0) + break; + const ssize_t prev_iter = *iter; + const uint32_t cp = utf16_to_code_point_prev(chars, len, iter); + if (cp == HB_InvalidCodePoint) + return 0; + cps++; + const HB_Script script = code_point_to_script(cp); + + if (script != current_script) { + if (current_script == init_script == HB_Script_Inherited) { + // If we started off as inherited, we take whatever we can find. + output->script = script; + current_script = script; + continue; + } else if (script == HB_Script_Inherited) { + current_script = script; + continue; + } else { + *iter = prev_iter; + cps--; + break; + } + } + } + + if (output->script == HB_Script_Inherited) + output->script = HB_Script_Common; + + output->pos = *iter + 1; + output->length = ending_index - *iter; + if (num_code_points) + *num_code_points = cps; + return 1; +} + +static int +grapheme_break_property_cmp(const void *vkey, const void *vcandidate) { + const uint32_t key = (uint32_t) (intptr_t) vkey; + const struct grapheme_break_property *candidate = vcandidate; + + if (key < candidate->range_start) { + return -1; + } else if (key > candidate->range_end) { + return 1; + } else { + return 0; + } +} + +HB_GraphemeClass +HB_GetGraphemeClass(HB_UChar32 ch) { + const void *vprop = bsearch((void *) (intptr_t) ch, grapheme_break_properties, + grapheme_break_properties_count, + sizeof(struct grapheme_break_property), + grapheme_break_property_cmp); + if (!vprop) + return HB_Grapheme_Other; + + return ((const struct grapheme_break_property *) vprop)->klass; +} + +HB_WordClass +HB_GetWordClass(HB_UChar32 ch) { + abort(); + return 0; +} + +HB_SentenceClass +HB_GetSentenceClass(HB_UChar32 ch) { + abort(); + return 0; +} + +void +HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch, HB_GraphemeClass *gclass, HB_LineBreakClass *breakclass) { + *gclass = HB_GetGraphemeClass(ch); + *breakclass = HB_GetLineBreakClass(ch); +} + +HB_UChar16 +HB_GetMirroredChar(HB_UChar16 ch) { + abort(); + return 0; +} + +void * +HB_Library_Resolve(const char *library, const char *symbol) { + abort(); + return NULL; +} + +void * +HB_TextCodecForMib(int mib) { + abort(); + return NULL; +} + +char * +HB_TextCodec_ConvertFromUnicode(void *codec, const HB_UChar16 *unicode, hb_uint32 length, hb_uint32 *outputLength) { + abort(); + return NULL; +} + +void +HB_TextCodec_FreeResult(char *v) { + abort(); +} |