diff options
author | Daiki Ueno <ueno@gnu.org> | 2015-11-17 16:26:33 +0900 |
---|---|---|
committer | Daiki Ueno <ueno@gnu.org> | 2015-11-17 16:26:33 +0900 |
commit | b780a0a376e725d62c154aa60a7cded6d7570b16 (patch) | |
tree | 341a78ac56360134bda72b176f3d6d232d123572 | |
parent | 26200674f0a9769db79d073fa9b692666e926c59 (diff) | |
download | external_gettext-b780a0a376e725d62c154aa60a7cded6d7570b16.zip external_gettext-b780a0a376e725d62c154aa60a7cded6d7570b16.tar.gz external_gettext-b780a0a376e725d62c154aa60a7cded6d7570b16.tar.bz2 |
cldr-plurals: Rewrite XML handling using libxml2
* src/cldr-plurals.c: Include <libxml/tree.h> and
<libxml/parser.h>, instead of "libexpat-compat.h".
(extract_locale, extracted_rules, parser, logical_file_name)
(struct element_state, stack, stack_size, stack_depth)
(ensure_stack_size, start_element_handler, end_element_handler)
(character_data_handler): Remove.
(extract_rules): Rename from extract_rule and rewrite using DOM.
(main): Use extract_rules.
* src/Makefile.am (cldr_plurals_CFLAGS): Add $(INCXML).
-rw-r--r-- | gettext-tools/src/Makefile.am | 1 | ||||
-rw-r--r-- | gettext-tools/src/cldr-plurals.c | 385 |
2 files changed, 127 insertions, 259 deletions
diff --git a/gettext-tools/src/Makefile.am b/gettext-tools/src/Makefile.am index d88b8fd..c92cabe 100644 --- a/gettext-tools/src/Makefile.am +++ b/gettext-tools/src/Makefile.am @@ -239,6 +239,7 @@ recode_sr_latin_SOURCES = recode-sr-latin.c filter-sr-latin.c hostname_SOURCES = hostname.c urlget_SOURCES = urlget.c cldr_plurals_SOURCES = cldr-plural.y cldr-plural-exp.c cldr-plurals.c +cldr_plurals_CFLAGS = $(AM_CFLAGS) $(INCXML) cldr_plurals_LDADD = libgettextsrc.la $(LDADD) # How to build libgettextsrc.la. diff --git a/gettext-tools/src/cldr-plurals.c b/gettext-tools/src/cldr-plurals.c index c73ff96..b8cb005 100644 --- a/gettext-tools/src/cldr-plurals.c +++ b/gettext-tools/src/cldr-plurals.c @@ -27,7 +27,8 @@ #include <error.h> #include <getopt.h> #include "gettext.h" -#include "libexpat-compat.h" +#include <libxml/tree.h> +#include <libxml/parser.h> #include <locale.h> #include "progname.h" #include "propername.h" @@ -38,270 +39,141 @@ #define _(s) gettext(s) -#if DYNLOAD_LIBEXPAT || HAVE_LIBEXPAT -/* Locale name to extract. */ -static char *extract_locale; -/* CLDR plural rules extracted from XML. */ -static char *extracted_rules; - -/* XML parser. */ -static XML_Parser parser; - -/* Logical filename, used to label the extracted messages. */ -static char *logical_file_name; - -struct element_state -{ - bool extract_rules; - bool extract_string; - char *count; - int lineno; - char *buffer; - size_t bufmax; - size_t buflen; -}; -static struct element_state *stack; -static size_t stack_size; -static size_t stack_depth; - -/* Ensures stack_size >= size. */ -static void -ensure_stack_size (size_t size) +static char * +extract_rules (FILE *fp, + const char *real_filename, const char *logical_filename, + const char *locale) { - if (size > stack_size) - { - stack_size = 2 * stack_size; - if (stack_size < size) - stack_size = size; - stack = - (struct element_state *) - xrealloc (stack, stack_size * sizeof (struct element_state)); - } -} - -/* Callback called when <element> is seen. */ -static void -start_element_handler (void *userData, const char *name, - const char **attributes) -{ - struct element_state *p; + xmlDocPtr doc; + xmlNodePtr node, n; + size_t locale_length; + char *buffer = NULL, *p; + size_t bufmax = 0; + size_t buflen = 0; + + doc = xmlReadFd (fileno (fp), logical_filename, NULL, + XML_PARSE_NONET + | XML_PARSE_NOWARNING + | XML_PARSE_NOERROR + | XML_PARSE_NOBLANKS); + if (doc == NULL) + error (EXIT_FAILURE, 0, _("memory exhausted")); - if (!stack_depth && strcmp (name, "supplementalData") != 0) + node = xmlDocGetRootElement (doc); + if (!xmlStrEqual (node->name, BAD_CAST "supplementalData")) { error_at_line (0, 0, - logical_file_name, - XML_GetCurrentLineNumber (parser), + logical_filename, + xmlGetLineNo (node), _("\ The root element <%s> is not allowed in a valid CLDR file"), - name); + node->name); + return NULL; } - /* Increase stack depth. */ - stack_depth++; - ensure_stack_size (stack_depth + 1); - - p = &stack[stack_depth]; - p->count = NULL; - p->extract_rules = false; - p->extract_string = false; - p->lineno = XML_GetCurrentLineNumber (parser); - p->buffer = NULL; - p->bufmax = 0; - p->buflen = 0; - - if (strcmp (name, "pluralRules") == 0) - { - const char *locales = NULL; - const char **attp = attributes; - while (*attp != NULL) - { - if (strcmp (attp[0], "locales") == 0) - locales = attp[1]; - attp += 2; - } - if (locales) - { - const char *cp = locales; - size_t length = strlen (extract_locale); - while (*cp) - { - while (c_isspace (*cp)) - cp++; - if (strncmp (cp, extract_locale, length) == 0 - && (*(cp + length) == ' ' - || *(cp + length) == '\n' - || *(cp + length) == '\0')) - { - p->extract_rules = true; - break; - } - while (*cp && !c_isspace (*cp)) - cp++; - } - } - } - else if (stack_depth > 1 && strcmp (name, "pluralRule") == 0) + for (n = node->children; n; n = n->next) { - struct element_state *parent = &stack[stack_depth - 1]; - - p->extract_string = parent->extract_rules; - if (p->extract_string) + if (xmlStrEqual (n->name, BAD_CAST "plurals")) { - const char *count = NULL; - const char **attp = attributes; - while (*attp != NULL) - { - if (strcmp (attp[0], "count") == 0) - count = attp[1]; - attp += 2; - } - p->count = xstrdup (count); + node = n; + break; } } -} -/* Callback called when </element> is seen. */ -static void -end_element_handler (void *userData, const char *name) -{ - struct element_state *p = &stack[stack_depth]; - - if (p->extract_string && strcmp (name, "pluralRule") == 0) + locale_length = strlen (locale); + for (n = node->children; n; n = n->next) { - struct element_state *parent = &stack[stack_depth - 1]; - size_t length; + xmlChar *locales; + xmlChar *cp; + xmlNodePtr n2; + bool found = false; - /* NUL terminate the buffer. */ - if (p->buflen > 0) - { - if (p->buflen == p->bufmax) - p->buffer = (char *) xrealloc (p->buffer, p->buflen + 1); - p->buffer[p->buflen] = '\0'; - } + if (!xmlStrEqual (n->name, BAD_CAST "pluralRules")) + continue; - length = strlen (p->count) + strlen (": ") - + p->buflen + strlen ("; "); - if (parent->buflen + length + 1 > parent->bufmax) + if (!xmlHasProp (n, BAD_CAST "locales")) { - parent->bufmax = 2 * parent->bufmax; - if (parent->bufmax < parent->buflen + length + 1) - parent->bufmax = parent->buflen + length + 1; - parent->buffer = (char *) xrealloc (parent->buffer, parent->bufmax); + error_at_line (0, 0, + logical_filename, + xmlGetLineNo (n), + _("\ +The element <%s> does not have attribute <%s>"), + "pluralRules", "locales"); + continue; } - sprintf (parent->buffer + parent->buflen, - "%s: %s; ", - p->count, p->buffer == NULL ? "" : p->buffer); - parent->buflen += length; - parent->buffer[parent->buflen] = '\0'; - } - else if (p->extract_rules && strcmp (name, "pluralRules") == 0) - { - char *cp; - /* NUL terminate the buffer. */ - if (p->buflen > 0) + cp = locales = xmlGetProp (n, BAD_CAST "locales"); + while (*cp != '\0') { - if (p->buflen == p->bufmax) - p->buffer = (char *) xrealloc (p->buffer, p->buflen + 1); - p->buffer[p->buflen] = '\0'; + while (c_isspace (*cp)) + cp++; + if (xmlStrncmp (cp, BAD_CAST locale, locale_length) == 0 + && (*(cp + locale_length) == '\0' + || c_isspace (*(cp + locale_length)))) + { + found = true; + break; + } + while (*cp && !c_isspace (*cp)) + cp++; } + xmlFree (locales); - /* Scrub the last semicolon, if any. */ - cp = strrchr (p->buffer, ';'); - if (cp) - *cp = '\0'; - extracted_rules = xstrdup (p->buffer); - } - - /* Free memory for this stack level. */ - if (p->count != NULL) - free (p->count); - if (p->buffer != NULL) - free (p->buffer); - - /* Decrease stack depth. */ - stack_depth--; -} - -/* Callback called when some text is seen. */ -static void -character_data_handler (void *userData, const char *s, int len) -{ - struct element_state *p = &stack[stack_depth]; + if (!found) + continue; - /* Accumulate character data. */ - if (p->extract_string && len > 0) - { - if (p->buflen + len > p->bufmax) + for (n2 = n->children; n2; n2 = n2->next) { - p->bufmax = 2 * p->bufmax; - if (p->bufmax < p->buflen + len) - p->bufmax = p->buflen + len; - p->buffer = (char *) xrealloc (p->buffer, p->bufmax); - } - memcpy (p->buffer + p->buflen, s, len); - p->buflen += len; - } -} + xmlChar *count; + xmlChar *content; + size_t length; -static void -extract_rule (FILE *fp, - const char *real_filename, const char *logical_filename, - const char *locale) -{ - logical_file_name = xstrdup (logical_filename); - extract_locale = xstrdup (locale); + if (!xmlStrEqual (n2->name, BAD_CAST "pluralRule")) + continue; - parser = XML_ParserCreate (NULL); - if (parser == NULL) - error (EXIT_FAILURE, 0, _("memory exhausted")); + if (!xmlHasProp (n2, BAD_CAST "count")) + { + error_at_line (0, 0, + logical_filename, + xmlGetLineNo (n2), + _("\ +The element <%s> does not have attribute <%s>"), + "pluralRule", "count"); + break; + } - XML_SetElementHandler (parser, start_element_handler, end_element_handler); - XML_SetCharacterDataHandler (parser, character_data_handler); + count = xmlGetProp (n2, BAD_CAST "count"); + content = xmlNodeGetContent (n2); + length = xmlStrlen (count) + strlen (": ") + + xmlStrlen (content) + strlen ("; "); - stack_depth = 0; + if (buflen + length + 1 > bufmax) + { + bufmax *= 2; + if (bufmax < buflen + length + 1) + bufmax = buflen + length + 1; + buffer = (char *) xrealloc (buffer, bufmax); + } - while (!feof (fp)) - { - char buf[4096]; - int count = fread (buf, 1, sizeof buf, fp); + sprintf (buffer + buflen, "%s: %s; ", count, content); + xmlFree (count); + xmlFree (content); - if (count == 0) - { - if (ferror (fp)) - error (EXIT_FAILURE, errno, _("\ -error while reading \"%s\""), real_filename); - /* EOF reached. */ - break; + buflen += length; + buffer[buflen] = '\0'; } - - if (XML_Parse (parser, buf, count, 0) == 0) - error (EXIT_FAILURE, 0, _("%s:%lu:%lu: %s"), logical_filename, - (unsigned long) XML_GetCurrentLineNumber (parser), - (unsigned long) XML_GetCurrentColumnNumber (parser) + 1, - XML_ErrorString (XML_GetErrorCode (parser))); } - if (XML_Parse (parser, NULL, 0, 1) == 0) - error (EXIT_FAILURE, 0, _("%s:%lu:%lu: %s"), logical_filename, - (unsigned long) XML_GetCurrentLineNumber (parser), - (unsigned long) XML_GetCurrentColumnNumber (parser) + 1, - XML_ErrorString (XML_GetErrorCode (parser))); - - /* Close scanner. */ - free (logical_file_name); - logical_file_name = NULL; - - free (extract_locale); - extract_locale = NULL; + /* Scrub the last semicolon, if any. */ + p = strrchr (buffer, ';'); + if (p) + *p = '\0'; - XML_ParserFree (parser); - parser = NULL; + xmlFreeDoc (doc); + return buffer; } -#endif - /* Display usage information and exit. */ static void usage (int status) @@ -422,42 +294,37 @@ There is NO WARRANTY, to the extent permitted by law.\n\ if (argc == optind + 2) { /* Two arguments: Read CLDR rules from a file. */ -#if DYNLOAD_LIBEXPAT || HAVE_LIBEXPAT - if (LIBEXPAT_AVAILABLE ()) - { - const char *locale = argv[optind]; - const char *logical_filename = argv[optind + 1]; - FILE *fp; - - fp = fopen (logical_filename, "r"); - if (fp == NULL) - error (1, 0, _("%s cannot be read"), logical_filename); - - extract_rule (fp, logical_filename, logical_filename, locale); - fclose (fp); - if (extracted_rules == NULL) - error (1, 0, _("cannot extract rules for %s"), locale); - - if (opt_cldr_format) - printf ("%s\n", extracted_rules); - else - { - struct cldr_plural_rule_list_ty *result; + const char *locale = argv[optind]; + const char *logical_filename = argv[optind + 1]; + char *extracted_rules; + FILE *fp; - result = cldr_plural_parse (extracted_rules); - if (result == NULL) - error (1, 0, _("cannot parse CLDR rule")); + LIBXML_TEST_VERSION - cldr_plural_rule_list_print (result, stdout); - cldr_plural_rule_list_free (result); - } - free (extracted_rules); - } + fp = fopen (logical_filename, "r"); + if (fp == NULL) + error (1, 0, _("%s cannot be read"), logical_filename); + + extracted_rules = extract_rules (fp, logical_filename, logical_filename, + locale); + fclose (fp); + if (extracted_rules == NULL) + error (1, 0, _("cannot extract rules for %s"), locale); + + if (opt_cldr_format) + printf ("%s\n", extracted_rules); else -#endif { - error (1, 0, _("extraction is not supported")); + struct cldr_plural_rule_list_ty *result; + + result = cldr_plural_parse (extracted_rules); + if (result == NULL) + error (1, 0, _("cannot parse CLDR rule")); + + cldr_plural_rule_list_print (result, stdout); + cldr_plural_rule_list_free (result); } + free (extracted_rules); } else if (argc == optind) { |