summaryrefslogtreecommitdiffstats
path: root/gnulib-local
diff options
context:
space:
mode:
author: Daiki Ueno <ueno@gnu.org> 2015-12-09 17:35:34 +0900
committer: Daiki Ueno <ueno@gnu.org> 2015-12-09 19:07:06 +0900
commit: 898e184a596c43abf1067089a03df3e79b4e4527 (patch)
tree: e9f5596bb75f8a0ba47f9b34d26346f53d981613 /gnulib-local
parent: f6dde6baeef8e6cb5ec92bc6c67c5c0304ba4396 (diff)
download: external_gettext-898e184a596c43abf1067089a03df3e79b4e4527.zip
external_gettext-898e184a596c43abf1067089a03df3e79b4e4527.tar.gz
external_gettext-898e184a596c43abf1067089a03df3e79b4e4527.tar.bz2
build: Remove expat dependency
* DEPENDENCIES: Suggest libxml2 instead of expat.
* gnulib-local/lib/markup.c: New file.
* gnulib-local/lib/markup.h: New file.
* gnulib-local/modules/markup: New file.
* autogen.sh (GNULIB_MODULES_LIBGETTEXTPO): Add markup module.
* gettext-tools/configure.ac: Remove checks for expat.
* gettext-tools/gnulib-lib/.gitignore: Ignore modules pulled by gnulib-tool due to the markup module usage.
* gettext-tools/gnulib-tests/.gitignore: Likewise.
* gettext-tools/libgettextpo/.gitignore: Likewise.
* gettext-tools/libgettextpo/Makefile.am (libgettextpo_la_AUXSOURCES): Remove ../src/libexpat-compat.c.
  (libgettextpo_la_LDFLAGS): Remove @LTLIBEXPAT@.
* gettext-tools/src/Makefile.am (noinst_HEADERS): Remove libexpat-compat.h.
  (libgettextsrc_la_SOURCES): Remove libexpat-compat.c.
  (libgettextsrc_la_LDFLAGS): Remove @LTLIBEXPAT@.
* gettext-tools/src/format-kde-kuit.c: Use functions from markup.h, when the file is being compiled as part of libgettextpo. Otherwise use libxml2.
* gettext-tools/src/libexpat-compat.c: Remove.
* gettext-tools/src/libexpat-compat.h: Remove.
Diffstat (limited to 'gnulib-local')
-rw-r--r-- gnulib-local/lib/markup.c 1523
-rw-r--r-- gnulib-local/lib/markup.h 164
-rw-r--r-- gnulib-local/modules/markup 31
3 files changed, 1718 insertions, 0 deletions
diff --git a/gnulib-local/lib/markup.c b/gnulib-local/lib/markup.c
new file mode 100644
index 0000000..a0f6856
--- /dev/null
+++ b/gnulib-local/lib/markup.c
@@ -0,0 +1,1523 @@
+/* markup.c -- simple XML-like parser
+ Copyright (C) 2015 Free Software Foundation, Inc.
+
+ This file is not part of the GNU gettext program, but is used with
+ GNU gettext.
+
+ This is a stripped down version of GLib's gmarkup.c. The original
+ copyright notice is as follows:
+*/
+
+/* gmarkup.c - Simple XML-like parser
+ *
+ * Copyright 2000, 2003 Red Hat, Inc.
+ * Copyright 2007, 2008 Ryan Lortie <desrt@desrt.ca>
+ *
+ * GLib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * GLib is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with GLib; see the file COPYING.LIB. If not,
+ * see <http://www.gnu.org/licenses/>.
+ */
+
+#include "config.h"
+
+#include <assert.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+
+/* Specification */
+#include "markup.h"
+
+#include "c-ctype.h"
+#include "gettext.h"
+#include "gl_linked_list.h"
+#include "gl_xlist.h"
+#include "unictype.h"
+#include "unistr.h"
+#include "xalloc.h"
+#include "xvasprintf.h"
+
+#define _(s) gettext(s)
+
+/**
+ * The "markup" parser is intended to parse a simple markup format
+ * that's a subset of XML. This is a small, efficient, easy-to-use
+ * parser. It should not be used if you expect to interoperate with
+ * other applications generating full-scale XML. However, it's very
+ * useful for application data files, config files, etc. where you
+ * know your application will be the only one writing the file.
+ * Full-scale XML parsers should be able to parse the subset used by
+ * markup, so you can easily migrate to full-scale XML at a later
+ * time if the need arises.
+ *
+ * The parser is not guaranteed to signal an error on all invalid XML;
+ * the parser may accept documents that an XML parser would not.
+ * However, XML documents which are not well-formed (which is a weaker
+ * condition than being valid. See the XML specification
+ * <http://www.w3.org/TR/REC-xml/> for definitions of these terms.)
+ * are not considered valid GMarkup documents.
+ *
+ * Simplifications to XML include:
+ *
+ * - Only UTF-8 encoding is allowed
+ *
+ * - No user-defined entities
+ *
+ * - Processing instructions, comments and the doctype declaration
+ * are "passed through" but are not interpreted in any way
+ *
+ * - No DTD or validation
+ *
+ * The markup format does support:
+ *
+ * - Elements
+ *
+ * - Attributes
+ *
+ * - 5 standard entities: &amp; &lt; &gt; &quot; &apos;
+ *
+ * - Character references
+ *
+ * - Sections marked as CDATA
+ */
+
+/* States of the parser's state machine.  The current state is kept in
+   the parse context, and markup_parse_context_parse dispatches on it
+   for as long as input remains.  */
+typedef enum
+{
+ STATE_START,
+ STATE_AFTER_OPEN_ANGLE,
+ STATE_AFTER_CLOSE_ANGLE,
+ STATE_AFTER_ELISION_SLASH, /* the slash that obviates need for end element */
+ STATE_INSIDE_OPEN_TAG_NAME,
+ STATE_INSIDE_ATTRIBUTE_NAME,
+ STATE_AFTER_ATTRIBUTE_NAME,
+ STATE_BETWEEN_ATTRIBUTES,
+ STATE_AFTER_ATTRIBUTE_EQUALS_SIGN,
+ STATE_INSIDE_ATTRIBUTE_VALUE_SQ, /* value delimited by single quotes */
+ STATE_INSIDE_ATTRIBUTE_VALUE_DQ, /* value delimited by double quotes */
+ STATE_INSIDE_TEXT,
+ STATE_AFTER_CLOSE_TAG_SLASH,
+ STATE_INSIDE_CLOSE_TAG_NAME,
+ STATE_AFTER_CLOSE_TAG_NAME,
+ STATE_INSIDE_PASSTHROUGH, /* entered after "<?" or "<!" */
+ STATE_ERROR /* set by emit_error */
+} markup_parse_state_ty;
+
+/* Snapshot of the parser configuration taken when a subparser is pushed
+   (markup_parse_context_push) and restored when it is popped
+   (pop_subparser_stack).  */
+typedef struct
+{
+ const char *prev_element;
+ const markup_parser_ty *prev_parser;
+ void *prev_user_data;
+} markup_recursion_tracker_ty;
+
+/* Growable character buffer.  A freshly created string is all zeros, so
+   BUFFER is NULL until the first append.  */
+typedef struct
+{
+ /* heap storage, grown with xrealloc; NUL-terminated once non-NULL */
+ char *buffer;
+ /* number of bytes allocated for BUFFER */
+ size_t bufmax;
+ /* number of bytes in use, not counting the terminating NUL */
+ size_t buflen;
+} markup_string_ty;
+
+/* Complete state of one parser instance: the active callbacks, the
+   position in the input, the state-machine state and the data
+   accumulated for the construct currently being parsed.  */
+struct _markup_parse_context_ty
+{
+ const markup_parser_ty *parser;
+
+ markup_parse_flags_ty flags;
+
+ /* Both start at 1; advance_char increments char_number for each
+    character and, on a newline, bumps line_number and resets
+    char_number to 1.  */
+ int line_number;
+ int char_number;
+
+ markup_parse_state_ty state;
+
+ void *user_data;
+
+ /* A piece of character data or an element that
+ * hasn't "ended" yet so we haven't yet called
+ * the callback for it.
+ */
+ markup_string_ty *partial_chunk;
+
+ gl_list_t tag_stack; /* <markup_string_ty> */
+
+ /* Parallel arrays of attribute names and values for the open tag being
+    parsed.  cur_attr is the index of the most recently added attribute
+    (-1 when there is none); alloc_attrs is the number of slots
+    allocated in each array.  */
+ char **attr_names;
+ char **attr_values;
+ int cur_attr;
+ int alloc_attrs;
+
+ /* The text passed to the current markup_parse_context_parse call, its
+    length, and a pointer one past its last byte.  */
+ const char *current_text;
+ ssize_t current_text_len;
+ const char *current_text_end;
+
+ /* used to save the start of the last interesting thingy */
+ const char *start;
+
+ /* current read position inside current_text */
+ const char *iter;
+
+ /* copy of the message most recently passed to emit_error, or NULL */
+ char *error_text;
+
+ unsigned int document_empty : 1;
+ unsigned int parsing : 1;
+ unsigned int awaiting_pop : 1;
+ /* nesting depth of '<' versus '>' while inside a passthrough */
+ int balance;
+
+ /* subparser support */
+ gl_list_t subparser_stack; /* <markup_recursion_tracker_ty *> */
+ const char *subparser_element;
+};
+
+/* Returns a newly allocated, zero-filled string: no buffer yet, length
+   and capacity both 0.  */
+static markup_string_ty *
+markup_string_new (void)
+{
+  markup_string_ty *fresh = XZALLOC (markup_string_ty);
+
+  return fresh;
+}
+
+/* Destroys STRING.  When FREE_SEGMENT is true the character buffer is
+   freed as well and NULL is returned; otherwise ownership of the buffer
+   passes to the caller, who receives it as the return value.  */
+static char *
+markup_string_free (markup_string_ty *string, bool free_segment)
+{
+  char *segment = string->buffer;
+
+  free (string);
+
+  if (!free_segment)
+    return segment;
+
+  free (segment);
+  return NULL;
+}
+
+/* One-argument wrapper around markup_string_free that always releases
+   the buffer too; installed as the element disposal function of the
+   tag stack.  */
+static void
+markup_string_free1 (markup_string_ty *string)
+{
+  (void) markup_string_free (string, true);
+}
+
+/* Shortens STRING to LENGTH bytes and re-terminates it.  LENGTH must not
+   exceed the current length.
+
+   The previous assertion (length < buflen - 1) rejected shrinking by a
+   single byte, which legitimately happens when unescaping collapses
+   "\r\n" into "\n"; it also wrapped around when buflen was 0.  */
+static void
+markup_string_truncate (markup_string_ty *string, size_t length)
+{
+  assert (string != NULL && length <= string->buflen);
+
+  /* A string that was never appended to has no buffer to terminate.  */
+  if (string->buffer == NULL)
+    return;
+
+  string->buffer[length] = '\0';
+  string->buflen = length;
+}
+
+/* Appends the LENGTH bytes at TO_APPEND to STRING, growing the buffer as
+   needed and keeping it NUL-terminated.
+
+   The terminator and the new length must be computed relative to the
+   existing content; writing them at LENGTH alone (as before) truncated
+   or corrupted the string on every append after the first.  */
+static void
+markup_string_append (markup_string_ty *string, const char *to_append,
+                      size_t length)
+{
+  size_t needed = string->buflen + length + 1;
+
+  if (needed > string->bufmax)
+    {
+      /* Grow geometrically, but never to less than what is needed.  */
+      string->bufmax *= 2;
+      if (needed > string->bufmax)
+        string->bufmax = needed;
+      string->buffer = xrealloc (string->buffer, string->bufmax);
+    }
+
+  memcpy (string->buffer + string->buflen, to_append, length);
+  string->buflen += length;
+  string->buffer[string->buflen] = '\0';
+}
+
+/* Empties STRING while keeping its storage for reuse.  A string with no
+   storage (capacity 0) is left untouched.  */
+static inline void
+string_blank (markup_string_ty *string)
+{
+  if (string->bufmax == 0)
+    return;
+
+  string->buffer[0] = '\0';
+  string->buflen = 0;
+}
+
+/* Allocates and initializes a parse context that uses the callbacks in
+   PARSER, the behavior flags FLAGS, and USER_DATA as the callback user
+   data.  A context can be fed any number of documents as long as no
+   error occurs; after an error it cannot parse further text and has to
+   be freed and replaced by a new context.  */
+markup_parse_context_ty *
+markup_parse_context_new (const markup_parser_ty *parser,
+                          markup_parse_flags_ty flags,
+                          void *user_data)
+{
+  markup_parse_context_ty *ctx;
+
+  assert (parser != NULL);
+
+  ctx = XMALLOC (markup_parse_context_ty);
+
+  /* Caller-supplied configuration.  */
+  ctx->parser = parser;
+  ctx->flags = flags;
+  ctx->user_data = user_data;
+
+  /* State machine and source position.  */
+  ctx->state = STATE_START;
+  ctx->line_number = 1;
+  ctx->char_number = 1;
+  ctx->document_empty = true;
+  ctx->parsing = false;
+  ctx->balance = 0;
+
+  /* No input has been supplied yet.  */
+  ctx->current_text = NULL;
+  ctx->current_text_len = -1;
+  ctx->current_text_end = NULL;
+  ctx->start = NULL;
+  ctx->iter = NULL;
+
+  /* Nothing accumulated, no attributes, no error.  */
+  ctx->partial_chunk = NULL;
+  ctx->attr_names = NULL;
+  ctx->attr_values = NULL;
+  ctx->cur_attr = -1;
+  ctx->alloc_attrs = 0;
+  ctx->error_text = NULL;
+
+  /* Stack of open element names; entries are disposed of with
+     markup_string_free1.  */
+  ctx->tag_stack =
+    gl_list_create_empty (GL_LINKED_LIST,
+                          NULL, NULL,
+                          (gl_listelement_dispose_fn) markup_string_free1,
+                          true);
+
+  /* Stack of saved parser configurations; entries are disposed of with
+     free.  */
+  ctx->subparser_stack =
+    gl_list_create_empty (GL_LINKED_LIST,
+                          NULL, NULL,
+                          (gl_listelement_dispose_fn) free,
+                          true);
+  ctx->subparser_element = NULL;
+  ctx->awaiting_pop = false;
+
+  return ctx;
+}
+
+static void clear_attributes (markup_parse_context_ty *context);
+
+/* Releases CONTEXT together with everything it owns.  It must not be
+   called from inside one of the markup_parser_ty functions or while a
+   subparser is pushed; the assertions below guard these conditions.  */
+void
+markup_parse_context_free (markup_parse_context_ty *context)
+{
+  assert (context != NULL);
+  assert (!context->parsing);
+  assert (gl_list_size (context->subparser_stack) == 0);
+  assert (!context->awaiting_pop);
+
+  /* Free the attribute strings first, then the arrays that held them.  */
+  clear_attributes (context);
+  free (context->attr_values);
+  free (context->attr_names);
+
+  if (context->partial_chunk != NULL)
+    markup_string_free (context->partial_chunk, true);
+
+  gl_list_free (context->subparser_stack);
+  gl_list_free (context->tag_stack);
+
+  free (context->error_text);
+  free (context);
+}
+
+static void pop_subparser_stack (markup_parse_context_ty *context);
+
+/* Puts CONTEXT into the error state, reports ERROR_TEXT to the active
+   parser's error callback and then to the error callback of every
+   enclosing parser while unwinding the subparser stack, and finally
+   stores a copy of ERROR_TEXT in the context.  */
+static void
+emit_error (markup_parse_context_ty *context, const char *error_text)
+{
+  context->state = STATE_ERROR;
+
+  for (;;)
+    {
+      if (context->parser->error != NULL)
+        context->parser->error (context, error_text, context->user_data);
+
+      if (gl_list_size (context->subparser_stack) == 0)
+        break;
+
+      /* Report the error all the way up so that every level gets the
+         chance to free its user data.  */
+      pop_subparser_stack (context);
+      context->awaiting_pop = false; /* already been freed */
+    }
+
+  free (context->error_text);
+  context->error_text = xstrdup (error_text);
+}
+
+/* True if C is '=', '/', '>' or ' ', the ASCII characters at which the
+   name scanner (advance_to_name_end) stops.  Note that C is evaluated
+   more than once.  */
+#define IS_COMMON_NAME_END_CHAR(c) \
+ ((c) == '=' || (c) == '/' || (c) == '>' || (c) == ' ')
+
+/* Full validation of NAME, used when the ASCII fast path in
+   name_validate cannot accept it.  NAME must be valid UTF-8.  Its first
+   character must be an ASCII letter, '_', ':' or a Unicode alphabetic
+   character; every following character must be an ASCII letter or
+   digit, '.', '-', '_', ':' or a Unicode alphabetic character.
+   Returns true if NAME is acceptable; otherwise reports the problem
+   through emit_error and returns false.  */
+static bool
+slow_name_validate (markup_parse_context_ty *context, const char *name)
+{
+  const char *p = name;
+  ucs4_t uc;
+
+  if (u8_check ((const uint8_t *) name, strlen (name)) != NULL)
+    {
+      emit_error (context, _("invalid UTF-8 sequence"));
+      return false;
+    }
+
+  /* is_name_start_char */
+  if (!(c_isalpha (*p)
+        || (!IS_COMMON_NAME_END_CHAR (*p)
+            && (*p == '_'
+                || *p == ':'
+                || (u8_mbtouc (&uc, (const uint8_t *) name, strlen (name)) > 0
+                    && uc_is_alpha (uc))))))
+    {
+      char *error_text = xasprintf (_("'%s' is not a valid name"), name);
+      emit_error (context, error_text);
+      free (error_text);
+      return false;
+    }
+
+  /* Step over the first character, which was checked above, and then
+     examine the remaining ones.  u8_next decodes the character that
+     starts at its argument into UC and returns the position after it,
+     or NULL at the terminating NUL.  The byte tests on *P and the
+     uc_is_alpha test on UC therefore have to refer to the same
+     character, i.e. be done before P is advanced; the previous loop
+     paired the bytes at P with the code point of the character before
+     it and also ran once on the terminating NUL.  */
+  p = (const char *) u8_next (&uc, (const uint8_t *) name);
+  while (p != NULL)
+    {
+      const char *next = (const char *) u8_next (&uc, (const uint8_t *) p);
+
+      if (next == NULL)
+        break; /* P is at the terminating NUL */
+
+      /* is_name_char */
+      if (!(c_isalnum (*p)
+            || (!IS_COMMON_NAME_END_CHAR (*p)
+                && (*p == '.'
+                    || *p == '-'
+                    || *p == '_'
+                    || *p == ':'
+                    || uc_is_alpha (uc)))))
+        {
+          char *error_text = xasprintf (_("'%s' is not a valid name: '%c'"),
+                                        name, *p);
+          emit_error (context, error_text);
+          free (error_text);
+          return false;
+        }
+
+      p = next;
+    }
+
+  return true;
+}
+
+/*
+ * Validates NAME as an element or attribute name.  Names made only of
+ * ASCII letters, digits, '.', '-', '_' and ':' (and not starting with a
+ * digit, '.' or '-') are accepted right here; everything else is handed
+ * to slow_name_validate, which also reports the error if there is one.
+ */
+static bool
+name_validate (markup_parse_context_ty *context, const char *name)
+{
+  const char *cursor = name;
+  char seen_bits;
+
+  /* name start char */
+  if (IS_COMMON_NAME_END_CHAR (*cursor)
+      || !(c_isalpha (*cursor) || *cursor == '_' || *cursor == ':'))
+    return slow_name_validate (context, name);
+
+  seen_bits = *cursor;
+  for (cursor++; *cursor != '\0'; cursor++)
+    {
+      bool plain_name_char;
+
+      seen_bits |= *cursor;
+
+      /* is_name_char */
+      plain_name_char =
+        (c_isalnum (*cursor)
+         || (!IS_COMMON_NAME_END_CHAR (*cursor)
+             && (*cursor == '.' || *cursor == '-'
+                 || *cursor == '_' || *cursor == ':')));
+      if (!plain_name_char)
+        return slow_name_validate (context, name);
+    }
+
+  /* un-common / non-ascii */
+  if (seen_bits & 0x80)
+    return slow_name_validate (context, name);
+
+  return true;
+}
+
+/* Checks that the LEN bytes at P are well-formed UTF-8.  Returns true if
+   so; otherwise reports the error through emit_error and returns
+   false.  */
+static bool
+text_validate (markup_parse_context_ty *context,
+               const char *p,
+               int len)
+{
+  const uint8_t *malformed = u8_check ((const uint8_t *) p, len);
+
+  if (malformed == NULL)
+    return true;
+
+  emit_error (context, _("invalid UTF-8 sequence"));
+  return false;
+}
+
+/*
+ * Rewrite STRING in place, unescaping anything that was escaped: the
+ * five predefined entities and numeric character references.  Line
+ * endings are normalized on the way: "\r\n" and a lone "\r" become "\n"
+ * in text; inside an attribute value (judged by the parser state) they
+ * and "\t" and "\n" become a space.
+ *
+ * On success returns true and stores in *IS_ASCII whether the result
+ * contains only ASCII bytes.  On failure the error is reported through
+ * emit_error, false is returned and *IS_ASCII is false.
+ *
+ * Most XML does not contain entities, or escaping.
+ */
+static bool
+unescape_string_inplace (markup_parse_context_ty *context,
+                         markup_string_ty *string,
+                         bool *is_ascii)
+{
+  char mask, *to;
+  const char *from;
+  bool normalize_attribute;
+
+  if (string->buflen == 0)
+    {
+      /* Nothing to unescape.  Still set the output, because callers
+         read it right after a successful return; an empty string is
+         trivially ASCII.  */
+      *is_ascii = true;
+      return true;
+    }
+
+  *is_ascii = false;
+
+  /* are we unescaping an attribute or not ? */
+  if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ
+      || context->state == STATE_INSIDE_ATTRIBUTE_VALUE_DQ)
+    normalize_attribute = true;
+  else
+    normalize_attribute = false;
+
+  /*
+   * Meeks' theorem: unescaping can only shrink text.
+   * for &lt; etc. this is obvious, for &#xffff; more
+   * thought is required, but this is patently so.
+   */
+  mask = 0;
+  for (from = to = string->buffer; *from != '\0'; from++, to++)
+    {
+      *to = *from;
+
+      mask |= *to;
+      if (normalize_attribute && (*to == '\t' || *to == '\n'))
+        *to = ' ';
+      if (*to == '\r')
+        {
+          *to = normalize_attribute ? ' ' : '\n';
+          if (from[1] == '\n')
+            from++;
+        }
+      if (*from == '&')
+        {
+          from++;
+          if (*from == '#')
+            {
+              int base = 10;
+              unsigned long l;
+              char *end = NULL;
+
+              from++;
+
+              if (*from == 'x')
+                {
+                  base = 16;
+                  from++;
+                }
+
+              errno = 0;
+              l = strtoul (from, &end, base);
+
+              if (end == from || errno != 0)
+                {
+                  emit_error (context,
+                              _("out of range when resolving character ref"));
+                  return false;
+                }
+              else if (*end != ';')
+                {
+                  emit_error (context,
+                              _("character reference does not end with a ';'"));
+                  return false;
+                }
+              else
+                {
+                  /* characters XML 1.1 permits */
+                  if ((0 < l && l <= 0xD7FF) ||
+                      (0xE000 <= l && l <= 0xFFFD) ||
+                      (0x10000 <= l && l <= 0x10FFFF))
+                    {
+                      char buf[8];
+                      int length;
+                      length = u8_uctomb ((uint8_t *) buf, l, 8);
+                      memcpy (to, buf, length);
+                      /* the loop increment accounts for the last byte */
+                      to += length - 1;
+                      from = end;
+                      if (l >= 0x80) /* not ascii */
+                        mask |= 0x80;
+                    }
+                  else
+                    {
+                      emit_error (context, _("invalid character reference"));
+                      return false;
+                    }
+                }
+            }
+
+          /* For the named entities FROM is left on the character before
+             the ';'; the loop increment then steps onto the ';' ... no:
+             FROM is advanced to the ';' itself and the loop increment
+             moves past it.  */
+          else if (strncmp (from, "lt;", 3) == 0)
+            {
+              *to = '<';
+              from += 2;
+            }
+          else if (strncmp (from, "gt;", 3) == 0)
+            {
+              *to = '>';
+              from += 2;
+            }
+          else if (strncmp (from, "amp;", 4) == 0)
+            {
+              *to = '&';
+              from += 3;
+            }
+          else if (strncmp (from, "quot;", 5) == 0)
+            {
+              *to = '"';
+              from += 4;
+            }
+          else if (strncmp (from, "apos;", 5) == 0)
+            {
+              *to = '\'';
+              from += 4;
+            }
+          else
+            {
+              if (*from == ';')
+                emit_error (context, _("empty entity '&;'"));
+              else
+                {
+                  const char *end = strchr (from, ';');
+                  if (end)
+                    emit_error (context, _("unknown entity name"));
+                  else
+                    emit_error (context, _("entity does not end with a ';'"));
+                }
+              return false;
+            }
+        }
+    }
+
+  assert (to - string->buffer <= string->buflen);
+  if (to - string->buffer != string->buflen)
+    markup_string_truncate (string, to - string->buffer);
+
+  *is_ascii = !(mask & 0x80);
+
+  return true;
+}
+
+/* Moves the read position one byte forward and updates the column
+   counter.  Returns false if the end of the current text was reached.
+   Otherwise returns true, after starting a new line in the position
+   bookkeeping when the byte now under the cursor is a newline.  */
+static inline bool
+advance_char (markup_parse_context_ty *context)
+{
+  const char *next = context->iter + 1;
+
+  context->iter = next;
+  context->char_number++;
+
+  if (next == context->current_text_end)
+    return false;
+
+  if (*next == '\n')
+    {
+      context->line_number++;
+      context->char_number = 1;
+    }
+
+  return true;
+}
+
+/* Returns true if C is a space, tab, line feed or carriage return.  */
+static inline bool
+xml_isspace (char c)
+{
+  switch (c)
+    {
+    case ' ':
+    case '\t':
+    case '\n':
+    case '\r':
+      return true;
+    default:
+      return false;
+    }
+}
+
+/* Advances the read position past whitespace, stopping at the first
+   non-whitespace byte or at the end of the current text.  */
+static void
+skip_spaces (markup_parse_context_ty *context)
+{
+  while (xml_isspace (*context->iter) && advance_char (context))
+    continue;
+}
+
+/* Advances the read position until it rests on a byte that ends a name
+   ('=', '/', '>' or whitespace) or until the end of the current text.  */
+static void
+advance_to_name_end (markup_parse_context_ty *context)
+{
+  for (;;)
+    {
+      const char here = *context->iter;
+
+      if (IS_COMMON_NAME_END_CHAR (here) || xml_isspace (here))
+        break;
+      if (!advance_char (context))
+        break;
+    }
+}
+
+/* Appends the bytes in [TEXT_START, TEXT_END) to the context's partial
+   chunk, creating the chunk first if there is none.  An empty range
+   still guarantees that the chunk exists.  */
+static void
+add_to_partial (markup_parse_context_ty *context,
+                const char *text_start,
+                const char *text_end)
+{
+  markup_string_ty *chunk = context->partial_chunk;
+
+  if (chunk == NULL)
+    {
+      /* allocate a new chunk to parse into */
+      chunk = markup_string_new ();
+      context->partial_chunk = chunk;
+    }
+
+  if (text_end != text_start)
+    markup_string_append (chunk, text_start, text_end - text_start);
+}
+
+/* Empties the partial chunk, if there is one, keeping its storage.  */
+static inline void
+truncate_partial (markup_parse_context_ty *context)
+{
+  markup_string_ty *chunk = context->partial_chunk;
+
+  if (chunk == NULL)
+    return;
+
+  string_blank (chunk);
+}
+
+/* Returns the name stored at the front of the tag stack, i.e. of the
+   most recently pushed element.  The stack must not be empty.  */
+static inline const char*
+current_element (markup_parse_context_ty *context)
+{
+  const markup_string_ty *innermost;
+
+  innermost = (const markup_string_ty *) gl_list_get_at (context->tag_stack, 0);
+  return innermost->buffer;
+}
+
+/* Restores the parser, user data and subparser element that were saved
+   by the most recent markup_parse_context_push, removes that record
+   from the subparser stack and marks the context as awaiting a pop.  */
+static void
+pop_subparser_stack (markup_parse_context_ty *context)
+{
+  markup_recursion_tracker_ty *tracker;
+
+  assert (gl_list_size (context->subparser_stack) > 0);
+
+  tracker = (markup_recursion_tracker_ty *) gl_list_get_at (context->subparser_stack, 0);
+
+  context->awaiting_pop = true;
+
+  context->user_data = tracker->prev_user_data;
+  context->parser = tracker->prev_parser;
+  context->subparser_element = tracker->prev_element;
+
+  /* Do not free TRACKER here: the subparser stack was created with free
+     as its element disposal function, so removing the element releases
+     it.  Freeing it explicitly as well would be a double free.  */
+  gl_list_remove_at (context->subparser_stack, 0);
+}
+
+/* Moves the partial chunk, which holds a complete element name, to the
+   front of the tag stack; the context no longer has a partial chunk
+   afterwards.  */
+static void
+push_partial_as_tag (markup_parse_context_ty *context)
+{
+  markup_string_ty *tag = context->partial_chunk;
+
+  context->partial_chunk = NULL;
+  gl_list_add_first (context->tag_stack, tag);
+}
+
+/* Removes the element at the front of the tag stack.  */
+static void
+pop_tag (markup_parse_context_ty *context)
+{
+  (void) gl_list_remove_at (context->tag_stack, 0);
+}
+
+/* Pops the subparser stack if the element on top of the tag stack is
+   the very one (same name pointer) for which the current subparser was
+   pushed.  */
+static void
+possibly_finish_subparser (markup_parse_context_ty *context)
+{
+  if (current_element (context) != context->subparser_element)
+    return;
+
+  pop_subparser_stack (context);
+}
+
+/* Clears the awaiting-pop flag of CONTEXT unconditionally.  */
+static void
+ensure_no_outstanding_subparser (markup_parse_context_ty *context)
+{
+  context->awaiting_pop = 0;
+}
+
+/* Records a new attribute whose name is a copy of STRING's contents and
+   whose value is not yet known (NULL).  Both attribute arrays are grown
+   when needed and always end with a NULL entry.  */
+static void
+add_attribute (markup_parse_context_ty *context, markup_string_ty *string)
+{
+  int slot;
+
+  /* Room is needed for the new entry and for the NULL after it.  */
+  if (context->cur_attr + 2 >= context->alloc_attrs)
+    {
+      context->alloc_attrs += 5; /* silly magic number */
+      context->attr_names =
+        xrealloc (context->attr_names,
+                  sizeof (char *) * context->alloc_attrs);
+      context->attr_values =
+        xrealloc (context->attr_values,
+                  sizeof (char *) * context->alloc_attrs);
+    }
+
+  slot = ++context->cur_attr;
+
+  context->attr_names[slot] = xstrdup (string->buffer);
+  context->attr_values[slot] = NULL;
+
+  context->attr_names[slot + 1] = NULL;
+  context->attr_values[slot + 1] = NULL;
+}
+
+/* Frees every stored attribute name and value and resets the attribute
+   index to -1.  The arrays themselves stay allocated for reuse.  */
+static void
+clear_attributes (markup_parse_context_ty *context)
+{
+  while (context->cur_attr >= 0)
+    {
+      const int idx = context->cur_attr;
+
+      free (context->attr_names[idx]);
+      context->attr_names[idx] = NULL;
+
+      free (context->attr_values[idx]);
+      context->attr_values[idx] = NULL;
+
+      context->cur_attr--;
+    }
+
+  assert (context->cur_attr == -1);
+  assert (context->attr_names == NULL || context->attr_names[0] == NULL);
+  assert (context->attr_values == NULL || context->attr_values[0] == NULL);
+}
+
+/* Makes PARSER and USER_DATA the active callbacks for the element that
+   is currently on top of the tag stack.  The previous parser, user data
+   and subparser element are saved on the subparser stack so that
+   pop_subparser_stack can restore them.  */
+static void
+markup_parse_context_push (markup_parse_context_ty *context,
+                           const markup_parser_ty *parser,
+                           void *user_data)
+{
+  markup_recursion_tracker_ty *saved = XMALLOC (markup_recursion_tracker_ty);
+
+  saved->prev_user_data = context->user_data;
+  saved->prev_parser = context->parser;
+  saved->prev_element = context->subparser_element;
+  gl_list_add_first (context->subparser_stack, saved);
+
+  context->user_data = user_data;
+  context->parser = parser;
+  context->subparser_element = current_element (context);
+}
+
+/* Completes the removal of a subparser.  If the subparser stack has not
+   been popped yet, this is attempted now; a pop must be pending
+   afterwards (asserted), and the pending mark is then cleared.  */
+static void
+markup_parse_context_pop (markup_parse_context_ty *context)
+{
+  if (!context->awaiting_pop)
+    {
+      possibly_finish_subparser (context);
+    }
+
+  assert (context->awaiting_pop);
+  context->awaiting_pop = false;
+}
+
+/* Reports the start of the element on top of the tag stack.  The
+   start_element callback, if any, receives the element name and
+   NULL-terminated arrays of the attribute names and values collected
+   for it, provided the element name validates.  The temporary arrays
+   are heap-allocated and freed before returning, and the collected
+   attributes are discarded in every case.
+
+   With MARKUP_IGNORE_QUALIFIED set, an element whose name contains ':'
+   is not reported; instead a subparser without callbacks is pushed so
+   that everything inside the element is ignored (emit_end_element deals
+   with the end of that subparser).  Attributes whose names contain ':'
+   are left out of the arrays passed to the callback.  */
+static inline void
+emit_start_element (markup_parse_context_ty *context)
+{
+  const bool drop_qualified =
+    (context->flags & MARKUP_IGNORE_QUALIFIED) != 0;
+  const char *element_name = current_element (context);
+  const char **names;
+  const char **values;
+  int src;
+  int kept = 0;
+
+  if (drop_qualified && strchr (element_name, ':') != NULL)
+    {
+      static const markup_parser_ty ignore_parser;
+
+      markup_parse_context_push (context, &ignore_parser, NULL);
+      clear_attributes (context);
+      return;
+    }
+
+  /* One slot per attribute plus the terminating NULL.  */
+  names = XCALLOC (context->cur_attr + 2, const char *);
+  values = XCALLOC (context->cur_attr + 2, const char *);
+
+  for (src = 0; src <= context->cur_attr; src++)
+    {
+      /* Possibly omit qualified attribute names from the list */
+      if (drop_qualified && strchr (context->attr_names[src], ':') != NULL)
+        continue;
+
+      names[kept] = context->attr_names[src];
+      values[kept] = context->attr_values[src];
+      kept++;
+    }
+  names[kept] = NULL;
+  values[kept] = NULL;
+
+  /* Call user callback for element start */
+  if (context->parser->start_element != NULL
+      && name_validate (context, element_name))
+    context->parser->start_element (context,
+                                    element_name,
+                                    names,
+                                    values,
+                                    context->user_data);
+
+  free (names);
+  free (values);
+  clear_attributes (context);
+}
+
+/* Reports the end of the element on top of the tag stack to the
+   end_element callback, if any, and pops the element.  A subparser that
+   was pushed for this element is finished first.  With
+   MARKUP_IGNORE_QUALIFIED set, an element whose name contains ':' is
+   popped without invoking the callback.  The tag stack must not be
+   empty.  */
+static void
+emit_end_element (markup_parse_context_ty *context)
+{
+  const char *closing;
+
+  assert (gl_list_size (context->tag_stack) != 0);
+
+  possibly_finish_subparser (context);
+  closing = current_element (context);
+
+  /* We might have just returned from our ignore subparser */
+  if ((context->flags & MARKUP_IGNORE_QUALIFIED) != 0
+      && strchr (closing, ':') != NULL)
+    {
+      markup_parse_context_pop (context);
+      pop_tag (context);
+      return;
+    }
+
+  if (context->parser->end_element != NULL)
+    context->parser->end_element (context, closing, context->user_data);
+
+  ensure_no_outstanding_subparser (context);
+  pop_tag (context);
+}
+
+/* Feed some data to the parse context. The data need not be valid
+ UTF-8; an error will be signaled if it's invalid. The data need
+ not be an entire document; you can feed a document into the parser
+ incrementally, via multiple calls to this function. Typically, as
+ you receive data from a network connection or file, you feed each
+ received chunk of data into this function, aborting the process if
+ an error occurs. Once an error is reported, no further data may be
+ fed to the parse context; all errors are fatal. */
+bool
+markup_parse_context_parse (markup_parse_context_ty *context,
+ const char *text,
+ ssize_t text_len)
+{
+ assert (context != NULL);
+ assert (text != NULL);
+ assert (context->state != STATE_ERROR);
+ assert (!context->parsing);
+
+ if (text_len < 0)
+ text_len = strlen (text);
+
+ if (text_len == 0)
+ return true;
+
+ context->parsing = true;
+
+
+ context->current_text = text;
+ context->current_text_len = text_len;
+ context->current_text_end = context->current_text + text_len;
+ context->iter = context->current_text;
+ context->start = context->iter;
+
+ while (context->iter != context->current_text_end)
+ {
+ switch (context->state)
+ {
+ case STATE_START:
+ /* Possible next state: AFTER_OPEN_ANGLE */
+
+ assert (gl_list_size (context->tag_stack) == 0);
+
+ /* whitespace is ignored outside of any elements */
+ skip_spaces (context);
+
+ if (context->iter != context->current_text_end)
+ {
+ if (*context->iter == '<')
+ {
+ /* Move after the open angle */
+ advance_char (context);
+
+ context->state = STATE_AFTER_OPEN_ANGLE;
+
+ /* this could start a passthrough */
+ context->start = context->iter;
+
+ /* document is now non-empty */
+ context->document_empty = false;
+ }
+ else
+ {
+ emit_error (context,
+ _("document must begin with an element"));
+ }
+ }
+ break;
+
+ case STATE_AFTER_OPEN_ANGLE:
+ /* Possible next states: INSIDE_OPEN_TAG_NAME,
+ * AFTER_CLOSE_TAG_SLASH, INSIDE_PASSTHROUGH
+ */
+ if (*context->iter == '?' ||
+ *context->iter == '!')
+ {
+ /* include < in the passthrough */
+ const char *openangle = "<";
+ add_to_partial (context, openangle, openangle + 1);
+ context->start = context->iter;
+ context->balance = 1;
+ context->state = STATE_INSIDE_PASSTHROUGH;
+ }
+ else if (*context->iter == '/')
+ {
+ /* move after it */
+ advance_char (context);
+
+ context->state = STATE_AFTER_CLOSE_TAG_SLASH;
+ }
+ else if (!IS_COMMON_NAME_END_CHAR (*(context->iter)))
+ {
+ context->state = STATE_INSIDE_OPEN_TAG_NAME;
+
+ /* start of tag name */
+ context->start = context->iter;
+ }
+ else
+ {
+ emit_error (context, _("invalid character after '<'"));
+ }
+ break;
+
+ /* The AFTER_CLOSE_ANGLE state is actually sort of
+ * broken, because it doesn't correspond to a range
+ * of characters in the input stream as the others do,
+ * and thus makes things harder to conceptualize
+ */
+ case STATE_AFTER_CLOSE_ANGLE:
+ /* Possible next states: INSIDE_TEXT, STATE_START */
+ if (gl_list_size (context->tag_stack) == 0)
+ {
+ context->start = NULL;
+ context->state = STATE_START;
+ }
+ else
+ {
+ context->start = context->iter;
+ context->state = STATE_INSIDE_TEXT;
+ }
+ break;
+
+ case STATE_AFTER_ELISION_SLASH:
+ /* Possible next state: AFTER_CLOSE_ANGLE */
+ if (*context->iter == '>')
+ {
+ /* move after the close angle */
+ advance_char (context);
+ context->state = STATE_AFTER_CLOSE_ANGLE;
+ emit_end_element (context);
+ }
+ else
+ {
+ emit_error (context, _("missing '>'"));
+ }
+ break;
+
+ case STATE_INSIDE_OPEN_TAG_NAME:
+ /* Possible next states: BETWEEN_ATTRIBUTES */
+
+ /* if there's a partial chunk then it's the first part of the
+ * tag name. If there's a context->start then it's the start
+ * of the tag name in current_text, the partial chunk goes
+ * before that start though.
+ */
+ advance_to_name_end (context);
+
+ if (context->iter == context->current_text_end)
+ {
+ /* The name hasn't necessarily ended. Merge with
+ * partial chunk, leave state unchanged.
+ */
+ add_to_partial (context, context->start, context->iter);
+ }
+ else
+ {
+ /* The name has ended. Combine it with the partial chunk
+ * if any; push it on the stack; enter next state.
+ */
+ add_to_partial (context, context->start, context->iter);
+ push_partial_as_tag (context);
+
+ context->state = STATE_BETWEEN_ATTRIBUTES;
+ context->start = NULL;
+ }
+ break;
+
+ case STATE_INSIDE_ATTRIBUTE_NAME:
+ /* Possible next states: AFTER_ATTRIBUTE_NAME */
+
+ advance_to_name_end (context);
+ add_to_partial (context, context->start, context->iter);
+
+ /* read the full name, if we enter the equals sign state
+ * then add the attribute to the list (without the value),
+ * otherwise store a partial chunk to be prepended later.
+ */
+ if (context->iter != context->current_text_end)
+ context->state = STATE_AFTER_ATTRIBUTE_NAME;
+ break;
+
+ case STATE_AFTER_ATTRIBUTE_NAME:
+ /* Possible next states: AFTER_ATTRIBUTE_EQUALS_SIGN */
+
+ skip_spaces (context);
+
+ if (context->iter != context->current_text_end)
+ {
+ /* The name has ended. Combine it with the partial chunk
+ * if any; push it on the stack; enter next state.
+ */
+ if (!name_validate (context, context->partial_chunk->buffer))
+ break;
+
+ add_attribute (context, context->partial_chunk);
+
+ markup_string_free (context->partial_chunk, true);
+ context->partial_chunk = NULL;
+ context->start = NULL;
+
+ if (*context->iter == '=')
+ {
+ advance_char (context);
+ context->state = STATE_AFTER_ATTRIBUTE_EQUALS_SIGN;
+ }
+ else
+ {
+ emit_error (context, _("missing '='"));
+ }
+ }
+ break;
+
+ case STATE_BETWEEN_ATTRIBUTES:
+ /* Possible next states: AFTER_CLOSE_ANGLE,
+ * AFTER_ELISION_SLASH, INSIDE_ATTRIBUTE_NAME
+ */
+ skip_spaces (context);
+
+ if (context->iter != context->current_text_end)
+ {
+ if (*context->iter == '/')
+ {
+ advance_char (context);
+ context->state = STATE_AFTER_ELISION_SLASH;
+ }
+ else if (*context->iter == '>')
+ {
+ advance_char (context);
+ context->state = STATE_AFTER_CLOSE_ANGLE;
+ }
+ else if (!IS_COMMON_NAME_END_CHAR (*(context->iter)))
+ {
+ context->state = STATE_INSIDE_ATTRIBUTE_NAME;
+ /* start of attribute name */
+ context->start = context->iter;
+ }
+ else
+ {
+ emit_error (context, _("missing '>' or '/'"));
+ }
+
+ /* If we're done with attributes, invoke
+ * the start_element callback
+ */
+ if (context->state == STATE_AFTER_ELISION_SLASH ||
+ context->state == STATE_AFTER_CLOSE_ANGLE)
+ emit_start_element (context);
+ }
+ break;
+
+ case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN:
+ /* Possible next state: INSIDE_ATTRIBUTE_VALUE_[SQ/DQ] */
+
+ skip_spaces (context);
+
+ if (context->iter != context->current_text_end)
+ {
+ if (*context->iter == '"')
+ {
+ advance_char (context);
+ context->state = STATE_INSIDE_ATTRIBUTE_VALUE_DQ;
+ context->start = context->iter;
+ }
+ else if (*context->iter == '\'')
+ {
+ advance_char (context);
+ context->state = STATE_INSIDE_ATTRIBUTE_VALUE_SQ;
+ context->start = context->iter;
+ }
+ else
+ {
+ emit_error (context, _("missing opening quote"));
+ }
+ }
+ break;
+
+ case STATE_INSIDE_ATTRIBUTE_VALUE_SQ:
+ case STATE_INSIDE_ATTRIBUTE_VALUE_DQ:
+ /* Possible next states: BETWEEN_ATTRIBUTES */
+ {
+ char delim;
+
+ if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ)
+ {
+ delim = '\'';
+ }
+ else
+ {
+ delim = '"';
+ }
+
+ do
+ {
+ if (*context->iter == delim)
+ break;
+ }
+ while (advance_char (context));
+ }
+ if (context->iter == context->current_text_end)
+ {
+ /* The value hasn't necessarily ended. Merge with
+ * partial chunk, leave state unchanged.
+ */
+ add_to_partial (context, context->start, context->iter);
+ }
+ else
+ {
+ bool is_ascii;
+ /* The value has ended at the quote mark. Combine it
+ * with the partial chunk if any; set it for the current
+ * attribute.
+ */
+ add_to_partial (context, context->start, context->iter);
+
+ assert (context->cur_attr >= 0);
+
+ if (unescape_string_inplace (context, context->partial_chunk,
+ &is_ascii)
+ && (is_ascii
+ || text_validate (context,
+ context->partial_chunk->buffer,
+ context->partial_chunk->buflen)))
+ {
+ /* success, advance past quote and set state. */
+ context->attr_values[context->cur_attr] =
+ markup_string_free (context->partial_chunk, false);
+ context->partial_chunk = NULL;
+ advance_char (context);
+ context->state = STATE_BETWEEN_ATTRIBUTES;
+ context->start = NULL;
+ }
+
+ truncate_partial (context);
+ }
+ break;
+
+ case STATE_INSIDE_TEXT:
+ /* Possible next states: AFTER_OPEN_ANGLE */
+ do
+ {
+ if (*context->iter == '<')
+ break;
+ }
+ while (advance_char (context));
+
+ /* The text hasn't necessarily ended. Merge with
+ * partial chunk, leave state unchanged.
+ */
+
+ add_to_partial (context, context->start, context->iter);
+
+ if (context->iter != context->current_text_end)
+ {
+ bool is_ascii;
+
+ /* The text has ended at the open angle. Call the text
+ * callback.
+ */
+ if (unescape_string_inplace (context, context->partial_chunk,
+ &is_ascii)
+ && (is_ascii
+ || text_validate (context,
+ context->partial_chunk->buffer,
+ context->partial_chunk->buflen)))
+ {
+ if (context->parser->text)
+ (*context->parser->text) (context,
+ context->partial_chunk->buffer,
+ context->partial_chunk->buflen,
+ context->user_data);
+
+ /* advance past open angle and set state. */
+ advance_char (context);
+ context->state = STATE_AFTER_OPEN_ANGLE;
+ /* could begin a passthrough */
+ context->start = context->iter;
+ }
+
+ truncate_partial (context);
+ }
+ break;
+
+ case STATE_AFTER_CLOSE_TAG_SLASH:
+ /* Possible next state: INSIDE_CLOSE_TAG_NAME */
+ if (!IS_COMMON_NAME_END_CHAR (*(context->iter)))
+ {
+ context->state = STATE_INSIDE_CLOSE_TAG_NAME;
+
+ /* start of tag name */
+ context->start = context->iter;
+ }
+ else
+ {
+ emit_error (context, _("invalid character after '</'"));
+ }
+ break;
+
+ case STATE_INSIDE_CLOSE_TAG_NAME:
+ /* Possible next state: AFTER_CLOSE_TAG_NAME */
+ advance_to_name_end (context);
+ add_to_partial (context, context->start, context->iter);
+
+ if (context->iter != context->current_text_end)
+ context->state = STATE_AFTER_CLOSE_TAG_NAME;
+ break;
+
+ case STATE_AFTER_CLOSE_TAG_NAME:
+ /* Possible next state: AFTER_CLOSE_TAG_SLASH */
+
+ skip_spaces (context);
+
+ if (context->iter != context->current_text_end)
+ {
+ markup_string_ty *close_name;
+
+ close_name = context->partial_chunk;
+ context->partial_chunk = NULL;
+
+ if (*context->iter != '>')
+ {
+ emit_error (context,
+ _("invalid character after a close element name"));
+ }
+ else if (gl_list_size (context->tag_stack) == 0)
+ {
+ emit_error (context, _("element is closed"));
+ }
+ else if (strcmp (close_name->buffer, current_element (context)) != 0)
+ {
+ emit_error (context, _("element is closed"));
+ }
+ else
+ {
+ advance_char (context);
+ context->state = STATE_AFTER_CLOSE_ANGLE;
+ context->start = NULL;
+
+ emit_end_element (context);
+ }
+ context->partial_chunk = close_name;
+ truncate_partial (context);
+ }
+ break;
+
+ case STATE_INSIDE_PASSTHROUGH:
+ /* Possible next state: AFTER_CLOSE_ANGLE */
+ do
+ {
+ if (*context->iter == '<')
+ context->balance++;
+ if (*context->iter == '>')
+ {
+ char *str;
+ size_t len;
+
+ context->balance--;
+ add_to_partial (context, context->start, context->iter);
+ context->start = context->iter;
+
+ str = context->partial_chunk->buffer;
+ len = context->partial_chunk->buflen;
+
+ if (str[1] == '?' && str[len - 1] == '?')
+ break;
+ if (strncmp (str, "<!--", 4) == 0 &&
+ strcmp (str + len - 2, "--") == 0)
+ break;
+ if (strncmp (str, "<![CDATA[", 9) == 0 &&
+ strcmp (str + len - 2, "]]") == 0)
+ break;
+ if (strncmp (str, "<!DOCTYPE", 9) == 0 &&
+ context->balance == 0)
+ break;
+ }
+ }
+ while (advance_char (context));
+
+ if (context->iter == context->current_text_end)
+ {
+ /* The passthrough hasn't necessarily ended. Merge with
+ * partial chunk, leave state unchanged.
+ */
+ add_to_partial (context, context->start, context->iter);
+ }
+ else
+ {
+ /* The passthrough has ended at the close angle. Combine
+ * it with the partial chunk if any. Call the passthrough
+ * callback. Note that the open/close angles are
+ * included in the text of the passthrough.
+ */
+ advance_char (context); /* advance past close angle */
+ add_to_partial (context, context->start, context->iter);
+
+ if (context->flags & MARKUP_TREAT_CDATA_AS_TEXT &&
+ strncmp (context->partial_chunk->buffer, "<![CDATA[", 9) == 0)
+ {
+ if (context->parser->text &&
+ text_validate (context,
+ context->partial_chunk->buffer + 9,
+ context->partial_chunk->buflen - 12))
+ (*context->parser->text) (context,
+ context->partial_chunk->buffer + 9,
+ context->partial_chunk->buflen - 12,
+ context->user_data);
+ }
+ else if (context->parser->passthrough &&
+ text_validate (context,
+ context->partial_chunk->buffer,
+ context->partial_chunk->buflen))
+ (*context->parser->passthrough) (context,
+ context->partial_chunk->buffer,
+ context->partial_chunk->buflen,
+ context->user_data);
+
+ truncate_partial (context);
+
+ context->state = STATE_AFTER_CLOSE_ANGLE;
+ context->start = context->iter; /* could begin text */
+ }
+ break;
+
+ case STATE_ERROR:
+ goto finished;
+ break;
+
+ default:
+ abort ();
+ break;
+ }
+ }
+
+ finished:
+ context->parsing = false;
+
+ return context->state != STATE_ERROR;
+}
+
+/* Finishes a parse.  Call this once every piece of input has been
+ * handed to markup_parse_context_parse.
+ *
+ * CONTEXT must be non-NULL, must not currently be inside a parse call
+ * and must not already be in STATE_ERROR; all three are asserted.
+ * A pending partial chunk, if any, is released.
+ *
+ * An error is emitted through emit_error when the document is empty or
+ * when the state the parser stopped in shows that the document is
+ * incomplete, for example when elements are still open.
+ *
+ * Returns false for an empty document; otherwise returns whether the
+ * state of CONTEXT differs from STATE_ERROR on exit. */
+bool
+markup_parse_context_end_parse (markup_parse_context_ty *context)
+{
+  /* Translated reason why the document is incomplete, or NULL when the
+   * final state is acceptable. */
+  const char *complaint = NULL;
+
+  assert (context != NULL);
+  assert (!context->parsing);
+  assert (context->state != STATE_ERROR);
+
+  /* Text accumulated for an unfinished token is of no use any more. */
+  if (context->partial_chunk != NULL)
+    {
+      markup_string_free (context->partial_chunk, true);
+      context->partial_chunk = NULL;
+    }
+
+  /* An empty document is reported before the parsing flag is raised. */
+  if (context->document_empty)
+    {
+      emit_error (context, _("empty document"));
+      return false;
+    }
+
+  context->parsing = true;
+
+  /* Map the state the parser stopped in to a diagnostic, if any. */
+  switch (context->state)
+    {
+    case STATE_START:
+      /* Nothing is pending. */
+      break;
+
+    case STATE_AFTER_CLOSE_ANGLE:
+      /* Only an error when elements remain open; the wording is the
+       * same as for STATE_INSIDE_TEXT. */
+      if (gl_list_size (context->tag_stack) > 0)
+        complaint =
+          _("document ended unexpectedly with elements still open");
+      break;
+
+    case STATE_INSIDE_TEXT:
+      assert (gl_list_size (context->tag_stack) > 0);
+      complaint =
+        _("document ended unexpectedly with elements still open");
+      break;
+
+    case STATE_AFTER_OPEN_ANGLE:
+      complaint = _("document ended unexpectedly just after '<'");
+      break;
+
+    case STATE_AFTER_ELISION_SLASH:
+      complaint = _("document ended unexpectedly without '>'");
+      break;
+
+    case STATE_INSIDE_OPEN_TAG_NAME:
+      complaint = _("document ended unexpectedly inside an element name");
+      break;
+
+    case STATE_INSIDE_ATTRIBUTE_NAME:
+    case STATE_AFTER_ATTRIBUTE_NAME:
+      complaint =
+        _("document ended unexpectedly inside an attribute name");
+      break;
+
+    case STATE_BETWEEN_ATTRIBUTES:
+      complaint = _("document ended unexpectedly inside an open tag");
+      break;
+
+    case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN:
+      complaint = _("document ended unexpectedly after '='");
+      break;
+
+    case STATE_INSIDE_ATTRIBUTE_VALUE_SQ:
+    case STATE_INSIDE_ATTRIBUTE_VALUE_DQ:
+      complaint =
+        _("document ended unexpectedly inside an attribute value");
+      break;
+
+    case STATE_AFTER_CLOSE_TAG_SLASH:
+    case STATE_INSIDE_CLOSE_TAG_NAME:
+    case STATE_AFTER_CLOSE_TAG_NAME:
+      complaint = _("document ended unexpectedly inside the close tag");
+      break;
+
+    case STATE_INSIDE_PASSTHROUGH:
+      complaint = _("document ended unexpectedly inside a comment or "
+                    "processing instruction");
+      break;
+
+    case STATE_ERROR:
+    default:
+      /* STATE_ERROR was excluded by the assertion above; any other
+       * value is not a known state. */
+      abort ();
+      break;
+    }
+
+  /* Report while the parsing flag is still raised, as the individual
+   * switch arms did before. */
+  if (complaint != NULL)
+    emit_error (context, complaint);
+
+  context->parsing = false;
+
+  return !(context->state == STATE_ERROR);
+}
+
+/* Returns the error_text member of CONTEXT.  CONTEXT is dereferenced
+ * unconditionally, so it must not be NULL.
+ * NOTE(review): presumably this is the message recorded by the most
+ * recent emit_error call and still belongs to CONTEXT -- confirm
+ * against emit_error. */
+const char *
+markup_parse_context_get_error (markup_parse_context_ty *context)
+{
+  const char *message = context->error_text;
+
+  return message;
+}
diff --git a/gnulib-local/lib/markup.h b/gnulib-local/lib/markup.h
new file mode 100644
index 0000000..61e5b0e
--- /dev/null
+++ b/gnulib-local/lib/markup.h
@@ -0,0 +1,164 @@
+/* markup.h -- simple XML-like string parser
+ Copyright (C) 2015 Free Software Foundation, Inc.
+
+ This file is not part of the GNU gettext program, but is used with
+ GNU gettext.
+
+ This is a stripped down version of GLib's gmarkup.h. The original
+ copyright notice is as follows:
+ */
+
+/* gmarkup.h - Simple XML-like string parser/writer
+ *
+ * Copyright 2000 Red Hat, Inc.
+ *
+ * GLib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * GLib is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with GLib; see the file COPYING.LIB. If not,
+ * see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __MARKUP_H__
+#define __MARKUP_H__ 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <sys/types.h>
+
+/**
+ * markup_parse_flags_ty:
+ * @MARKUP_DO_NOT_USE_THIS_UNSUPPORTED_FLAG: flag you should not use
+ * @MARKUP_TREAT_CDATA_AS_TEXT: When this flag is set, CDATA marked
+ * sections are not passed literally to the @passthrough function of
+ * the parser. Instead, the content of the section (without the
+ * `<![CDATA[` and `]]>`) is passed to the @text function. (In GLib,
+ * from which this code is derived, the flag was added in version 2.12.)
+ * @MARKUP_PREFIX_ERROR_POSITION: Normally errors caught by the parser
+ * itself have line/column information prefixed to them to let the
+ * caller know the location of the error. When this flag is set the
+ * location information is also prefixed to errors generated by the
+ * #markup_parser_ty implementation functions.
+ * NOTE(review): this description is inherited from GLib's gmarkup.h;
+ * confirm that the stripped-down parser still acts on this flag.
+ * @MARKUP_IGNORE_QUALIFIED: Ignore (don't report) qualified
+ * attributes and tags, along with their contents. A qualified
+ * attribute or tag is one that contains ':' in its name (ie: is in
+ * another namespace). (Added to GLib in version 2.40.)
+ * NOTE(review): likewise inherited from GLib; confirm that the
+ * stripped-down parser still acts on this flag.
+ *
+ * Flags that affect the behaviour of the parser. Each flag occupies
+ * its own bit (1 << 0 through 1 << 3).
+ */
+typedef enum
+ {
+ MARKUP_DO_NOT_USE_THIS_UNSUPPORTED_FLAG = 1 << 0,
+ MARKUP_TREAT_CDATA_AS_TEXT = 1 << 1,
+ MARKUP_PREFIX_ERROR_POSITION = 1 << 2,
+ MARKUP_IGNORE_QUALIFIED = 1 << 3
+ } markup_parse_flags_ty;
+
+/**
+ * markup_parse_context_ty:
+ *
+ * A parse context is used to parse a stream of bytes that
+ * you expect to contain marked-up text.
+ *
+ * See markup_parse_context_new(), #markup_parser_ty, and so
+ * on for more details.
+ */
+typedef struct _markup_parse_context_ty markup_parse_context_ty;
+typedef struct _markup_parser_ty markup_parser_ty;
+
+/**
+ * markup_parser_ty:
+ * @start_element: Callback to invoke when the opening tag of an element
+ * is seen. The callback's @attribute_names and @attribute_values parameters
+ * are %NULL-terminated.
+ * @end_element: Callback to invoke when the closing tag of an element
+ * is seen. Note that this is also called for empty tags like
+ * `<empty/>`.
+ * @text: Callback to invoke when some text is seen (text is always
+ * inside an element). Note that the text of an element may be spread
+ * over multiple calls of this function. If the
+ * %MARKUP_TREAT_CDATA_AS_TEXT flag is set, this function is also
+ * called for the content of CDATA marked sections.
+ * @passthrough: Callback to invoke for comments, processing instructions
+ * and doctype declarations; if you're re-writing the parsed document,
+ * write the passthrough text back out in the same position. If the
+ * %MARKUP_TREAT_CDATA_AS_TEXT flag is not set, this function is also
+ * called for CDATA marked sections.
+ * @error: Callback to invoke when an error occurs.
+ *
+ * Any of the fields in #markup_parser_ty can be %NULL, in which case they
+ * will be ignored.
+ *
+ * GLib's original description goes on to say that, except for the @error
+ * function, any of these callbacks can set an error (in particular
+ * %MARKUP_ERROR_UNKNOWN_ELEMENT, %MARKUP_ERROR_UNKNOWN_ATTRIBUTE and
+ * %MARKUP_ERROR_INVALID_CONTENT) which markup_parse_context_parse() then
+ * reports back to its caller. That does not map onto this interface:
+ * no such error codes are declared in this header and the callbacks
+ * have no error out-parameter; they return bool instead.
+ * NOTE(review): the meaning of that bool return value is not documented
+ * here -- presumably false signals failure; confirm against the call
+ * sites in markup.c.
+ */
+struct _markup_parser_ty
+{
+ /* Called for open tags <foo bar="baz"> */
+ bool (*start_element) (markup_parse_context_ty *context,
+ const char *element_name,
+ const char **attribute_names,
+ const char **attribute_values,
+ void *user_data);
+
+ /* Called for close tags </foo> */
+ bool (*end_element) (markup_parse_context_ty *context,
+ const char *element_name,
+ void *user_data);
+
+ /* Called for character data */
+ /* text is not nul-terminated; its length is given by text_len */
+ bool (*text) (markup_parse_context_ty *context,
+ const char *text,
+ size_t text_len,
+ void *user_data);
+
+ /* Called for strings that should be re-saved verbatim in this same
+ * position, but are not otherwise interpretable. As described for
+ * @passthrough above, this covers comments, processing instructions
+ * and doctype declarations, plus CDATA marked sections when
+ * MARKUP_TREAT_CDATA_AS_TEXT is not set.
+ */
+ /* text is not nul-terminated; its length is given by text_len. */
+ bool (*passthrough) (markup_parse_context_ty *context,
+ const char *passthrough_text,
+ size_t text_len,
+ void *user_data);
+
+ /* Called on error, including one set by other
+ * methods in the vtable. The error_text is passed as a const string
+ * and should not be freed by the callback.
+ */
+ void (*error) (markup_parse_context_ty *context,
+ const char *error_text,
+ void *user_data);
+};
+
+extern markup_parse_context_ty *
+ markup_parse_context_new (const markup_parser_ty *parser,
+ markup_parse_flags_ty flags,
+ void *user_data);
+extern void markup_parse_context_free (markup_parse_context_ty *context);
+extern bool markup_parse_context_parse (markup_parse_context_ty *context,
+ const char *text,
+ ssize_t text_len);
+extern bool markup_parse_context_end_parse (markup_parse_context_ty *context);
+extern const char *
+ markup_parse_context_get_error (markup_parse_context_ty *context);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MARKUP_H__ */
diff --git a/gnulib-local/modules/markup b/gnulib-local/modules/markup
new file mode 100644
index 0000000..8d969d4
--- /dev/null
+++ b/gnulib-local/modules/markup
@@ -0,0 +1,31 @@
+Description:
+Simple XML-like parser
+
+Files:
+lib/markup.h
+lib/markup.c
+
+Depends-on:
+c-ctype
+linked-list
+unistr/u8-mbtouc
+unistr/u8-next
+unictype/ctype-alpha
+xalloc
+xlist
+xvasprintf
+
+configure.ac:
+
+Makefile.am:
+lib_SOURCES += markup.h markup.c
+
+Include:
+"markup.h"
+
+License:
+LGPL
+
+Maintainer:
+Daiki Ueno
+