diff options
author | Daiki Ueno <ueno@gnu.org> | 2015-12-09 17:35:34 +0900 |
---|---|---|
committer | Daiki Ueno <ueno@gnu.org> | 2015-12-09 19:07:06 +0900 |
commit | 898e184a596c43abf1067089a03df3e79b4e4527 (patch) | |
tree | e9f5596bb75f8a0ba47f9b34d26346f53d981613 | |
parent | f6dde6baeef8e6cb5ec92bc6c67c5c0304ba4396 (diff) | |
download | external_gettext-898e184a596c43abf1067089a03df3e79b4e4527.zip external_gettext-898e184a596c43abf1067089a03df3e79b4e4527.tar.gz external_gettext-898e184a596c43abf1067089a03df3e79b4e4527.tar.bz2 |
build: Remove expat dependency
* DEPENDENCIES: Suggest libxml2 instead of expat.
* gnulib-local/lib/markup.c: New file.
* gnulib-local/lib/markup.h: New file.
* gnulib-local/modules/markup: New file.
* autogen.sh (GNULIB_MODULES_LIBGETTEXTPO): Add markup module.
* gettext-tools/configure.ac: Remove checks for expat.
* gettext-tools/gnulib-lib/.gitignore: Ignore modules pulled by
gnulib-tool due to the markup module usage.
* gettext-tools/gnulib-tests/.gitignore: Likewise.
* gettext-tools/libgettextpo/.gitignore: Likewise.
* gettext-tools/libgettextpo/Makefile.am (libgettextpo_la_AUXSOURCES):
Remove ../src/libexpat-compat.c.
(libgettextpo_la_LDFLAGS): Remove @LTLIBEXPAT@.
* gettext-tools/src/Makefile.am (noinst_HEADERS): Remove
libexpat-compat.h.
(libgettextsrc_la_SOURCES): Remove libexpat-compat.c.
(libgettextsrc_la_LDFLAGS): Remove @LTLIBEXPAT@.
* gettext-tools/src/format-kde-kuit.c: Use functions from markup.h, when
the file is being compiled as part of libgettextpo. Otherwise use
libxml2.
* gettext-tools/src/libexpat-compat.c: Remove.
* gettext-tools/src/libexpat-compat.h: Remove.
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | DEPENDENCIES | 10 | ||||
-rwxr-xr-x | autogen.sh | 1 | ||||
-rw-r--r-- | gettext-tools/configure.ac | 19 | ||||
-rw-r--r-- | gettext-tools/gnulib-lib/.gitignore | 92 | ||||
-rw-r--r-- | gettext-tools/gnulib-tests/.gitignore | 1 | ||||
-rw-r--r-- | gettext-tools/libgettextpo/.gitignore | 56 | ||||
-rw-r--r-- | gettext-tools/libgettextpo/Makefile.am | 9 | ||||
-rw-r--r-- | gettext-tools/src/Makefile.am | 6 | ||||
-rw-r--r-- | gettext-tools/src/format-kde-kuit.c | 150 | ||||
-rw-r--r-- | gettext-tools/src/libexpat-compat.c | 327 | ||||
-rw-r--r-- | gettext-tools/src/libexpat-compat.h | 95 | ||||
-rw-r--r-- | gnulib-local/lib/markup.c | 1523 | ||||
-rw-r--r-- | gnulib-local/lib/markup.h | 164 | ||||
-rw-r--r-- | gnulib-local/modules/markup | 31 |
15 files changed, 1899 insertions, 586 deletions
@@ -43,6 +43,7 @@ Makefile /gettext-tools/examples/ChangeLog /gettext-tools/po/ChangeLog +/build-aux/ar-lib /build-aux/git-version-gen /build-aux/gitlog-to-changelog /build-aux/snippet diff --git a/DEPENDENCIES b/DEPENDENCIES index e11eb94..4a26690 100644 --- a/DEPENDENCIES +++ b/DEPENDENCIES @@ -28,15 +28,15 @@ The following packages should be installed before GNU gettext is installed + If it is installed in a nonstandard directory, pass the option --with-ncurses-prefix=DIR or --with-libtermcap-prefix to 'configure'. -* expat 1.95 or newer +* libxml + Recommended. - Needed for 'xgettext', so that it can parse Glade XML files. + Needed for 'xgettext' and 'msgfmt', so that it can parse XML files. + Homepage: - http://expat.sourceforge.net/ + http://xmlsoft.org/ + Download: - http://sourceforge.net/project/showfiles.php?group_id=10127 + ftp://xmlsoft.org/ + If it is installed in a nonstandard directory, pass the option - --with-libexpat-prefix to 'configure'. + --with-libxml2-prefix to 'configure'. * A Java runtime and compiler (e.g. GNU gcj or kaffe). + Recommended. @@ -342,6 +342,7 @@ if ! $skip_gnulib; then hash iconv libunistring-optional + markup minmax open ostream diff --git a/gettext-tools/configure.ac b/gettext-tools/configure.ac index fdf156e..dba0775 100644 --- a/gettext-tools/configure.ac +++ b/gettext-tools/configure.ac @@ -291,25 +291,6 @@ AH_BOTTOM([ #endif ]) -dnl Check for the expat XML parser. -dnl On operating systems where binary distribution vendors are likely to -dnl ship both gettext and expat, we use dynamic loading to avoid a hard -dnl dependency from gettext to expat. 
-case "$host_os" in - linux*) - AC_DEFINE([DYNLOAD_LIBEXPAT], [1], - [Define to 1 if libexpat shall be dynamically loaded via dlopen().]) - LIBEXPAT="-ldl" - LTLIBEXPAT="-ldl" - AC_SUBST([LIBEXPAT]) - AC_SUBST([LTLIBEXPAT]) - ;; - *) - AC_LIB_HAVE_LINKFLAGS([expat], [], - [#include <expat.h>], [XML_ExpatVersion();]) - ;; -esac - dnl Check for nm output filter that yields the exported symbols. gt_GLOBAL_SYMBOL_PIPE diff --git a/gettext-tools/gnulib-lib/.gitignore b/gettext-tools/gnulib-lib/.gitignore index 70ff6a8..5ff51b2 100644 --- a/gettext-tools/gnulib-lib/.gitignore +++ b/gettext-tools/gnulib-lib/.gitignore @@ -10,7 +10,14 @@ /uniwidth # Files brought in by gnulib-tool: + + + +# Files generated by gperf: +# Files generated by moopp: +# Files generated by the autotools: /Makefile.gnulib +/Makefile.in /acl-errno-valid.c /acl-internal.c /acl-internal.h @@ -90,13 +97,21 @@ /fcntl.in.h /fd-hook.c /fd-hook.h +/fd-ostream.c +/fd-ostream.h /fd-ostream.oo.c /fd-ostream.oo.h /fd-safer-flag.c /fd-safer.c +/fd_ostream.priv.h +/fd_ostream.vt.h /file-has-acl.c +/file-ostream.c +/file-ostream.h /file-ostream.oo.c /file-ostream.oo.h +/file_ostream.priv.h +/file_ostream.vt.h /filename.h /findprog.c /findprog.h @@ -132,6 +147,8 @@ /gl_anylinked_list2.h /gl_array_list.c /gl_array_list.h +/gl_linked_list.c +/gl_linked_list.h /gl_linkedhash_list.c /gl_linkedhash_list.h /gl_list.c @@ -142,18 +159,31 @@ /glibconfig.in.h /hash.c /hash.h +/html-ostream.c +/html-ostream.h /html-ostream.oo.c /html-ostream.oo.h +/html-styled-ostream.c +/html-styled-ostream.h /html-styled-ostream.oo.c /html-styled-ostream.oo.h +/html_ostream.priv.h +/html_ostream.vt.h +/html_styled_ostream.priv.h +/html_styled_ostream.vt.h /iconv.c /iconv.in.h /iconv_close.c /iconv_open-aix.gperf +/iconv_open-aix.h /iconv_open-hpux.gperf +/iconv_open-hpux.h /iconv_open-irix.gperf +/iconv_open-irix.h /iconv_open-osf.gperf +/iconv_open-osf.h /iconv_open-solaris.gperf +/iconv_open-solaris.h /iconv_open.c /iconveh.h 
/ignore-value.h @@ -180,6 +210,8 @@ /malloca.c /malloca.h /malloca.valgrind +/markup.c +/markup.h /mbchar.c /mbchar.h /mbiter.c @@ -210,8 +242,12 @@ /obstack.h /open.c /opendir.c +/ostream.c +/ostream.h /ostream.oo.c /ostream.oo.h +/ostream.priv.h +/ostream.vt.h /pathmax.h /pipe-filter-aux.c /pipe-filter-aux.h @@ -319,8 +355,12 @@ /strstr.c /strtol.c /strtoul.c +/styled-ostream.c +/styled-ostream.h /styled-ostream.oo.c /styled-ostream.oo.h +/styled_ostream.priv.h +/styled_ostream.vt.h /sys_select.in.h /sys_socket.in.h /sys_stat.in.h @@ -329,10 +369,18 @@ /sys_wait.in.h /tempname.c /tempname.h +/term-ostream.c +/term-ostream.h /term-ostream.oo.c /term-ostream.oo.h +/term-styled-ostream.c +/term-styled-ostream.h /term-styled-ostream.oo.c /term-styled-ostream.oo.h +/term_ostream.priv.h +/term_ostream.vt.h +/term_styled_ostream.priv.h +/term_styled_ostream.vt.h /terminfo.h /time.in.h /tmpdir.c @@ -389,47 +437,3 @@ /xstriconveh.h /xvasprintf.c /xvasprintf.h - -# Files generated by the autotools: -/Makefile.in - -# Files generated by gperf: -/iconv_open-aix.h -/iconv_open-hpux.h -/iconv_open-irix.h -/iconv_open-osf.h -/iconv_open-solaris.h - -# Files generated by moopp: -/fd-ostream.c -/fd-ostream.h -/fd_ostream.priv.h -/fd_ostream.vt.h -/file-ostream.c -/file-ostream.h -/file_ostream.priv.h -/file_ostream.vt.h -/html-ostream.c -/html-ostream.h -/html-styled-ostream.c -/html-styled-ostream.h -/html_ostream.priv.h -/html_ostream.vt.h -/html_styled_ostream.priv.h -/html_styled_ostream.vt.h -/ostream.c -/ostream.h -/ostream.priv.h -/ostream.vt.h -/styled-ostream.c -/styled-ostream.h -/styled_ostream.priv.h -/styled_ostream.vt.h -/term-ostream.c -/term-ostream.h -/term-styled-ostream.c -/term-styled-ostream.h -/term_ostream.priv.h -/term_ostream.vt.h -/term_styled_ostream.priv.h -/term_styled_ostream.vt.h diff --git a/gettext-tools/gnulib-tests/.gitignore b/gettext-tools/gnulib-tests/.gitignore index 5f93dbc..d66e6d0 100644 --- a/gettext-tools/gnulib-tests/.gitignore +++ 
b/gettext-tools/gnulib-tests/.gitignore @@ -116,6 +116,7 @@ /test-inttypes.c /test-iswblank.c /test-langinfo.c +/test-linked_list.c /test-linkedhash_list.c /test-locale.c /test-localename.c diff --git a/gettext-tools/libgettextpo/.gitignore b/gettext-tools/libgettextpo/.gitignore index a7d1bb1..41424a8 100644 --- a/gettext-tools/libgettextpo/.gitignore +++ b/gettext-tools/libgettextpo/.gitignore @@ -11,15 +11,13 @@ # Files brought in by gnulib-tool: /Makefile.gnulib /alignof.h +/alloca.h /alloca.in.h /arg-nonnull.h /asnprintf.c /asprintf.c /basename.c /basename.h -/charset.alias -/close.c -/configmake.h /c++defs.h /c-ctype.c /c-ctype.h @@ -29,9 +27,12 @@ /c-strncasecmp.c /c-strstr.c /c-strstr.h +/charset.alias +/close.c /concat-filename.c /concat-filename.h /config.charset +/configmake.h /diffseq.h /dosname.h /errno.in.h @@ -42,6 +43,7 @@ /exitfail.c /exitfail.h /exported.sh +/fcntl.h /fcntl.in.h /fd-hook.c /fd-hook.h @@ -61,12 +63,21 @@ /gcd.h /getdelim.c /getline.c -/gettext.h /gettext-po.h +/gettext.h /gettimeofday.c +/gl_anylinked_list1.h +/gl_anylinked_list2.h +/gl_linked_list.c +/gl_linked_list.h +/gl_list.c +/gl_list.h +/gl_xlist.c +/gl_xlist.h /hash.c /hash.h /iconv.c +/iconv.h /iconv.in.h /iconv_close.c /iconv_open-aix.gperf @@ -85,6 +96,8 @@ /malloca.c /malloca.h /malloca.valgrind +/markup.c +/markup.h /mbrtowc.c /mbsinit.c /mbswidth.c @@ -113,10 +126,13 @@ /rawmemchr.c /rawmemchr.valgrind /realloc.c +/ref-add.sed /ref-add.sin +/ref-del.sed /ref-del.sin /relocatable.c /relocatable.h +/signal.h /signal.in.h /sigprocmask.c /size_max.h @@ -126,7 +142,10 @@ /stddef.in.h /stdint.in.h /stdio-write.c +/stdio.c +/stdio.h /stdio.in.h +/stdlib.h /stdlib.in.h /stpcpy.c /stpncpy.c @@ -143,15 +162,19 @@ /striconveh.h /striconveha.c /striconveha.h +/string.h /string.in.h /strstr.c /sys_stat.in.h /sys_time.in.h /sys_types.in.h +/time.h /time.in.h /uniconv.in.h /unictype.in.h /unilbrk.in.h +/unistd.c +/unistd.h /unistd.in.h /unistr.in.h /unitypes.in.h @@ -163,9 +186,11 
@@ /vasprintf.c /verify.h /warn-on-use.h +/wchar.h /wchar.in.h -/wctype.in.h /wctype-h.c +/wctype.h +/wctype.in.h /wcwidth.c /xalloc.h /xasprintf.c @@ -185,21 +210,12 @@ # Files generated by the autotools: /Makefile.in -/alloca.h -/fcntl.h -/iconv.h -/ref-add.sed -/ref-del.sed -/signal.h -/stdio.c -/stdio.h -/stdlib.h -/string.h -/time.h -/unistd.c -/unistd.h -/wchar.h -/wctype.h +/uniconv.h +/unictype.h +/unilbrk.h +/unistr.h +/unitypes.h +/uniwidth.h # Files generated by gperf: /iconv_open-aix.h diff --git a/gettext-tools/libgettextpo/Makefile.am b/gettext-tools/libgettextpo/Makefile.am index 5a4404d..dfbf40c 100644 --- a/gettext-tools/libgettextpo/Makefile.am +++ b/gettext-tools/libgettextpo/Makefile.am @@ -41,6 +41,8 @@ AM_CPPFLAGS = \ -I../src -I$(top_srcdir)/src \ -I../intl -I$(top_srcdir)/../gettext-runtime/intl +DEFS = -DIN_LIBGETTEXTPO=1 @DEFS@ + # libgettextpo contains the public API for PO files. libgettextpo_la_SOURCES = \ gettext-po.c \ @@ -93,8 +95,7 @@ libgettextpo_la_AUXSOURCES = \ ../src/plural-exp.c \ ../src/plural-eval.c \ ../src/msgl-check.c \ - ../src/sentence.c \ - ../src/libexpat-compat.c + ../src/sentence.c # Libtool's library version information for libgettextpo. # See the libtool documentation, section "Library interface versions". @@ -109,7 +110,7 @@ libgettextpo_la_LIBADD = libgnu.la $(WOE32_LIBADD) $(LTLIBUNISTRING) libgettextpo_la_LDFLAGS = \ -version-info $(LTV_CURRENT):$(LTV_REVISION):$(LTV_AGE) \ -rpath $(libdir) \ - @LTLIBINTL@ @LTLIBICONV@ @LTLIBEXPAT@ -lc -no-undefined + @LTLIBINTL@ @LTLIBICONV@ -lc -no-undefined # Tell the mingw or Cygwin linker which symbols to export. 
if WOE32DLL @@ -155,7 +156,7 @@ config.h: $(BUILT_SOURCES) sf=`echo "$$f" | sed -e 's,\\.[^.]*$$,,'`.c; \ test -f $$sf || sf=$(srcdir)/$$sf; \ of=`echo "$$f" | sed -e 's,^.*/,,' -e 's,\\.[^.]*$$,,'`.$(OBJEXT); \ - $(COMPILE) -c $$sf || { rm -f config.h; exit 1; }; \ + $(COMPILE) $(DEFS) -c $$sf || { rm -f config.h; exit 1; }; \ sh ./exported.sh $$of 1>&5; \ rm -f $$of `echo "$$of" | sed -e 's,\\.$(OBJEXT)$$,.lo,'`; \ ;; \ diff --git a/gettext-tools/src/Makefile.am b/gettext-tools/src/Makefile.am index f3dab4c..7109072 100644 --- a/gettext-tools/src/Makefile.am +++ b/gettext-tools/src/Makefile.am @@ -56,7 +56,7 @@ po-time.h plural-table.h lang-table.h format.h filters.h \ xgettext.h x-c.h x-po.h x-sh.h x-python.h x-lisp.h x-elisp.h x-librep.h \ x-scheme.h x-smalltalk.h x-java.h x-properties.h x-csharp.h x-awk.h x-ycp.h \ x-tcl.h x-perl.h x-php.h x-stringtable.h x-rst.h x-glade.h x-lua.h \ -x-javascript.h x-vala.h x-gsettings.h x-desktop.h x-appdata.h libexpat-compat.h +x-javascript.h x-vala.h x-gsettings.h x-desktop.h x-appdata.h EXTRA_DIST += FILES project-id @@ -152,7 +152,7 @@ $(COMMON_SOURCE) read-catalog.c \ color.c write-catalog.c write-properties.c write-stringtable.c write-po.c \ msgl-ascii.c msgl-iconv.c msgl-equal.c msgl-cat.c msgl-header.c msgl-english.c \ msgl-check.c file-list.c msgl-charset.c po-time.c plural-exp.c plural-eval.c \ -plural-table.c quote.h sentence.h sentence.c libexpat-compat.c \ +plural-table.c quote.h sentence.h sentence.c \ $(FORMAT_SOURCE) \ read-desktop.c locating-rule.c its.c @@ -254,7 +254,7 @@ cldr_plurals_LDADD = libgettextsrc.la $(LDADD) # use iconv(). 
libgettextsrc_la_LDFLAGS = \ -release @VERSION@ \ - ../gnulib-lib/libgettextlib.la $(LTLIBUNISTRING) @LTLIBINTL@ @LTLIBICONV@ @LTLIBEXPAT@ -lc -no-undefined + ../gnulib-lib/libgettextlib.la $(LTLIBUNISTRING) @LTLIBINTL@ @LTLIBICONV@ -lc -no-undefined libgettextsrc_la_CPPFLAGS = $(AM_CPPFLAGS) $(INCXML) diff --git a/gettext-tools/src/format-kde-kuit.c b/gettext-tools/src/format-kde-kuit.c index 3e00697..afd6b15 100644 --- a/gettext-tools/src/format-kde-kuit.c +++ b/gettext-tools/src/format-kde-kuit.c @@ -24,12 +24,28 @@ #include <stdlib.h> #include "format.h" -#include "libexpat-compat.h" #include "unistr.h" #include "xalloc.h" #include "xvasprintf.h" #include "gettext.h" +#if IN_LIBGETTEXTPO +/* Use included markup parser to avoid extra dependency from + libgettextpo to libxml2. */ +# ifndef FORMAT_KDE_KUIT_FALLBACK_MARKUP +# define FORMAT_KDE_KUIT_USE_FALLBACK_MARKUP 1 +# endif +#else +# define FORMAT_KDE_KUIT_USE_LIBXML2 1 +#endif + +#if FORMAT_KDE_KUIT_USE_LIBXML2 +# include <libxml/parser.h> +#elif FORMAT_KDE_KUIT_USE_FALLBACK_MARKUP +# include "markup.h" +#endif + + #define _(str) gettext (str) #define SIZEOF(a) (sizeof(a) / sizeof(a[0])) @@ -48,26 +64,8 @@ struct spec void *base; }; -#if DYNLOAD_LIBEXPAT || HAVE_LIBEXPAT - #define XML_NS "https://www.gnu.org/s/gettext/kde" -/* Callback called when <element> is seen. */ -static void -start_element_handler (void *data, const char *name, - const char **attributes) -{ - /* Nothing to do for now. We could check text outside of a - structuring tag, etc. */ -} - -/* Callback called when </element> is seen. */ -static void -end_element_handler (void *data, const char *name) -{ - /* Nothing to do. 
*/ -} - struct char_range { ucs4_t start; @@ -182,8 +180,6 @@ is_reference (const char *input) return false; } -#endif - static void * format_parse (const char *format, bool translated, char *fdi, @@ -191,83 +187,99 @@ format_parse (const char *format, bool translated, char *fdi, { struct spec spec; struct spec *result; + const char *str; + const char *str_limit; + size_t amp_count; + char *buffer, *bp; spec.base = NULL; -#if DYNLOAD_LIBEXPAT || HAVE_LIBEXPAT - if (LIBEXPAT_AVAILABLE ()) - { - XML_Parser parser; - const char *str = format; - const char *str_limit = str + strlen (format); - size_t amp_count; - char *buffer, *bp; + /* Preprocess the input, putting the content in a <gt:kuit> element. */ + str = format; + str_limit = str + strlen (format); - for (amp_count = 0; str < str_limit; amp_count++) - { - const char *amp = strchrnul (str, '&'); - if (*amp != '&') - break; - str = amp + 1; - } + for (amp_count = 0; str < str_limit; amp_count++) + { + const char *amp = strchrnul (str, '&'); + if (*amp != '&') + break; + str = amp + 1; + } - buffer = xmalloc (amp_count * 4 - + strlen (format) - + strlen ("<gt:kuit xmlns:gt=\"" XML_NS "\"></gt:kuit>") - + 1); - *buffer = '\0'; + buffer = xmalloc (amp_count * 4 + + strlen (format) + + strlen ("<gt:kuit xmlns:gt=\"" XML_NS "\"></gt:kuit>") + + 1); + *buffer = '\0'; - bp = buffer; - bp = stpcpy (bp, "<gt:kuit xmlns:gt=\"" XML_NS "\">"); - str = format; - while (str < str_limit) - { - const char *amp = strchrnul (str, '&'); + bp = buffer; + bp = stpcpy (bp, "<gt:kuit xmlns:gt=\"" XML_NS "\">"); + str = format; + while (str < str_limit) + { + const char *amp = strchrnul (str, '&'); - bp = stpncpy (bp, str, amp - str); - if (*amp != '&') - break; + bp = stpncpy (bp, str, amp - str); + if (*amp != '&') + break; - bp = stpcpy (bp, is_reference (amp) ? "&" : "&"); - str = amp + 1; - } - stpcpy (bp, "</gt:kuit>"); + bp = stpcpy (bp, is_reference (amp) ? 
"&" : "&"); + str = amp + 1; + } + stpcpy (bp, "</gt:kuit>"); - parser = XML_ParserCreate (NULL); - if (parser == NULL) +#if FORMAT_KDE_KUIT_USE_LIBXML2 + { + xmlDocPtr doc; + + doc = xmlReadMemory (buffer, strlen (buffer), "", NULL, + XML_PARSE_NONET + | XML_PARSE_NOWARNING + | XML_PARSE_NOERROR + | XML_PARSE_NOBLANKS); + if (doc == NULL) { - *invalid_reason = xasprintf (_("memory exhausted")); - free (buffer); - return NULL; + xmlError *err = xmlGetLastError (); + *invalid_reason = + xasprintf (_("error while parsing: %s"), + err->message); } - XML_SetElementHandler (parser, - start_element_handler, - end_element_handler); + free (buffer); + xmlFreeDoc (doc); + } +#elif FORMAT_KDE_KUIT_FALLBACK_MARKUP + { + markup_parser_ty parser; + markup_parse_context_ty *context; - if (XML_Parse (parser, buffer, strlen (buffer), 0) == 0) + memset (&parser, 0, sizeof (markup_parser_ty)); + context = markup_parse_context_new (&parser, 0, NULL); + if (!markup_parse_context_parse (context, buffer, strlen (buffer))) { *invalid_reason = xasprintf (_("error while parsing: %s"), - XML_ErrorString (XML_GetErrorCode (parser))); + markup_parse_context_get_error (context)); free (buffer); - XML_ParserFree (parser); + markup_parse_context_free (context); return NULL; } - if (XML_Parse (parser, NULL, 0, 1) == 0) + if (!markup_parse_context_end_parse (context)) { *invalid_reason = xasprintf (_("error while parsing: %s"), - XML_ErrorString (XML_GetErrorCode (parser))); + markup_parse_context_get_error (context)); free (buffer); - XML_ParserFree (parser); + markup_parse_context_free (context); return NULL; } free (buffer); - XML_ParserFree (parser); + markup_parse_context_free (context); } +#else + /* No support for XML. 
*/ #endif spec.base = formatstring_kde.parse (format, translated, fdi, invalid_reason); diff --git a/gettext-tools/src/libexpat-compat.c b/gettext-tools/src/libexpat-compat.c deleted file mode 100644 index 33d264a..0000000 --- a/gettext-tools/src/libexpat-compat.c +++ /dev/null @@ -1,327 +0,0 @@ -/* xgettext libexpat compatibility. - Copyright (C) 2002-2003, 2005-2009, 2013, 2015 Free Software - Foundation, Inc. - - This file was written by Bruno Haible <haible@clisp.cons.org>, 2002. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. */ - -#ifdef HAVE_CONFIG_H -# include "config.h" -#endif - -#include <stdlib.h> -#include <string.h> - -#if DYNLOAD_LIBEXPAT -# include <dlfcn.h> -#else -# if HAVE_LIBEXPAT -# include <expat.h> -# endif -#endif - -/* Keep the references to XML_GetCurrent{Line,Column}Number symbols - before loading libexpat-compat.h, since they are redefined to - rpl_XML_GetCurrent{Line,Column}Number . */ -#if !DYNLOAD_LIBEXPAT && XML_MAJOR_VERSION >= 2 -static void *p_XML_GetCurrentLineNumber = (void *) &XML_GetCurrentLineNumber; -static void *p_XML_GetCurrentColumnNumber = (void *) &XML_GetCurrentColumnNumber; -#endif - -#include "libexpat-compat.h" - -/* ======================= Different libexpat ABIs. ======================= */ - -/* There are three different ABIs of libexpat, regarding the functions - XML_GetCurrentLineNumber and XML_GetCurrentColumnNumber. 
- In expat < 2.0, they return an 'int'. - In expat >= 2.0, they return - - a 'long' if expat was compiled with the default flags, or - - a 'long long' if expat was compiled with -DXML_LARGE_SIZE. - But the <expat.h> include file does not contain the information whether - expat was compiled with -DXML_LARGE_SIZE; so the include file is lying! - For this information, we need to call XML_GetFeatureList(), for - expat >= 2.0.1; for expat = 2.0.0, we have to assume the default flags. */ - -#if !DYNLOAD_LIBEXPAT && XML_MAJOR_VERSION >= 2 - -/* expat >= 2.0 -> Return type is 'int64_t' worst-case. */ - -/* Return true if libexpat was compiled with -DXML_LARGE_SIZE. */ -static bool -is_XML_LARGE_SIZE_ABI (void) -{ - static bool tested; - static bool is_large; - - if (!tested) - { - const XML_Feature *features; - - is_large = false; - for (features = XML_GetFeatureList (); features->name != NULL; features++) - if (strcmp (features->name, "XML_LARGE_SIZE") == 0) - { - is_large = true; - break; - } - - tested = true; - } - return is_large; -} - -int64_t -rpl_XML_GetCurrentLineNumber (XML_Parser parser) -{ - if (is_XML_LARGE_SIZE_ABI ()) - return ((int64_t (*) (XML_Parser)) p_XML_GetCurrentLineNumber) (parser); - else - return ((long (*) (XML_Parser)) p_XML_GetCurrentLineNumber) (parser); -} - -int64_t -rpl_XML_GetCurrentColumnNumber (XML_Parser parser) -{ - if (is_XML_LARGE_SIZE_ABI ()) - return ((int64_t (*) (XML_Parser)) p_XML_GetCurrentColumnNumber) (parser); - else - return ((long (*) (XML_Parser)) p_XML_GetCurrentColumnNumber) (parser); -} -#endif - - -/* ===================== Dynamic loading of libexpat. 
===================== */ - -#if DYNLOAD_LIBEXPAT - -static XML_Expat_Version (*p_XML_ExpatVersionInfo) (void); - -XML_Expat_Version -XML_ExpatVersionInfo (void) -{ - return (*p_XML_ExpatVersionInfo) (); -} - -static const XML_Feature * (*p_XML_GetFeatureList) (void); - -const XML_Feature * -XML_GetFeatureList (void) -{ - return (*p_XML_GetFeatureList) (); -} - -enum XML_Size_ABI -get_XML_Size_ABI (void) -{ - static bool tested; - static enum XML_Size_ABI abi; - - if (!tested) - { - if (XML_ExpatVersionInfo () .major >= 2) - /* expat >= 2.0 -> XML_Size is 'int64_t' or 'long'. */ - { - const XML_Feature *features; - - abi = is_long; - for (features = XML_GetFeatureList (); - features->name != NULL; - features++) - if (strcmp (features->name, "XML_LARGE_SIZE") == 0) - { - abi = is_int64_t; - break; - } - } - else - /* expat < 2.0 -> XML_Size is 'int'. */ - abi = is_int; - tested = true; - } - return abi; -} - -static XML_Parser (*p_XML_ParserCreate) (const XML_Char *encoding); - -XML_Parser -XML_ParserCreate (const XML_Char *encoding) -{ - return (*p_XML_ParserCreate) (encoding); -} - -static void (*p_XML_SetElementHandler) (XML_Parser parser, - XML_StartElementHandler start, - XML_EndElementHandler end); - -void -XML_SetElementHandler (XML_Parser parser, - XML_StartElementHandler start, - XML_EndElementHandler end) -{ - (*p_XML_SetElementHandler) (parser, start, end); -} - - -static void (*p_XML_SetCharacterDataHandler) (XML_Parser parser, - XML_CharacterDataHandler handler); - -void -XML_SetCharacterDataHandler (XML_Parser parser, - XML_CharacterDataHandler handler) -{ - (*p_XML_SetCharacterDataHandler) (parser, handler); -} - - -static void (*p_XML_SetCommentHandler) (XML_Parser parser, - XML_CommentHandler handler); - -void -XML_SetCommentHandler (XML_Parser parser, XML_CommentHandler handler) -{ - (*p_XML_SetCommentHandler) (parser, handler); -} - - -static int (*p_XML_Parse) (XML_Parser parser, const char *s, - int len, int isFinal); - -int -XML_Parse 
(XML_Parser parser, const char *s, int len, int isFinal) -{ - return (*p_XML_Parse) (parser, s, len, isFinal); -} - - -static enum XML_Error (*p_XML_GetErrorCode) (XML_Parser parser); - -enum XML_Error -XML_GetErrorCode (XML_Parser parser) -{ - return (*p_XML_GetErrorCode) (parser); -} - - -static void *p_XML_GetCurrentLineNumber; - -int64_t -XML_GetCurrentLineNumber (XML_Parser parser) -{ - switch (get_XML_Size_ABI ()) - { - case is_int: - return ((int (*) (XML_Parser)) p_XML_GetCurrentLineNumber) (parser); - case is_long: - return ((long (*) (XML_Parser)) p_XML_GetCurrentLineNumber) (parser); - case is_int64_t: - return ((int64_t (*) (XML_Parser)) p_XML_GetCurrentLineNumber) (parser); - default: - abort (); - } -} - -static void *p_XML_GetCurrentColumnNumber; - -int64_t -XML_GetCurrentColumnNumber (XML_Parser parser) -{ - switch (get_XML_Size_ABI ()) - { - case is_int: - return ((int (*) (XML_Parser)) p_XML_GetCurrentColumnNumber) (parser); - case is_long: - return ((long (*) (XML_Parser)) p_XML_GetCurrentColumnNumber) (parser); - case is_int64_t: - return ((int64_t (*) (XML_Parser)) p_XML_GetCurrentColumnNumber) (parser); - default: - abort (); - } -} - - -static const XML_LChar * (*p_XML_ErrorString) (int code); - -const XML_LChar * -XML_ErrorString (int code) -{ - return (*p_XML_ErrorString) (code); -} - -static void (*p_XML_ParserFree) (XML_Parser parser); - -void -XML_ParserFree (XML_Parser parser) -{ - return (*p_XML_ParserFree) (parser); -} - -static int libexpat_loaded = 0; - -bool -load_libexpat () -{ - if (libexpat_loaded == 0) - { - void *handle; - - /* Try to load libexpat-2.x. */ - handle = dlopen ("libexpat.so.1", RTLD_LAZY); - if (handle == NULL) - /* Try to load libexpat-1.x. 
*/ - handle = dlopen ("libexpat.so.0", RTLD_LAZY); - if (handle != NULL - && (p_XML_ExpatVersionInfo = - (XML_Expat_Version (*) (void)) - dlsym (handle, "XML_ExpatVersionInfo")) != NULL - && (p_XML_GetFeatureList = - (const XML_Feature * (*) (void)) - dlsym (handle, "XML_GetFeatureList")) != NULL - && (p_XML_ParserCreate = - (XML_Parser (*) (const XML_Char *)) - dlsym (handle, "XML_ParserCreate")) != NULL - && (p_XML_SetElementHandler = - (void (*) (XML_Parser, XML_StartElementHandler, XML_EndElementHandler)) - dlsym (handle, "XML_SetElementHandler")) != NULL - && (p_XML_SetCharacterDataHandler = - (void (*) (XML_Parser, XML_CharacterDataHandler)) - dlsym (handle, "XML_SetCharacterDataHandler")) != NULL - && (p_XML_SetCommentHandler = - (void (*) (XML_Parser, XML_CommentHandler)) - dlsym (handle, "XML_SetCommentHandler")) != NULL - && (p_XML_Parse = - (int (*) (XML_Parser, const char *, int, int)) - dlsym (handle, "XML_Parse")) != NULL - && (p_XML_GetErrorCode = - (enum XML_Error (*) (XML_Parser)) - dlsym (handle, "XML_GetErrorCode")) != NULL - && (p_XML_GetCurrentLineNumber = - dlsym (handle, "XML_GetCurrentLineNumber")) != NULL - && (p_XML_GetCurrentColumnNumber = - dlsym (handle, "XML_GetCurrentColumnNumber")) != NULL - && (p_XML_ParserFree = - (void (*) (XML_Parser)) - dlsym (handle, "XML_ParserFree")) != NULL - && (p_XML_ErrorString = - (const XML_LChar * (*) (int)) - dlsym (handle, "XML_ErrorString")) != NULL) - libexpat_loaded = 1; - else - libexpat_loaded = -1; - } - return libexpat_loaded >= 0; -} - -#endif diff --git a/gettext-tools/src/libexpat-compat.h b/gettext-tools/src/libexpat-compat.h deleted file mode 100644 index 3e41e82..0000000 --- a/gettext-tools/src/libexpat-compat.h +++ /dev/null @@ -1,95 +0,0 @@ -/* xgettext libexpat compatibility. - Copyright (C) 2002-2003, 2005-2009, 2013, 2015 Free Software - Foundation, Inc. - - This file was written by Bruno Haible <haible@clisp.cons.org>, 2002. 
- - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. */ - -#include <stdbool.h> -#include <stdint.h> -#if DYNLOAD_LIBEXPAT -# include <dlfcn.h> -#else -# if HAVE_LIBEXPAT -# include <expat.h> -# endif -#endif - -#if !DYNLOAD_LIBEXPAT && XML_MAJOR_VERSION >= 2 -int64_t rpl_XML_GetCurrentLineNumber (XML_Parser parser); -# undef XML_GetCurrentLineNumber -# define XML_GetCurrentLineNumber rpl_XML_GetCurrentLineNumber - -int64_t rpl_XML_GetCurrentColumnNumber (XML_Parser parser); -# undef XML_GetCurrentColumnNumber -# define XML_GetCurrentColumnNumber rpl_XML_GetCurrentColumnNumber -#endif - -/* ===================== Dynamic loading of libexpat. 
===================== */ - -#if DYNLOAD_LIBEXPAT -typedef struct - { - int major; - int minor; - int micro; - } - XML_Expat_Version; -enum XML_FeatureEnum { XML_FEATURE_END = 0 }; -typedef struct - { - enum XML_FeatureEnum feature; - const char *name; - long int value; - } - XML_Feature; -typedef void *XML_Parser; -typedef char XML_Char; -typedef char XML_LChar; -enum XML_Error { XML_ERROR_NONE }; -typedef void (*XML_StartElementHandler) (void *userData, const XML_Char *name, const XML_Char **atts); -typedef void (*XML_EndElementHandler) (void *userData, const XML_Char *name); -typedef void (*XML_CharacterDataHandler) (void *userData, const XML_Char *s, int len); -typedef void (*XML_CommentHandler) (void *userData, const XML_Char *data); - -XML_Expat_Version XML_ExpatVersionInfo (void); -const XML_Feature * XML_GetFeatureList (void); - -enum XML_Size_ABI { is_int, is_long, is_int64_t }; -enum XML_Size_ABI get_XML_Size_ABI (void); - -XML_Parser XML_ParserCreate (const XML_Char *encoding); -void XML_SetElementHandler (XML_Parser parser, - XML_StartElementHandler start, - XML_EndElementHandler end); -void XML_SetCharacterDataHandler (XML_Parser parser, - XML_CharacterDataHandler handler); -void XML_SetCommentHandler (XML_Parser parser, XML_CommentHandler handler); -int XML_Parse (XML_Parser parser, const char *s, int len, int isFinal); -enum XML_Error XML_GetErrorCode (XML_Parser parser); -int64_t XML_GetCurrentLineNumber (XML_Parser parser); -int64_t XML_GetCurrentColumnNumber (XML_Parser parser); -const XML_LChar * XML_ErrorString (int code); -void XML_ParserFree (XML_Parser parser); - -bool load_libexpat (); - -#define LIBEXPAT_AVAILABLE() (load_libexpat ()) - -#elif HAVE_LIBEXPAT - -#define LIBEXPAT_AVAILABLE() true - -#endif diff --git a/gnulib-local/lib/markup.c b/gnulib-local/lib/markup.c new file mode 100644 index 0000000..a0f6856 --- /dev/null +++ b/gnulib-local/lib/markup.c @@ -0,0 +1,1523 @@ +/* markup.c -- simple XML-like parser + Copyright (C) 2015 Free 
Software Foundation, Inc. + + This file is not part of the GNU gettext program, but is used with + GNU gettext. + + This is a stripped down version of GLib's gmarkup.c. The original + copyright notice is as follows: +*/ + +/* gmarkup.c - Simple XML-like parser + * + * Copyright 2000, 2003 Red Hat, Inc. + * Copyright 2007, 2008 Ryan Lortie <desrt@desrt.ca> + * + * GLib is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 3 of the + * License, or (at your option) any later version. + * + * GLib is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with GLib; see the file COPYING.LIB. If not, + * see <http://www.gnu.org/licenses/>. + */ + +#include "config.h" + +#include <assert.h> +#include <stdarg.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> + +/* Specification */ +#include "markup.h" + +#include "c-ctype.h" +#include "gettext.h" +#include "gl_linked_list.h" +#include "gl_xlist.h" +#include "unictype.h" +#include "unistr.h" +#include "xalloc.h" +#include "xvasprintf.h" + +#define _(s) gettext(s) + +/** + * The "markup" parser is intended to parse a simple markup format + * that's a subset of XML. This is a small, efficient, easy-to-use + * parser. It should not be used if you expect to interoperate with + * other applications generating full-scale XML. However, it's very + * useful for application data files, config files, etc. where you + * know your application will be the only one writing the file. 
+ * Full-scale XML parsers should be able to parse the subset used by + * markup, so you can easily migrate to full-scale XML at a later + * time if the need arises. + * + * The parser is not guaranteed to signal an error on all invalid XML; + * the parser may accept documents that an XML parser would not. + * However, XML documents which are not well-formed (which is a weaker + * condition than being valid. See the XML specification + * <http://www.w3.org/TR/REC-xml/> for definitions of these terms.) + * are not considered valid GMarkup documents. + * + * Simplifications to XML include: + * + * - Only UTF-8 encoding is allowed + * + * - No user-defined entities + * + * - Processing instructions, comments and the doctype declaration + * are "passed through" but are not interpreted in any way + * + * - No DTD or validation + * + * The markup format does support: + * + * - Elements + * + * - Attributes + * + * - 5 standard entities: &amp; &lt; &gt; &quot; &apos; + * + * - Character references + * + * - Sections marked as CDATA + */ + +/* States of the parser; markup_parse_context_parse switches on the + context's current state for every step through the input. */ +typedef enum +{ + STATE_START, + STATE_AFTER_OPEN_ANGLE, + STATE_AFTER_CLOSE_ANGLE, + STATE_AFTER_ELISION_SLASH, /* the slash that obviates need for end element */ + STATE_INSIDE_OPEN_TAG_NAME, + STATE_INSIDE_ATTRIBUTE_NAME, + STATE_AFTER_ATTRIBUTE_NAME, + STATE_BETWEEN_ATTRIBUTES, + STATE_AFTER_ATTRIBUTE_EQUALS_SIGN, + STATE_INSIDE_ATTRIBUTE_VALUE_SQ, + STATE_INSIDE_ATTRIBUTE_VALUE_DQ, + STATE_INSIDE_TEXT, + STATE_AFTER_CLOSE_TAG_SLASH, + STATE_INSIDE_CLOSE_TAG_NAME, + STATE_AFTER_CLOSE_TAG_NAME, + STATE_INSIDE_PASSTHROUGH, + STATE_ERROR +} markup_parse_state_ty; + +/* Values saved by markup_parse_context_push when a subparser is + installed, and put back by pop_subparser_stack. */ +typedef struct +{ + const char *prev_element; + const markup_parser_ty *prev_parser; + void *prev_user_data; +} markup_recursion_tracker_ty; + +/* Growable character buffer. BUFMAX is the size passed to xrealloc + for BUFFER; BUFLEN is the index at which the terminating NUL is + kept. */ +typedef struct +{ + char *buffer; + size_t bufmax; + size_t buflen; +} markup_string_ty; + +struct _markup_parse_context_ty +{ + const markup_parser_ty *parser; + + markup_parse_flags_ty flags; + + int line_number; + int char_number; + + 
markup_parse_state_ty state; + + void *user_data; + + /* A piece of character data or an element that + * hasn't "ended" yet so we haven't yet called + * the callback for it. + */ + markup_string_ty *partial_chunk; + + gl_list_t tag_stack; /* <markup_string_ty> */ + + char **attr_names; + char **attr_values; + int cur_attr; + int alloc_attrs; + + const char *current_text; + ssize_t current_text_len; + const char *current_text_end; + + /* used to save the start of the last interesting thingy */ + const char *start; + + const char *iter; + + char *error_text; + + unsigned int document_empty : 1; + unsigned int parsing : 1; + unsigned int awaiting_pop : 1; + int balance; + + /* subparser support */ + gl_list_t subparser_stack; /* <markup_recursion_tracker_ty *> */ + const char *subparser_element; +}; + +/* Returns a newly allocated string object obtained from XZALLOC, + i.e. with no buffer attached yet. */ +static markup_string_ty * +markup_string_new (void) +{ + return XZALLOC (markup_string_ty); +} + +/* Frees STRING. If FREE_SEGMENT is true the character data is freed + as well and NULL is returned; otherwise the character data is + returned and the caller becomes responsible for freeing it. */ +static char * +markup_string_free (markup_string_ty *string, bool free_segment) +{ + if (free_segment) + { + free (string->buffer); + free (string); + return NULL; + } + else + { + char *result = string->buffer; + free (string); + return result; + } +} + +/* One-argument variant of markup_string_free that always frees the + character data, usable as a list element dispose function. */ +static void +markup_string_free1 (markup_string_ty *string) +{ + markup_string_free (string, true); +} + +/* Shortens STRING to LENGTH bytes and re-terminates it. LENGTH must + not exceed the current length; shrinking by a single byte is valid + (e.g. a "\r\n" pair collapsed into one character). */ +static void +markup_string_truncate (markup_string_ty *string, size_t length) +{ + assert (string && length <= string->buflen); + string->buffer[length] = '\0'; + string->buflen = length; +} + +/* Appends the LENGTH bytes starting at TO_APPEND to the end of STRING, + growing the buffer as needed and keeping it NUL-terminated. */ +static void +markup_string_append (markup_string_ty *string, const char *to_append, + size_t length) +{ + if (string->buflen + length + 1 > string->bufmax) + { + string->bufmax *= 2; + if (string->buflen + length + 1 > string->bufmax) + string->bufmax = string->buflen + length + 1; + string->buffer = xrealloc (string->buffer, string->bufmax); + } + memcpy (string->buffer + string->buflen, to_append, length); + /* The new length is the old contents plus the appended piece; the + terminator goes after both, not after LENGTH bytes from the start. */ + string->buflen += length; + string->buffer[string->buflen] = '\0'; +} + +static inline void +string_blank (markup_string_ty 
*string) +{ + if (string->bufmax > 0) + { + *string->buffer = '\0'; + string->buflen = 0; + } +} + +/* Creates a new parse context. A parse context is used to parse + marked-up documents. You can feed any number of documents into a + context, as long as no errors occur; once an error occurs, the + parse context can't continue to parse text (you have to free it and + create a new parse context). */ +markup_parse_context_ty * +markup_parse_context_new (const markup_parser_ty *parser, + markup_parse_flags_ty flags, + void *user_data) +{ + markup_parse_context_ty *context; + + assert (parser != NULL); + + context = XMALLOC (markup_parse_context_ty); + + context->parser = parser; + context->flags = flags; + context->user_data = user_data; + + context->line_number = 1; + context->char_number = 1; + + context->partial_chunk = NULL; + + context->state = STATE_START; + context->tag_stack = + gl_list_create_empty (GL_LINKED_LIST, + NULL, NULL, + (gl_listelement_dispose_fn) markup_string_free1, + true); + context->attr_names = NULL; + context->attr_values = NULL; + context->cur_attr = -1; + context->alloc_attrs = 0; + + context->current_text = NULL; + context->current_text_len = -1; + context->current_text_end = NULL; + + context->start = NULL; + context->iter = NULL; + + context->error_text = NULL; + + context->document_empty = true; + context->parsing = false; + + context->awaiting_pop = false; + context->subparser_stack = + gl_list_create_empty (GL_LINKED_LIST, + NULL, NULL, + (gl_listelement_dispose_fn) free, + true); + context->subparser_element = NULL; + + context->balance = 0; + + return context; +} + +static void clear_attributes (markup_parse_context_ty *context); + +/* Frees a parse context. This function can't be called from inside + one of the markup_parser_ty functions or while a subparser is + pushed. 
*/ +void +markup_parse_context_free (markup_parse_context_ty *context) +{ + assert (context != NULL); + assert (!context->parsing); + assert (gl_list_size (context->subparser_stack) == 0); + assert (!context->awaiting_pop); + + clear_attributes (context); + free (context->attr_names); + free (context->attr_values); + + gl_list_free (context->tag_stack); + gl_list_free (context->subparser_stack); + + if (context->partial_chunk) + markup_string_free (context->partial_chunk, true); + + free (context->error_text); + + free (context); +} + +static void pop_subparser_stack (markup_parse_context_ty *context); + +static void +emit_error (markup_parse_context_ty *context, const char *error_text) +{ + context->state = STATE_ERROR; + + if (context->parser->error) + (*context->parser->error) (context, error_text, context->user_data); + + /* report the error all the way up to free all the user-data */ + while (gl_list_size (context->subparser_stack) > 0) + { + pop_subparser_stack (context); + context->awaiting_pop = false; /* already been freed */ + + if (context->parser->error) + (*context->parser->error) (context, error_text, context->user_data); + } + + if (context->error_text) + free (context->error_text); + context->error_text = xstrdup (error_text); +} + +#define IS_COMMON_NAME_END_CHAR(c) \ + ((c) == '=' || (c) == '/' || (c) == '>' || (c) == ' ') + +static bool +slow_name_validate (markup_parse_context_ty *context, const char *name) +{ + const char *p = name; + ucs4_t uc; + + if (u8_check ((uint8_t *) name, strlen (name)) != NULL) + { + emit_error (context, _("invalid UTF-8 sequence")); + return false; + } + + if (!(c_isalpha (*p) + || (!IS_COMMON_NAME_END_CHAR (*p) + && (*p == '_' + || *p == ':' + || (u8_mbtouc (&uc, (uint8_t *) name, strlen (name)) > 0 + && uc_is_alpha (uc)))))) + { + char *error_text = xasprintf (_("'%s' is not a valid name"), name); + emit_error (context, error_text); + free (error_text); + return false; + } + + for (p = (char *) u8_next (&uc, 
(uint8_t *) name); + p != NULL; + p = (char *) u8_next (&uc, (uint8_t *) p)) + { + /* is_name_char */ + if (!(c_isalnum (*p) || + (!IS_COMMON_NAME_END_CHAR (*p) && + (*p == '.' || + *p == '-' || + *p == '_' || + *p == ':' || + uc_is_alpha (uc))))) + { + char *error_text = xasprintf (_("'%s' is not a valid name: '%c'"), + name, *p); + emit_error (context, error_text); + free (error_text); + return false; + } + } + return true; +} + +/* + * Use me for elements, attributes etc. + */ +static bool +name_validate (markup_parse_context_ty *context, const char *name) +{ + char mask; + const char *p; + + /* name start char */ + p = name; + if (IS_COMMON_NAME_END_CHAR (*p) + || !(c_isalpha (*p) || *p == '_' || *p == ':')) + goto slow_validate; + + for (mask = *p++; *p != '\0'; p++) + { + mask |= *p; + + /* is_name_char */ + if (!(c_isalnum (*p) + || (!IS_COMMON_NAME_END_CHAR (*p) + && (*p == '.' || *p == '-' || *p == '_' || *p == ':')))) + goto slow_validate; + } + + if (mask & 0x80) /* un-common / non-ascii */ + goto slow_validate; + + return true; + + slow_validate: + return slow_name_validate (context, name); +} + +static bool +text_validate (markup_parse_context_ty *context, + const char *p, + int len) +{ + if (u8_check ((const uint8_t *) p, len) != NULL) + { + emit_error (context, _("invalid UTF-8 sequence")); + return false; + } + else + return true; +} + +/* + * re-write the GString in-place, unescaping anything that escaped. + * most XML does not contain entities, or escaping. + */ +static bool +unescape_string_inplace (markup_parse_context_ty *context, + markup_string_ty *string, + bool *is_ascii) +{ + char mask, *to; + const char *from; + bool normalize_attribute; + + if (string->buflen == 0) + return true; + + *is_ascii = false; + + /* are we unescaping an attribute or not ? 
*/ + if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ + || context->state == STATE_INSIDE_ATTRIBUTE_VALUE_DQ) + normalize_attribute = true; + else + normalize_attribute = false; + + /* + * Meeks' theorem: unescaping can only shrink text. + * for &lt; etc. this is obvious, for &#xffff; more + * thought is required, but this is patently so. + */ + mask = 0; + for (from = to = string->buffer; *from != '\0'; from++, to++) + { + *to = *from; + + mask |= *to; + if (normalize_attribute && (*to == '\t' || *to == '\n')) + *to = ' '; + if (*to == '\r') + { + *to = normalize_attribute ? ' ' : '\n'; + /* treat a "\r\n" pair as a single line break */ + if (from[1] == '\n') + from++; + } + if (*from == '&') + { + from++; + if (*from == '#') + { + int base = 10; + unsigned long l; + char *end = NULL; + + from++; + + if (*from == 'x') + { + base = 16; + from++; + } + + errno = 0; + l = strtoul (from, &end, base); + + if (end == from || errno != 0) + { + emit_error (context, + _("out of range when resolving character ref")); + return false; + } + else if (*end != ';') + { + emit_error (context, + _("character reference does not end with a ';'")); + return false; + } + else + { + /* characters XML 1.1 permits */ + if ((0 < l && l <= 0xD7FF) || + (0xE000 <= l && l <= 0xFFFD) || + (0x10000 <= l && l <= 0x10FFFF)) + { + char buf[8]; + int length; + length = u8_uctomb ((uint8_t *) buf, l, 8); + memcpy (to, buf, length); + /* leave TO on the last byte written and FROM on the + ';'; the loop increment steps past both */ + to += length - 1; + from = end; + if (l >= 0x80) /* not ascii */ + mask |= 0x80; + } + else + { + emit_error (context, _("invalid character reference")); + return false; + } + } + } + + /* for the named entities below, FROM is left on the ';' so + that the loop increment moves past it */ + else if (strncmp (from, "lt;", 3) == 0) + { + *to = '<'; + from += 2; + } + else if (strncmp (from, "gt;", 3) == 0) + { + *to = '>'; + from += 2; + } + else if (strncmp (from, "amp;", 4) == 0) + { + *to = '&'; + from += 3; + } + else if (strncmp (from, "quot;", 5) == 0) + { + *to = '"'; + from += 4; + } + else if (strncmp (from, "apos;", 5) == 0) + { + *to = '\''; + from += 4; + } + else + { + if (*from == ';') + emit_error (context, _("empty 
entity '&;'")); + else + { + const char *end = strchr (from, ';'); + if (end) + emit_error (context, _("unknown entity name")); + else + emit_error (context, _("entity does not end with a ';'")); + } + return false; + } + } + } + + assert (to - string->buffer <= string->buflen); + if (to - string->buffer != string->buflen) + markup_string_truncate (string, to - string->buffer); + + *is_ascii = !(mask & 0x80); + + return true; +} + +static inline bool +advance_char (markup_parse_context_ty *context) +{ + context->iter++; + context->char_number++; + + if (context->iter == context->current_text_end) + return false; + + else if (*context->iter == '\n') + { + context->line_number++; + context->char_number = 1; + } + + return true; +} + +static inline bool +xml_isspace (char c) +{ + return c == ' ' || c == '\t' || c == '\n' || c == '\r'; +} + +static void +skip_spaces (markup_parse_context_ty *context) +{ + do + { + if (!xml_isspace (*context->iter)) + return; + } + while (advance_char (context)); +} + +static void +advance_to_name_end (markup_parse_context_ty *context) +{ + do + { + if (IS_COMMON_NAME_END_CHAR (*(context->iter))) + return; + if (xml_isspace (*(context->iter))) + return; + } + while (advance_char (context)); +} + +static void +add_to_partial (markup_parse_context_ty *context, + const char *text_start, + const char *text_end) +{ + if (context->partial_chunk == NULL) + { /* allocate a new chunk to parse into */ + + context->partial_chunk = markup_string_new (); + } + + if (text_start != text_end) + markup_string_append (context->partial_chunk, + text_start, text_end - text_start); +} + +static inline void +truncate_partial (markup_parse_context_ty *context) +{ + if (context->partial_chunk != NULL) + string_blank (context->partial_chunk); +} + +static inline const char* +current_element (markup_parse_context_ty *context) +{ + const markup_string_ty *string = gl_list_get_at (context->tag_stack, 0); + return string->buffer; +} + +static void 
+pop_subparser_stack (markup_parse_context_ty *context) +{ + markup_recursion_tracker_ty *tracker; + + assert (gl_list_size (context->subparser_stack) > 0); + + tracker = (markup_recursion_tracker_ty *) gl_list_get_at (context->subparser_stack, 0); + + context->awaiting_pop = true; + + context->user_data = tracker->prev_user_data; + context->parser = tracker->prev_parser; + context->subparser_element = tracker->prev_element; + free (tracker); + + gl_list_remove_at (context->subparser_stack, 0); +} + +static void +push_partial_as_tag (markup_parse_context_ty *context) +{ + gl_list_add_first (context->tag_stack, context->partial_chunk); + context->partial_chunk = NULL; +} + +static void +pop_tag (markup_parse_context_ty *context) +{ + gl_list_remove_at (context->tag_stack, 0); +} + +static void +possibly_finish_subparser (markup_parse_context_ty *context) +{ + if (current_element (context) == context->subparser_element) + pop_subparser_stack (context); +} + +static void +ensure_no_outstanding_subparser (markup_parse_context_ty *context) +{ + context->awaiting_pop = false; +} + +static void +add_attribute (markup_parse_context_ty *context, markup_string_ty *string) +{ + if (context->cur_attr + 2 >= context->alloc_attrs) + { + context->alloc_attrs += 5; /* silly magic number */ + context->attr_names = xrealloc (context->attr_names, sizeof (char *) * context->alloc_attrs); + context->attr_values = xrealloc (context->attr_values, sizeof(char *) * context->alloc_attrs); + } + context->cur_attr++; + context->attr_names[context->cur_attr] = xstrdup (string->buffer); + context->attr_values[context->cur_attr] = NULL; + context->attr_names[context->cur_attr+1] = NULL; + context->attr_values[context->cur_attr+1] = NULL; +} + +static void +clear_attributes (markup_parse_context_ty *context) +{ + /* Go ahead and free the attributes. 
*/ + for (; context->cur_attr >= 0; context->cur_attr--) + { + int pos = context->cur_attr; + free (context->attr_names[pos]); + free (context->attr_values[pos]); + context->attr_names[pos] = context->attr_values[pos] = NULL; + } + assert (context->cur_attr == -1); + assert (context->attr_names == NULL || + context->attr_names[0] == NULL); + assert (context->attr_values == NULL || + context->attr_values[0] == NULL); +} + +static void +markup_parse_context_push (markup_parse_context_ty *context, + const markup_parser_ty *parser, + void *user_data) +{ + markup_recursion_tracker_ty *tracker; + + tracker = XMALLOC (markup_recursion_tracker_ty); + tracker->prev_element = context->subparser_element; + tracker->prev_parser = context->parser; + tracker->prev_user_data = context->user_data; + + context->subparser_element = current_element (context); + context->parser = parser; + context->user_data = user_data; + + gl_list_add_first (context->subparser_stack, tracker); +} + +static void +markup_parse_context_pop (markup_parse_context_ty *context) +{ + if (!context->awaiting_pop) + possibly_finish_subparser (context); + + assert (context->awaiting_pop); + + context->awaiting_pop = false; +} + +/* This has to be a separate function to ensure the alloca's + * are unwound on exit - otherwise we grow & blow the stack + * with large documents + */ +static inline void +emit_start_element (markup_parse_context_ty *context) +{ + int i, j = 0; + const char *start_name; + const char **attr_names; + const char **attr_values; + + /* In case we want to ignore qualified tags and we see that we have + * one here, we push a subparser. This will ignore all tags inside of + * the qualified tag. + * + * We deal with the end of the subparser from emit_end_element. 
+ */ + if ((context->flags & MARKUP_IGNORE_QUALIFIED) + && strchr (current_element (context), ':')) + { + static const markup_parser_ty ignore_parser; + markup_parse_context_push (context, &ignore_parser, NULL); + clear_attributes (context); + return; + } + + attr_names = XCALLOC (context->cur_attr + 2, const char *); + attr_values = XCALLOC (context->cur_attr + 2, const char *); + for (i = 0; i < context->cur_attr + 1; i++) + { + /* Possibly omit qualified attribute names from the list */ + if ((context->flags & MARKUP_IGNORE_QUALIFIED) + && strchr (context->attr_names[i], ':')) + continue; + + attr_names[j] = context->attr_names[i]; + attr_values[j] = context->attr_values[i]; + j++; + } + attr_names[j] = NULL; + attr_values[j] = NULL; + + /* Call user callback for element start */ + start_name = current_element (context); + + if (context->parser->start_element && name_validate (context, start_name)) + (* context->parser->start_element) (context, + start_name, + (const char **)attr_names, + (const char **)attr_values, + context->user_data); + free (attr_names); + free (attr_values); + clear_attributes (context); +} + +static void +emit_end_element (markup_parse_context_ty *context) +{ + assert (gl_list_size (context->tag_stack) != 0); + + possibly_finish_subparser (context); + + /* We might have just returned from our ignore subparser */ + if ((context->flags & MARKUP_IGNORE_QUALIFIED) + && strchr (current_element (context), ':')) + { + markup_parse_context_pop (context); + pop_tag (context); + return; + } + + if (context->parser->end_element) + (* context->parser->end_element) (context, + current_element (context), + context->user_data); + + ensure_no_outstanding_subparser (context); + + pop_tag (context); +} + +/* Feed some data to the parse context. The data need not be valid + UTF-8; an error will be signaled if it's invalid. The data need + not be an entire document; you can feed a document into the parser + incrementally, via multiple calls to this function. 
Typically, as + you receive data from a network connection or file, you feed each + received chunk of data into this function, aborting the process if + an error occurs. Once an error is reported, no further data may be + fed to the parse context; all errors are fatal. */ +bool +markup_parse_context_parse (markup_parse_context_ty *context, + const char *text, + ssize_t text_len) +{ + assert (context != NULL); + assert (text != NULL); + assert (context->state != STATE_ERROR); + assert (!context->parsing); + + if (text_len < 0) + text_len = strlen (text); + + if (text_len == 0) + return true; + + context->parsing = true; + + + context->current_text = text; + context->current_text_len = text_len; + context->current_text_end = context->current_text + text_len; + context->iter = context->current_text; + context->start = context->iter; + + while (context->iter != context->current_text_end) + { + switch (context->state) + { + case STATE_START: + /* Possible next state: AFTER_OPEN_ANGLE */ + + assert (gl_list_size (context->tag_stack) == 0); + + /* whitespace is ignored outside of any elements */ + skip_spaces (context); + + if (context->iter != context->current_text_end) + { + if (*context->iter == '<') + { + /* Move after the open angle */ + advance_char (context); + + context->state = STATE_AFTER_OPEN_ANGLE; + + /* this could start a passthrough */ + context->start = context->iter; + + /* document is now non-empty */ + context->document_empty = false; + } + else + { + emit_error (context, + _("document must begin with an element")); + } + } + break; + + case STATE_AFTER_OPEN_ANGLE: + /* Possible next states: INSIDE_OPEN_TAG_NAME, + * AFTER_CLOSE_TAG_SLASH, INSIDE_PASSTHROUGH + */ + if (*context->iter == '?' 
|| + *context->iter == '!') + { + /* include < in the passthrough */ + const char *openangle = "<"; + add_to_partial (context, openangle, openangle + 1); + context->start = context->iter; + context->balance = 1; + context->state = STATE_INSIDE_PASSTHROUGH; + } + else if (*context->iter == '/') + { + /* move after it */ + advance_char (context); + + context->state = STATE_AFTER_CLOSE_TAG_SLASH; + } + else if (!IS_COMMON_NAME_END_CHAR (*(context->iter))) + { + context->state = STATE_INSIDE_OPEN_TAG_NAME; + + /* start of tag name */ + context->start = context->iter; + } + else + { + emit_error (context, _("invalid character after '<'")); + } + break; + + /* The AFTER_CLOSE_ANGLE state is actually sort of + * broken, because it doesn't correspond to a range + * of characters in the input stream as the others do, + * and thus makes things harder to conceptualize + */ + case STATE_AFTER_CLOSE_ANGLE: + /* Possible next states: INSIDE_TEXT, STATE_START */ + if (gl_list_size (context->tag_stack) == 0) + { + context->start = NULL; + context->state = STATE_START; + } + else + { + context->start = context->iter; + context->state = STATE_INSIDE_TEXT; + } + break; + + case STATE_AFTER_ELISION_SLASH: + /* Possible next state: AFTER_CLOSE_ANGLE */ + if (*context->iter == '>') + { + /* move after the close angle */ + advance_char (context); + context->state = STATE_AFTER_CLOSE_ANGLE; + emit_end_element (context); + } + else + { + emit_error (context, _("missing '>'")); + } + break; + + case STATE_INSIDE_OPEN_TAG_NAME: + /* Possible next states: BETWEEN_ATTRIBUTES */ + + /* if there's a partial chunk then it's the first part of the + * tag name. If there's a context->start then it's the start + * of the tag name in current_text, the partial chunk goes + * before that start though. + */ + advance_to_name_end (context); + + if (context->iter == context->current_text_end) + { + /* The name hasn't necessarily ended. Merge with + * partial chunk, leave state unchanged. 
+ */ + add_to_partial (context, context->start, context->iter); + } + else + { + /* The name has ended. Combine it with the partial chunk + * if any; push it on the stack; enter next state. + */ + add_to_partial (context, context->start, context->iter); + push_partial_as_tag (context); + + context->state = STATE_BETWEEN_ATTRIBUTES; + context->start = NULL; + } + break; + + case STATE_INSIDE_ATTRIBUTE_NAME: + /* Possible next states: AFTER_ATTRIBUTE_NAME */ + + advance_to_name_end (context); + add_to_partial (context, context->start, context->iter); + + /* read the full name, if we enter the equals sign state + * then add the attribute to the list (without the value), + * otherwise store a partial chunk to be prepended later. + */ + if (context->iter != context->current_text_end) + context->state = STATE_AFTER_ATTRIBUTE_NAME; + break; + + case STATE_AFTER_ATTRIBUTE_NAME: + /* Possible next states: AFTER_ATTRIBUTE_EQUALS_SIGN */ + + skip_spaces (context); + + if (context->iter != context->current_text_end) + { + /* The name has ended. Combine it with the partial chunk + * if any; push it on the stack; enter next state. 
+ */ + if (!name_validate (context, context->partial_chunk->buffer)) + break; + + add_attribute (context, context->partial_chunk); + + markup_string_free (context->partial_chunk, true); + context->partial_chunk = NULL; + context->start = NULL; + + if (*context->iter == '=') + { + advance_char (context); + context->state = STATE_AFTER_ATTRIBUTE_EQUALS_SIGN; + } + else + { + emit_error (context, _("missing '='")); + } + } + break; + + case STATE_BETWEEN_ATTRIBUTES: + /* Possible next states: AFTER_CLOSE_ANGLE, + * AFTER_ELISION_SLASH, INSIDE_ATTRIBUTE_NAME + */ + skip_spaces (context); + + if (context->iter != context->current_text_end) + { + if (*context->iter == '/') + { + advance_char (context); + context->state = STATE_AFTER_ELISION_SLASH; + } + else if (*context->iter == '>') + { + advance_char (context); + context->state = STATE_AFTER_CLOSE_ANGLE; + } + else if (!IS_COMMON_NAME_END_CHAR (*(context->iter))) + { + context->state = STATE_INSIDE_ATTRIBUTE_NAME; + /* start of attribute name */ + context->start = context->iter; + } + else + { + emit_error (context, _("missing '>' or '/'")); + } + + /* If we're done with attributes, invoke + * the start_element callback + */ + if (context->state == STATE_AFTER_ELISION_SLASH || + context->state == STATE_AFTER_CLOSE_ANGLE) + emit_start_element (context); + } + break; + + case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN: + /* Possible next state: INSIDE_ATTRIBUTE_VALUE_[SQ/DQ] */ + + skip_spaces (context); + + if (context->iter != context->current_text_end) + { + if (*context->iter == '"') + { + advance_char (context); + context->state = STATE_INSIDE_ATTRIBUTE_VALUE_DQ; + context->start = context->iter; + } + else if (*context->iter == '\'') + { + advance_char (context); + context->state = STATE_INSIDE_ATTRIBUTE_VALUE_SQ; + context->start = context->iter; + } + else + { + emit_error (context, _("missing opening quote")); + } + } + break; + + case STATE_INSIDE_ATTRIBUTE_VALUE_SQ: + case STATE_INSIDE_ATTRIBUTE_VALUE_DQ: + /* 
Possible next states: BETWEEN_ATTRIBUTES */ + { + char delim; + + if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ) + { + delim = '\''; + } + else + { + delim = '"'; + } + + do + { + if (*context->iter == delim) + break; + } + while (advance_char (context)); + } + if (context->iter == context->current_text_end) + { + /* The value hasn't necessarily ended. Merge with + * partial chunk, leave state unchanged. + */ + add_to_partial (context, context->start, context->iter); + } + else + { + bool is_ascii; + /* The value has ended at the quote mark. Combine it + * with the partial chunk if any; set it for the current + * attribute. + */ + add_to_partial (context, context->start, context->iter); + + assert (context->cur_attr >= 0); + + if (unescape_string_inplace (context, context->partial_chunk, + &is_ascii) + && (is_ascii + || text_validate (context, + context->partial_chunk->buffer, + context->partial_chunk->buflen))) + { + /* success, advance past quote and set state. */ + context->attr_values[context->cur_attr] = + markup_string_free (context->partial_chunk, false); + context->partial_chunk = NULL; + advance_char (context); + context->state = STATE_BETWEEN_ATTRIBUTES; + context->start = NULL; + } + + truncate_partial (context); + } + break; + + case STATE_INSIDE_TEXT: + /* Possible next states: AFTER_OPEN_ANGLE */ + do + { + if (*context->iter == '<') + break; + } + while (advance_char (context)); + + /* The text hasn't necessarily ended. Merge with + * partial chunk, leave state unchanged. + */ + + add_to_partial (context, context->start, context->iter); + + if (context->iter != context->current_text_end) + { + bool is_ascii; + + /* The text has ended at the open angle. Call the text + * callback. 
+ */ + if (unescape_string_inplace (context, context->partial_chunk, + &is_ascii) + && (is_ascii + || text_validate (context, + context->partial_chunk->buffer, + context->partial_chunk->buflen))) + { + if (context->parser->text) + (*context->parser->text) (context, + context->partial_chunk->buffer, + context->partial_chunk->buflen, + context->user_data); + + /* advance past open angle and set state. */ + advance_char (context); + context->state = STATE_AFTER_OPEN_ANGLE; + /* could begin a passthrough */ + context->start = context->iter; + } + + truncate_partial (context); + } + break; + + case STATE_AFTER_CLOSE_TAG_SLASH: + /* Possible next state: INSIDE_CLOSE_TAG_NAME */ + if (!IS_COMMON_NAME_END_CHAR (*(context->iter))) + { + context->state = STATE_INSIDE_CLOSE_TAG_NAME; + + /* start of tag name */ + context->start = context->iter; + } + else + { + emit_error (context, _("invalid character after '</'")); + } + break; + + case STATE_INSIDE_CLOSE_TAG_NAME: + /* Possible next state: AFTER_CLOSE_TAG_NAME */ + advance_to_name_end (context); + add_to_partial (context, context->start, context->iter); + + if (context->iter != context->current_text_end) + context->state = STATE_AFTER_CLOSE_TAG_NAME; + break; + + case STATE_AFTER_CLOSE_TAG_NAME: + /* Possible next state: AFTER_CLOSE_TAG_SLASH */ + + skip_spaces (context); + + if (context->iter != context->current_text_end) + { + markup_string_ty *close_name; + + close_name = context->partial_chunk; + context->partial_chunk = NULL; + + if (*context->iter != '>') + { + emit_error (context, + _("invalid character after a close element name")); + } + else if (gl_list_size (context->tag_stack) == 0) + { + emit_error (context, _("element is closed")); + } + else if (strcmp (close_name->buffer, current_element (context)) != 0) + { + emit_error (context, _("element is closed")); + } + else + { + advance_char (context); + context->state = STATE_AFTER_CLOSE_ANGLE; + context->start = NULL; + + emit_end_element (context); + } + 
context->partial_chunk = close_name; + truncate_partial (context); + } + break; + + case STATE_INSIDE_PASSTHROUGH: + /* Possible next state: AFTER_CLOSE_ANGLE */ + do + { + if (*context->iter == '<') + context->balance++; + if (*context->iter == '>') + { + char *str; + size_t len; + + context->balance--; + add_to_partial (context, context->start, context->iter); + context->start = context->iter; + + str = context->partial_chunk->buffer; + len = context->partial_chunk->buflen; + + if (str[1] == '?' && str[len - 1] == '?') + break; + if (strncmp (str, "<!--", 4) == 0 && + strcmp (str + len - 2, "--") == 0) + break; + if (strncmp (str, "<![CDATA[", 9) == 0 && + strcmp (str + len - 2, "]]") == 0) + break; + if (strncmp (str, "<!DOCTYPE", 9) == 0 && + context->balance == 0) + break; + } + } + while (advance_char (context)); + + if (context->iter == context->current_text_end) + { + /* The passthrough hasn't necessarily ended. Merge with + * partial chunk, leave state unchanged. + */ + add_to_partial (context, context->start, context->iter); + } + else + { + /* The passthrough has ended at the close angle. Combine + * it with the partial chunk if any. Call the passthrough + * callback. Note that the open/close angles are + * included in the text of the passthrough. 
+ */ + advance_char (context); /* advance past close angle */ + add_to_partial (context, context->start, context->iter); + + if (context->flags & MARKUP_TREAT_CDATA_AS_TEXT && + strncmp (context->partial_chunk->buffer, "<![CDATA[", 9) == 0) + { + if (context->parser->text && + text_validate (context, + context->partial_chunk->buffer + 9, + context->partial_chunk->buflen - 12)) + (*context->parser->text) (context, + context->partial_chunk->buffer + 9, + context->partial_chunk->buflen - 12, + context->user_data); + } + else if (context->parser->passthrough && + text_validate (context, + context->partial_chunk->buffer, + context->partial_chunk->buflen)) + (*context->parser->passthrough) (context, + context->partial_chunk->buffer, + context->partial_chunk->buflen, + context->user_data); + + truncate_partial (context); + + context->state = STATE_AFTER_CLOSE_ANGLE; + context->start = context->iter; /* could begin text */ + } + break; + + case STATE_ERROR: + goto finished; + break; + + default: + abort (); + break; + } + } + + finished: + context->parsing = false; + + return context->state != STATE_ERROR; +} + +/* Signals to the parse context that all data has been fed into the + * parse context with markup_parse_context_parse. + * + * This function reports an error if the document isn't complete, + * for example if elements are still open. 
*/ +bool +markup_parse_context_end_parse (markup_parse_context_ty *context) +{ + assert (context != NULL); + assert (!context->parsing); + assert (context->state != STATE_ERROR); + /* Release any partially accumulated chunk. */ + if (context->partial_chunk != NULL) + { + markup_string_free (context->partial_chunk, true); + context->partial_chunk = NULL; + } + /* A context whose document_empty flag is set is reported as an error without examining the state. */ + if (context->document_empty) + { + emit_error (context, _("empty document")); + return false; + } + /* Set the parsing flag while the final state is examined; it is cleared again before returning. */ + context->parsing = true; + /* Every state except STATE_START, and STATE_AFTER_CLOSE_ANGLE with an empty tag stack, reports input that ended too early. */ + switch (context->state) + { + case STATE_START: + /* Nothing to do */ + break; + + case STATE_AFTER_OPEN_ANGLE: + emit_error (context, + _("document ended unexpectedly just after '<'")); + break; + + case STATE_AFTER_CLOSE_ANGLE: + if (gl_list_size (context->tag_stack) > 0) + { + /* Error message the same as for INSIDE_TEXT */ + emit_error (context, + _("document ended unexpectedly with elements still open")); + } + break; + + case STATE_AFTER_ELISION_SLASH: + emit_error (context, _("document ended unexpectedly without '>'")); + break; + + case STATE_INSIDE_OPEN_TAG_NAME: + emit_error (context, + _("document ended unexpectedly inside an element name")); + break; + + case STATE_INSIDE_ATTRIBUTE_NAME: + case STATE_AFTER_ATTRIBUTE_NAME: + emit_error (context, + _("document ended unexpectedly inside an attribute name")); + break; + + case STATE_BETWEEN_ATTRIBUTES: + emit_error (context, + _("document ended unexpectedly inside an open tag")); + break; + + case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN: + emit_error (context, _("document ended unexpectedly after '='")); + break; + + case STATE_INSIDE_ATTRIBUTE_VALUE_SQ: + case STATE_INSIDE_ATTRIBUTE_VALUE_DQ: + emit_error (context, + _("document ended unexpectedly inside an attribute value")); + break; + + case STATE_INSIDE_TEXT: + assert (gl_list_size (context->tag_stack) > 0); + emit_error (context, + _("document ended unexpectedly with elements still open")); + break; + + case STATE_AFTER_CLOSE_TAG_SLASH: + case STATE_INSIDE_CLOSE_TAG_NAME: + case STATE_AFTER_CLOSE_TAG_NAME: + 
emit_error (context, + _("document ended unexpectedly inside the close tag")); + break; + + case STATE_INSIDE_PASSTHROUGH: + emit_error (context, + _("document ended unexpectedly inside a comment or " + "processing instruction")); + break; + /* STATE_ERROR is excluded by the assertion at function entry; any other value is not a known state. */ + case STATE_ERROR: + default: + abort (); + break; + } + + context->parsing = false; + /* NOTE(review): emit_error presumably switches the context to STATE_ERROR, so false is returned exactly when an error was reported above -- confirm against emit_error. */ + return context->state != STATE_ERROR; +} + /* Returns the error_text stored in CONTEXT, as is. NOTE(review): presumably the message recorded by the most recent emit_error call; whether it can be NULL and who owns it is not visible here -- confirm. */ +const char * +markup_parse_context_get_error (markup_parse_context_ty *context) +{ + return context->error_text; +} diff --git a/gnulib-local/lib/markup.h b/gnulib-local/lib/markup.h new file mode 100644 index 0000000..61e5b0e --- /dev/null +++ b/gnulib-local/lib/markup.h @@ -0,0 +1,164 @@ +/* markup.h -- simple XML-like string parser + Copyright (C) 2015 Free Software Foundation, Inc. + + This file is not part of the GNU gettext program, but is used with + GNU gettext. + + This is a stripped down version of GLib's gmarkup.h. The original + copyright notice is as follows: + */ + +/* gmarkup.h - Simple XML-like string parser/writer + * + * Copyright 2000 Red Hat, Inc. + * + * GLib is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 3 of the + * License, or (at your option) any later version. + * + * GLib is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with GLib; see the file COPYING.LIB. If not, + * see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef __MARKUP_H__ +#define __MARKUP_H__ 1 + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdbool.h> +#include <stddef.h> +#include <sys/types.h> /* ssize_t, used by markup_parse_context_parse */ + +/** + * markup_parse_flags_ty: + * @MARKUP_DO_NOT_USE_THIS_UNSUPPORTED_FLAG: flag you should not use + * @MARKUP_TREAT_CDATA_AS_TEXT: When this flag is set, CDATA marked + * sections are not passed literally to the @passthrough function of + * the parser. Instead, the content of the section (without the + * `<![CDATA[` and `]]>`) is + * passed to the @text function. (In GLib, this flag was added in + * version 2.12.) + * @MARKUP_PREFIX_ERROR_POSITION: Normally errors caught by the parser + * itself have line/column information prefixed to them to let the + * caller know the location of the error. When this flag is set the + * location information is also prefixed to errors generated by the + * #markup_parser_ty callback functions. + * @MARKUP_IGNORE_QUALIFIED: Ignore (don't report) qualified + * attributes and tags, along with their contents. A qualified + * attribute or tag is one that contains ':' in its name (i.e. is in + * another namespace). (In GLib, available since 2.40.) + * + * Flags that affect the behaviour of the parser. + */ +typedef enum + { + MARKUP_DO_NOT_USE_THIS_UNSUPPORTED_FLAG = 1 << 0, + MARKUP_TREAT_CDATA_AS_TEXT = 1 << 1, + MARKUP_PREFIX_ERROR_POSITION = 1 << 2, + MARKUP_IGNORE_QUALIFIED = 1 << 3 + } markup_parse_flags_ty; + +/** + * markup_parse_context_ty: + * + * A parse context is used to parse a stream of bytes that + * you expect to contain marked-up text. + * + * See markup_parse_context_new(), #markup_parser_ty, and so + * on for more details. + */ +typedef struct _markup_parse_context_ty markup_parse_context_ty; +typedef struct _markup_parser_ty markup_parser_ty; + +/** + * markup_parser_ty: + * @start_element: Callback to invoke when the opening tag of an element + * is seen. The callback's @attribute_names and @attribute_values parameters + * are %NULL-terminated. 
+ * @end_element: Callback to invoke when the closing tag of an element + * is seen. Note that this is also called for empty tags like + * `<empty/>`. + * @text: Callback to invoke when some text is seen (text is always + * inside an element). Note that the text of an element may be spread + * over multiple calls of this function. If the + * %MARKUP_TREAT_CDATA_AS_TEXT flag is set, this function is also + * called for the content of CDATA marked sections. + * @passthrough: Callback to invoke for comments, processing instructions + * and doctype declarations; if you're re-writing the parsed document, + * write the passthrough text back out in the same position. If the + * %MARKUP_TREAT_CDATA_AS_TEXT flag is not set, this function is also + * called for CDATA marked sections. + * @error: Callback to invoke when an error occurs. + * + * Any of the fields in #markup_parser_ty can be %NULL, in which case they + * will be ignored. Except for the @error function, any of these callbacks + * can set an error; in particular the %MARKUP_ERROR_UNKNOWN_ELEMENT, + * %MARKUP_ERROR_UNKNOWN_ATTRIBUTE, and %MARKUP_ERROR_INVALID_CONTENT + * errors are intended to be set from these callbacks. If you set an error + * from a callback, markup_parse_context_parse() will report that error + * back to its caller. + * + * Note: the %MARKUP_ERROR_* codes named above are inherited from the + * GLib documentation; this header declares no error codes. Errors are + * delivered as text, through the @error callback and + * markup_parse_context_get_error(). 
+ */ +struct _markup_parser_ty +{ + /* Called for open tags <foo bar="baz"> */ + bool (*start_element) (markup_parse_context_ty *context, + const char *element_name, + const char **attribute_names, + const char **attribute_values, + void *user_data); + + /* Called for close tags </foo> */ + bool (*end_element) (markup_parse_context_ty *context, + const char *element_name, + void *user_data); + + /* Called for character data */ + /* text is not nul-terminated */ + bool (*text) (markup_parse_context_ty *context, + const char *text, + size_t text_len, + void *user_data); + + /* Called for strings that should be re-saved verbatim in this same + * position, but are not otherwise interpretable. At the moment + * this includes comments, processing instructions, doctype + * declarations and, unless MARKUP_TREAT_CDATA_AS_TEXT is set, + * CDATA sections. + */ + /* text is not nul-terminated. */ + bool (*passthrough) (markup_parse_context_ty *context, + const char *passthrough_text, + size_t text_len, + void *user_data); + + /* Called on error, including one set by other + * methods in the vtable. NOTE(review): the GLib wording said the + * GError should not be freed; the analogue here is error_text, + * which presumably must not be freed by the callback -- confirm. 
+ */ + void (*error) (markup_parse_context_ty *context, + const char *error_text, + void *user_data); +}; + /* Parser life cycle: markup_parse_context_parse is fed the input, markup_parse_context_end_parse is called once all input has been fed, and markup_parse_context_get_error returns the context's error text. */ +extern markup_parse_context_ty * + markup_parse_context_new (const markup_parser_ty *parser, + markup_parse_flags_ty flags, + void *user_data); +extern void markup_parse_context_free (markup_parse_context_ty *context); +extern bool markup_parse_context_parse (markup_parse_context_ty *context, + const char *text, + ssize_t text_len); +extern bool markup_parse_context_end_parse (markup_parse_context_ty *context); +extern const char * + markup_parse_context_get_error (markup_parse_context_ty *context); + +#ifdef __cplusplus +} +#endif + +#endif /* __MARKUP_H__ */ diff --git a/gnulib-local/modules/markup b/gnulib-local/modules/markup new file mode 100644 index 0000000..8d969d4 --- /dev/null +++ b/gnulib-local/modules/markup @@ -0,0 +1,31 @@ +Description: +Simple XML-like parser + +Files: +lib/markup.h +lib/markup.c + +Depends-on: +c-ctype +linked-list +unistr/u8-mbtouc +unistr/u8-next +unictype/ctype-alpha +xalloc +xlist +xvasprintf + +configure.ac: + +Makefile.am: +lib_SOURCES += markup.h markup.c + +Include: +"markup.h" + +License: +LGPL + +Maintainer: +Daiki Ueno + |